diff options
author | Paul Mundt <lethal@linux-sh.org> | 2010-07-05 10:46:08 +0400 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2010-07-05 10:46:08 +0400 |
commit | 285eba57db7bd7d7c3c5929fb8621fdcaaea1b00 (patch) | |
tree | a9e7f0563cef296b24c53b20dbb388ec5c210172 /fs/xfs | |
parent | 1c14e6cecb1811543b1016f27e5d308fbea8c08a (diff) | |
parent | 815c4163b6c8ebf8152f42b0a5fd015cfdcedc78 (diff) | |
download | linux-285eba57db7bd7d7c3c5929fb8621fdcaaea1b00.tar.xz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
include/linux/serial_sci.h
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 23 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_export.c | 11 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 7 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.c | 15 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 16 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_quotaops.c | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 9 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 356 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm.c | 22 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_syscalls.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 142 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 39 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 149 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 285 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.h | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 70 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.h | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 446 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.h | 411 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 4 |
27 files changed, 908 insertions, 1189 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 089eaca860b4..34640d6dbdcb 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1333,6 +1333,21 @@ xfs_vm_writepage( trace_xfs_writepage(inode, page, 0); /* + * Refuse to write the page out if we are called from reclaim context. + * + * This is primarily to avoid stack overflows when called from deep + * used stacks in random callers for direct reclaim, but disabling + * reclaim for kswap is a nice side-effect as kswapd causes rather + * suboptimal I/O patters, too. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if (current->flags & PF_MEMALLOC) + goto out_fail; + + /* * We need a transaction if: * 1. There are delalloc buffers on the page * 2. The page is uptodate and we have unmapped buffers @@ -1366,14 +1381,6 @@ xfs_vm_writepage( if (!page_has_buffers(page)) create_empty_buffers(page, 1 << inode->i_blkbits, 0); - - /* - * VM calculation for nr_to_write seems off. Bump it way - * up, this gets simple streaming writes zippy again. - * To be reviewed again after Jens' writeback changes. - */ - wbc->nr_to_write *= 4; - /* * Convert delayed allocate, unwritten or unmapped space * to real space and flush out to disk. diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 846b75aeb2ab..e7839ee49e43 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -128,13 +128,12 @@ xfs_nfs_get_inode( return ERR_PTR(-ESTALE); /* - * The XFS_IGET_BULKSTAT means that an invalid inode number is just - * fine and not an indication of a corrupted filesystem. Because - * clients can send any kind of invalid file handle, e.g. after - * a restore on the server we have to deal with this case gracefully. + * The XFS_IGET_UNTRUSTED means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem as clients can + * send invalid file handles and we have to handle it gracefully.. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, - XFS_ILOCK_SHARED, &ip, 0); + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, + XFS_ILOCK_SHARED, &ip); if (error) { /* * EINVAL means the inode cluster doesn't exist anymore. diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 699b60cbab9c..e59a81062830 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -679,10 +679,9 @@ xfs_ioc_bulkstat( error = xfs_bulkstat_single(mp, &inlast, bulkreq.ubuffer, &done); else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, - (bulkstat_one_pf)xfs_bulkstat_one, NULL, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + &done); if (error) return -error; diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 9287135e9bfc..52ed49e6465c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -237,15 +237,12 @@ xfs_bulkstat_one_compat( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ - xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ int *stat) /* BULKSTAT_RV_... */ { return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, bno, - ubused, dibuff, stat); + xfs_bulkstat_one_fmt_compat, + ubused, stat); } /* copied from xfs_ioctl.c */ @@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat( int res; error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), - NULL, 0, NULL, NULL, &res); + sizeof(compat_xfs_bstat_t), 0, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, NULL, - sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, - BULKSTAT_FG_QUICK, &done); + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bulkreq.ubuffer, &done); } else error = XFS_ERROR(EINVAL); if (error) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 9c8019c78c92..44f0b2de153e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -585,11 +585,20 @@ xfs_vn_fallocate( bf.l_len = len; xfs_ilock(ip, XFS_IOLOCK_EXCL); + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 0, XFS_ATTR_NOLOCK); - if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) - new_size = offset + len; + if (error) + goto out_unlock; /* Change file size if needed */ if (new_size) { @@ -600,6 +609,7 @@ xfs_vn_fallocate( error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } +out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); out_error: return error; diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 9ac8aea91529..067cafbfc635 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -23,7 +23,6 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_quota.h" -#include "xfs_log.h" #include "xfs_trans.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3884e20bc14e..ef7f0218bccb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -164,10 +164,6 @@ xfs_inode_ag_iterator( struct xfs_perag *pag; pag = xfs_perag_get(mp, ag); - if (!pag->pag_ici_init) { - xfs_perag_put(pag); - continue; - } error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, exclusive, &nr); xfs_perag_put(pag); @@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink( down_read(&xfs_mount_list_lock); list_for_each_entry(mp, &xfs_mount_list, m_mplist) { for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - pag = xfs_perag_get(mp, ag); - if (!pag->pag_ici_init) { - xfs_perag_put(pag); - continue; - } reclaimable += pag->pag_ici_reclaimable; xfs_perag_put(pag); } diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 207fa77f63ae..d12be8470cba 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -50,7 +50,6 @@ #include "quota/xfs_dquot_item.h" #include "quota/xfs_dquot.h" #include "xfs_log_recover.h" -#include "xfs_buf_item.h" #include "xfs_inode_item.h" /* diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index ff6bc797baf2..73d5aa117384 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, ) ) -#define DEFINE_PERAG_REF_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ - unsigned long caller_ip), \ - TP_ARGS(mp, agno, refcount, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_agnumber_t, agno) \ - __field(int, refcount) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = mp->m_super->s_dev; \ - __entry->agno = agno; \ - __entry->refcount = refcount; \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d agno %u refcount %d caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->agno, \ - __entry->refcount, \ - (char *)__entry->caller_ip) \ -); - -DEFINE_PERAG_REF_EVENT(xfs_perag_get) -DEFINE_PERAG_REF_EVENT(xfs_perag_put) - #define DEFINE_ATTR_LIST_EVENT(name) \ DEFINE_EVENT(xfs_attr_list_class, name, \ TP_PROTO(struct xfs_attr_list_context *ctx), \ @@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); +DECLARE_EVENT_CLASS(xfs_perag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, + unsigned long caller_ip), + TP_ARGS(mp, agno, refcount, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, refcount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->refcount = refcount; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u refcount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->refcount, + (char *)__entry->caller_ip) +); + +#define DEFINE_PERAG_REF_EVENT(name) \ +DEFINE_EVENT(xfs_perag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip)) +DEFINE_PERAG_REF_EVENT(xfs_perag_get); +DEFINE_PERAG_REF_EVENT(xfs_perag_put); + TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, struct xfs_da_node_entry *btree), @@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); -#define DEFINE_RW_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(xfs_fsize_t, size) \ - __field(xfs_fsize_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - __field(int, flags) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - __entry->flags = flags; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count 0x%zx ioflags %s", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count, \ - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \ +DECLARE_EVENT_CLASS(xfs_file_class, + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), + TP_ARGS(ip, count, offset, flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count 0x%zx ioflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) ) + +#define DEFINE_RW_EVENT(name) \ +DEFINE_EVENT(xfs_file_class, name, \ + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ + TP_ARGS(ip, count, offset, flags)) DEFINE_RW_EVENT(xfs_file_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); DEFINE_RW_EVENT(xfs_file_splice_write); - -#define DEFINE_PAGE_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ - TP_ARGS(inode, page, off), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(pgoff_t, pgoff) \ - __field(loff_t, size) \ - __field(unsigned long, offset) \ - __field(int, delalloc) \ - __field(int, unmapped) \ - __field(int, unwritten) \ - ), \ - TP_fast_assign( \ - int delalloc = -1, unmapped = -1, unwritten = -1; \ - \ - if (page_has_buffers(page)) \ - xfs_count_page_state(page, &delalloc, \ - &unmapped, &unwritten); \ - __entry->dev = inode->i_sb->s_dev; \ - __entry->ino = XFS_I(inode)->i_ino; \ - __entry->pgoff = page_offset(page); \ - __entry->size = i_size_read(inode); \ - __entry->offset = off; \ - __entry->delalloc = delalloc; \ - __entry->unmapped = unmapped; \ - __entry->unwritten = unwritten; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \ - "delalloc %d unmapped %d unwritten %d", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->pgoff, \ - __entry->size, \ - __entry->offset, \ - __entry->delalloc, \ - __entry->unmapped, \ - __entry->unwritten) \ +DECLARE_EVENT_CLASS(xfs_page_class, + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), + TP_ARGS(inode, page, off), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(pgoff_t, pgoff) + __field(loff_t, size) + __field(unsigned long, offset) + __field(int, delalloc) + __field(int, unmapped) + __field(int, unwritten) + ), + TP_fast_assign( + int delalloc = -1, unmapped = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, + &unmapped, &unwritten); + __entry->dev = inode->i_sb->s_dev; + __entry->ino = XFS_I(inode)->i_ino; + __entry->pgoff = page_offset(page); + __entry->size = i_size_read(inode); + __entry->offset = off; + __entry->delalloc = delalloc; + __entry->unmapped = unmapped; + __entry->unwritten = unwritten; + ), + TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " + "delalloc %d unmapped %d unwritten %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->size, + __entry->offset, + __entry->delalloc, + __entry->unmapped, + __entry->unwritten) ) + +#define DEFINE_PAGE_EVENT(name) \ +DEFINE_EVENT(xfs_page_class, name, \ + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ + TP_ARGS(inode, page, off)) DEFINE_PAGE_EVENT(xfs_writepage); DEFINE_PAGE_EVENT(xfs_releasepage); DEFINE_PAGE_EVENT(xfs_invalidatepage); -#define DEFINE_IOMAP_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ - int flags, struct xfs_bmbt_irec *irec), \ - TP_ARGS(ip, offset, count, flags, irec), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(loff_t, size) \ - __field(loff_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - __field(int, flags) \ - __field(xfs_fileoff_t, startoff) \ - __field(xfs_fsblock_t, startblock) \ - __field(xfs_filblks_t, blockcount) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - __entry->flags = flags; \ - __entry->startoff = irec ? irec->br_startoff : 0; \ - __entry->startblock = irec ? irec->br_startblock : 0; \ - __entry->blockcount = irec ? irec->br_blockcount : 0; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count %zd flags %s " \ - "startoff 0x%llx startblock %lld blockcount 0x%llx", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count, \ - __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ - __entry->startoff, \ - (__int64_t)__entry->startblock, \ - __entry->blockcount) \ +DECLARE_EVENT_CLASS(xfs_iomap_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, + int flags, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, offset, count, flags, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + __entry->startoff = irec ? irec->br_startoff : 0; + __entry->startblock = irec ? irec->br_startblock : 0; + __entry->blockcount = irec ? irec->br_blockcount : 0; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd flags %s " + "startoff 0x%llx startblock %lld blockcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", BMAPI_FLAGS), + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount) ) + +#define DEFINE_IOMAP_EVENT(name) \ +DEFINE_EVENT(xfs_iomap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ + int flags, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, flags, irec)) DEFINE_IOMAP_EVENT(xfs_iomap_enter); DEFINE_IOMAP_EVENT(xfs_iomap_found); DEFINE_IOMAP_EVENT(xfs_iomap_alloc); -#define DEFINE_SIMPLE_IO_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ - TP_ARGS(ip, offset, count), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(loff_t, size) \ - __field(loff_t, new_size) \ - __field(loff_t, offset) \ - __field(size_t, count) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = ip->i_new_size; \ - __entry->offset = offset; \ - __entry->count = count; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ - "offset 0x%llx count %zd", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size, \ - __entry->offset, \ - __entry->count) \ +DECLARE_EVENT_CLASS(xfs_simple_io_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), + TP_ARGS(ip, offset, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count) ); + +#define DEFINE_SIMPLE_IO_EVENT(name) \ +DEFINE_EVENT(xfs_simple_io_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ + TP_ARGS(ip, offset, count)) DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 38e764146644..8c117ff2e3ab 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref( if (!xfs_Gqm) { xfs_Gqm = xfs_Gqm_init(); - if (!xfs_Gqm) + if (!xfs_Gqm) { + mutex_unlock(&xfs_Gqm_lock); return ENOMEM; + } } /* @@ -1630,10 +1632,7 @@ xfs_qm_dqusage_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* on-disk inode pointer (not used) */ int *res) /* result code value */ { xfs_inode_t *ip; @@ -1658,7 +1657,7 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { *res = BULKSTAT_RV_NOTHING; return error; } @@ -1794,12 +1793,13 @@ xfs_qm_quotacheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters in core. */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, NULL, - structsz, NULL, BULKSTAT_FG_IGET, &done))) + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, + structsz, NULL, &done); + if (error) break; - } while (! done); + } while (!done); /* * We've made all the changes that we need to make incore. @@ -1887,14 +1887,14 @@ xfs_qm_init_quotainos( mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0))) + 0, 0, &uip))) return XFS_ERROR(error); } if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0))) { + 0, 0, &gip))) { if (uip) IRELE(uip); return XFS_ERROR(error); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 92b002f1805f..b4487764e923 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles( } if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); + error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip); if (!error) { error = xfs_truncate_file(mp, qip); IRELE(qip); @@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles( if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && mp->m_sb.sb_gquotino != NULLFSINO) { - error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); + error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip); if (!error2) { error2 = xfs_truncate_file(mp, qip); IRELE(qip); @@ -417,12 +417,12 @@ xfs_qm_scall_getqstat( } if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip, 0) == 0) + 0, 0, &uip) == 0) tempuqip = B_TRUE; } if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip, 0) == 0) + 0, 0, &gip) == 0) tempgqip = B_TRUE; } if (uip) { @@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust( xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* not used */ int ubsize, /* not used */ - void *private_data, /* not used */ - xfs_daddr_t bno, /* starting block of inode cluster */ int *ubused, /* not used */ - void *dip, /* not used */ int *res) /* bulkstat result code */ { xfs_inode_t *ip; @@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust( ipreleased = B_FALSE; again: lock_flags = XFS_ILOCK_SHARED; - if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { + if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) { *res = BULKSTAT_RV_NOTHING; return (error); } @@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck( * Iterate thru all the inodes in the file system, * adjusting the corresponding dquot counters */ - if ((error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_internalqcheck_adjust, NULL, - 0, NULL, BULKSTAT_FG_IGET, &done))) { + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_internalqcheck_adjust, + 0, NULL, &done); + if (error) { + cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); break; } - } while (! done); - if (error) { - cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); - } + } while (!done); + cmn_err(CE_DEBUG, "Checking results against system dquots"); for (i = 0; i < qmtest_hashmask; i++) { xfs_dqtest_t *d, *n; diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 401f364ad36c..4917d4eed4ed 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -227,7 +227,6 @@ typedef struct xfs_perag { atomic_t pagf_fstrms; /* # of filestreams active in this AG */ - int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 5bba29a07812..7f159d2a429a 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -69,7 +69,9 @@ xfs_swapext( goto out; } - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { + if (!(file->f_mode & FMODE_WRITE) || + !(file->f_mode & FMODE_READ) || + (file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_file; } @@ -81,6 +83,7 @@ xfs_swapext( } if (!(tmp_file->f_mode & FMODE_WRITE) || + !(tmp_file->f_mode & FMODE_READ) || (tmp_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_tmp_file; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 9d884c127bb9..c7142a064c48 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1203,6 +1203,63 @@ error0: return error; } +STATIC int +xfs_imap_lookup( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agblock_t agbno, + xfs_agblock_t *chunk_agbno, + xfs_agblock_t *offset_agbno, + int flags) +{ + struct xfs_inobt_rec_incore rec; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + xfs_agino_t startino; + int error; + int i; + + error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "xfs_ialloc_read_agi() returned " + "error %d, agno %d", + error, agno); + return error; + } + + /* + * derive and lookup the exact inode record for the given agino. If the + * record cannot be found, then it's an invalid inode number and we + * should abort. + */ + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); + startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); + error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); + if (!error) { + if (i) + error = xfs_inobt_get_rec(cur, &rec, &i); + if (!error && i == 0) + error = EINVAL; + } + + xfs_trans_brelse(tp, agbp); + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + if (error) + return error; + + /* for untrusted inodes check it is allocated first */ + if ((flags & XFS_IGET_UNTRUSTED) && + (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) + return EINVAL; + + *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); + *offset_agbno = agbno - *chunk_agbno; + return 0; +} + /* * Return the location of the inode in imap, for mapping it into a buffer. */ @@ -1235,8 +1292,11 @@ xfs_imap( if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG - /* no diagnostics for bulkstat, ino comes from userspace */ - if (flags & XFS_IGET_BULKSTAT) + /* + * Don't output diagnostic information for untrusted inodes + * as they can be invalid without implying corruption. + */ + if (flags & XFS_IGET_UNTRUSTED) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, @@ -1263,6 +1323,23 @@ xfs_imap( return XFS_ERROR(EINVAL); } + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + + /* + * For bulkstat and handle lookups, we have an untrusted inode number + * that we have to verify is valid. We cannot do this just by reading + * the inode buffer as it may have been unlinked and removed leaving + * inodes in stale state on disk. Hence we have to do a btree lookup + * in all cases where an untrusted inode number is passed. + */ + if (flags & XFS_IGET_UNTRUSTED) { + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); + if (error) + return error; + goto out_map; + } + /* * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. @@ -1277,24 +1354,6 @@ xfs_imap( return 0; } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; - - /* - * If we get a block number passed from bulkstat we can use it to - * find the buffer easily. - */ - if (imap->im_blkno) { - offset = XFS_INO_TO_OFFSET(mp, ino); - ASSERT(offset < mp->m_sb.sb_inopblock); - - cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno); - offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; - - imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); - return 0; - } - /* * If the inode chunks are aligned then use simple maths to * find the location. Otherwise we have to do a btree @@ -1304,50 +1363,13 @@ xfs_imap( offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - xfs_btree_cur_t *cur; /* inode btree cursor */ - xfs_inobt_rec_incore_t chunk_rec; - xfs_buf_t *agbp; /* agi buffer */ - int i; /* temp state */ - - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); - return error; - } - - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_lookup() failed"); - goto error0; - } - - error = xfs_inobt_get_rec(cur, &chunk_rec, &i); - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); - goto error0; - } - if (i == 0) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ - error = XFS_ERROR(EINVAL); - } - error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + error = xfs_imap_lookup(mp, tp, agno, agino, agbno, + &chunk_agbno, &offset_agbno, flags); if (error) return error; - chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); - offset_agbno = agbno - chunk_agbno; } +out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 6845db90818f..8f8b91be2c99 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -259,7 +259,6 @@ xfs_iget_cache_miss( xfs_trans_t *tp, xfs_ino_t ino, struct xfs_inode **ipp, - xfs_daddr_t bno, int flags, int lock_flags) { @@ -272,7 +271,7 @@ xfs_iget_cache_miss( if (!ip) return ENOMEM; - error = xfs_iread(mp, tp, ip, bno, flags); + error = xfs_iread(mp, tp, ip, flags); if (error) goto out_destroy; @@ -358,8 +357,6 @@ out_destroy: * within the file system for the inode being requested. * lock_flags -- flags indicating how to lock the inode. See the comment * for xfs_ilock() for a list of valid values. - * bno -- the block number starting the buffer containing the inode, - * if known (as by bulkstat), else 0. */ int xfs_iget( @@ -368,8 +365,7 @@ xfs_iget( xfs_ino_t ino, uint flags, uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) + xfs_inode_t **ipp) { xfs_inode_t *ip; int error; @@ -382,9 +378,6 @@ xfs_iget( /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); - if (!pag->pagi_inodeok) - return EINVAL; - ASSERT(pag->pag_ici_init); agino = XFS_INO_TO_AGINO(mp, ino); again: @@ -400,7 +393,7 @@ again: read_unlock(&pag->pag_ici_lock); XFS_STATS_INC(xs_ig_missed); - error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, + error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, flags, lock_flags); if (error) goto out_error_or_again; @@ -744,30 +737,24 @@ xfs_ilock_demote( } #ifdef DEBUG -/* - * Debug-only routine, without additional rw_semaphore APIs, we can - * now only answer requests regarding whether we hold the lock for write - * (reader state is outside our visibility, we only track writer state). - * - * Note: this means !xfs_isilocked would give false positives, so don't do that. - */ int xfs_isilocked( xfs_inode_t *ip, uint lock_flags) { - if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) == - XFS_ILOCK_EXCL) { - if (!ip->i_lock.mr_writer) - return 0; + if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { + if (!(lock_flags & XFS_ILOCK_SHARED)) + return !!ip->i_lock.mr_writer; + return rwsem_is_locked(&ip->i_lock.mr_lock); } - if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) == - XFS_IOLOCK_EXCL) { - if (!ip->i_iolock.mr_writer) - return 0; + if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { + if (!(lock_flags & XFS_IOLOCK_SHARED)) + return !!ip->i_iolock.mr_writer; + return rwsem_is_locked(&ip->i_iolock.mr_lock); } - return 1; + ASSERT(0); + return 0; } #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8cd6e8d8fe9c..b76a829d7e20 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -177,7 +177,7 @@ xfs_imap_to_bp( if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { - if (iget_flags & XFS_IGET_BULKSTAT) { + if (iget_flags & XFS_IGET_UNTRUSTED) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EINVAL); } @@ -787,7 +787,6 @@ xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *ip, - xfs_daddr_t bno, uint iget_flags) { xfs_buf_t *bp; @@ -797,11 +796,9 @@ xfs_iread( /* * Fill in the location information in the in-core inode. */ - ip->i_imap.im_blkno = bno; error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) return error; - ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* * Get pointers to the on-disk inode and the buffer containing it. @@ -1940,10 +1937,10 @@ xfs_ifree_cluster( int blks_per_cluster; int nbufs; int ninodes; - int i, j, found, pre_flushed; + int i, j; xfs_daddr_t blkno; xfs_buf_t *bp; - xfs_inode_t *ip, **ip_found; + xfs_inode_t *ip; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; struct xfs_perag *pag; @@ -1960,114 +1957,97 @@ xfs_ifree_cluster( nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; } - ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); - for (j = 0; j < nbufs; j++, inum += ninodes) { + int found = 0; + blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); + /* + * We obtain and lock the backing buffer first in the process + * here, as we have to ensure that any dirty inode that we + * can't get the flush lock on is attached to the buffer. + * If we scan the in-memory inodes first, then buffer IO can + * complete before we get a lock on it, and hence we may fail + * to mark all the active inodes on the buffer stale. + */ + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, + mp->m_bsize * blks_per_cluster, + XBF_LOCK); + + /* + * Walk the inodes already attached to the buffer and mark them + * stale. These will all have the flush locks held, so an + * in-memory inode walk can't lock them. + */ + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + while (lip) { + if (lip->li_type == XFS_LI_INODE) { + iip = (xfs_inode_log_item_t *)lip; + ASSERT(iip->ili_logged == 1); + lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; + xfs_trans_ail_copy_lsn(mp->m_ail, + &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); + found++; + } + lip = lip->li_bio_list; + } /* - * Look for each inode in memory and attempt to lock it, - * we can be racing with flush and tail pushing here. - * any inode we get the locks on, add to an array of - * inode items to process later. + * For each inode in memory attempt to add it to the inode + * buffer and set it up for being staled on buffer IO + * completion. This is safe as we've locked out tail pushing + * and flushing by locking the buffer. * - * The get the buffer lock, we could beat a flush - * or tail pushing thread to the lock here, in which - * case they will go looking for the inode buffer - * and fail, we need some other form of interlock - * here. + * We have already marked every inode that was part of a + * transaction stale above, which means there is no point in + * even trying to lock them. */ - found = 0; for (i = 0; i < ninodes; i++) { read_lock(&pag->pag_ici_lock); ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, (inum + i))); - /* Inode not in memory or we found it already, - * nothing to do - */ + /* Inode not in memory or stale, nothing to do */ if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { read_unlock(&pag->pag_ici_lock); continue; } - if (xfs_inode_clean(ip)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - /* If we can get the locks then add it to the - * list, otherwise by the time we get the bp lock - * below it will already be attached to the - * inode buffer. - */ - - /* This inode will already be locked - by us, lets - * keep it that way. - */ - - if (ip == free_ip) { - if (xfs_iflock_nowait(ip)) { - xfs_iflags_set(ip, XFS_ISTALE); - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - } else { - ip_found[found++] = ip; - } - } + /* don't try to lock/unlock the current inode */ + if (ip != free_ip && + !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { read_unlock(&pag->pag_ici_lock); continue; } + read_unlock(&pag->pag_ici_lock); - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - if (xfs_iflock_nowait(ip)) { - xfs_iflags_set(ip, XFS_ISTALE); - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else { - ip_found[found++] = ip; - } - } else { + if (!xfs_iflock_nowait(ip)) { + if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); - } + continue; } - read_unlock(&pag->pag_ici_lock); - } - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, - mp->m_bsize * blks_per_cluster, - XBF_LOCK); - - pre_flushed = 0; - lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); - while (lip) { - if (lip->li_type == XFS_LI_INODE) { - iip = (xfs_inode_log_item_t *)lip; - ASSERT(iip->ili_logged == 1); - lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - xfs_trans_ail_copy_lsn(mp->m_ail, - &iip->ili_flush_lsn, - &iip->ili_item.li_lsn); - xfs_iflags_set(iip->ili_inode, XFS_ISTALE); - pre_flushed++; + xfs_iflags_set(ip, XFS_ISTALE); + if (xfs_inode_clean(ip)) { + ASSERT(ip != free_ip); + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; } - lip = lip->li_bio_list; - } - for (i = 0; i < found; i++) { - ip = ip_found[i]; iip = ip->i_itemp; - if (!iip) { + /* inode with unlogged changes only */ + ASSERT(ip != free_ip); ip->i_update_core = 0; xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); continue; } + found++; iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; @@ -2078,17 +2058,16 @@ xfs_ifree_cluster( xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done, (xfs_log_item_t *)iip); - if (ip != free_ip) { + + if (ip != free_ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); - } } - if (found || pre_flushed) + if (found) xfs_trans_stale_inode_buf(tp, bp); xfs_trans_binval(tp, bp); } - kmem_free(ip_found); xfs_perag_put(pag); } @@ -2649,8 +2628,6 @@ xfs_iflush_cluster( int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - ASSERT(pag->pagi_inodeok); - ASSERT(pag->pag_ici_init); inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9965e40a4615..78550df13cd6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) * xfs_iget.c prototypes. */ int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - uint, uint, xfs_inode_t **, xfs_daddr_t); + uint, uint, xfs_inode_t **); void xfs_iput(xfs_inode_t *, uint); void xfs_iput_new(xfs_inode_t *, uint); void xfs_ilock(xfs_inode_t *, uint); @@ -500,7 +500,7 @@ do { \ * Flags for xfs_iget() */ #define XFS_IGET_CREATE 0x1 -#define XFS_IGET_BULKSTAT 0x2 +#define XFS_IGET_UNTRUSTED 0x2 int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, @@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, xfs_daddr_t, uint); + struct xfs_inode *, uint); void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b1b801e4a28e..2b86f8610512 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -49,24 +49,40 @@ xfs_internal_inum( (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); } -STATIC int -xfs_bulkstat_one_iget( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - xfs_daddr_t bno, /* starting bno of inode cluster */ - xfs_bstat_t *buf, /* return buffer */ - int *stat) /* BULKSTAT_RV_... */ +/* + * Return stat information for one inode. + * Return 0 if ok, else errno. + */ +int +xfs_bulkstat_one_int( + struct xfs_mount *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ + int *ubused, /* bytes used by me */ + int *stat) /* BULKSTAT_RV_... */ { - xfs_icdinode_t *dic; /* dinode core info pointer */ - xfs_inode_t *ip; /* incore inode pointer */ - struct inode *inode; - int error; + struct xfs_icdinode *dic; /* dinode core info pointer */ + struct xfs_inode *ip; /* incore inode pointer */ + struct inode *inode; + struct xfs_bstat *buf; /* return buffer */ + int error = 0; /* error value */ + + *stat = BULKSTAT_RV_NOTHING; + + if (!buffer || xfs_internal_inum(mp, ino)) + return XFS_ERROR(EINVAL); + + buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); + if (!buf) + return XFS_ERROR(ENOMEM); error = xfs_iget(mp, NULL, ino, - XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); + XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip); if (error) { *stat = BULKSTAT_RV_NOTHING; - return error; + goto out_free; } ASSERT(ip != NULL); @@ -127,77 +143,16 @@ xfs_bulkstat_one_iget( buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; break; } - xfs_iput(ip, XFS_ILOCK_SHARED); - return error; -} -STATIC void -xfs_bulkstat_one_dinode( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - xfs_dinode_t *dic, /* dinode inode pointer */ - xfs_bstat_t *buf) /* return buffer */ -{ - /* - * The inode format changed when we moved the link count and - * made it 32 bits long. If this is an old format inode, - * convert it in memory to look like a new one. If it gets - * flushed to disk we will convert back before flushing or - * logging it. We zero out the new projid field and the old link - * count field. We'll handle clearing the pad field (the remains - * of the old uuid field) when we actually convert the inode to - * the new format. We don't change the version number so that we - * can distinguish this from a real new format inode. - */ - if (dic->di_version == 1) { - buf->bs_nlink = be16_to_cpu(dic->di_onlink); - buf->bs_projid = 0; - } else { - buf->bs_nlink = be32_to_cpu(dic->di_nlink); - buf->bs_projid = be16_to_cpu(dic->di_projid); - } + error = formatter(buffer, ubsize, ubused, buf); - buf->bs_ino = ino; - buf->bs_mode = be16_to_cpu(dic->di_mode); - buf->bs_uid = be32_to_cpu(dic->di_uid); - buf->bs_gid = be32_to_cpu(dic->di_gid); - buf->bs_size = be64_to_cpu(dic->di_size); - buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); - buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); - buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); - buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); - buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); - buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dic); - buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; - buf->bs_extents = be32_to_cpu(dic->di_nextents); - buf->bs_gen = be32_to_cpu(dic->di_gen); - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); - buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); - buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); - buf->bs_aextents = be16_to_cpu(dic->di_anextents); - buf->bs_forkoff = XFS_DFORK_BOFF(dic); + if (!error) + *stat = BULKSTAT_RV_DIDONE; - switch (dic->di_format) { - case XFS_DINODE_FMT_DEV: - buf->bs_rdev = xfs_dinode_get_rdev(dic); - buf->bs_blksize = BLKDEV_IOSIZE; - buf->bs_blocks = 0; - break; - case XFS_DINODE_FMT_LOCAL: - case XFS_DINODE_FMT_UUID: - buf->bs_rdev = 0; - buf->bs_blksize = mp->m_sb.sb_blocksize; - buf->bs_blocks = 0; - break; - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - buf->bs_rdev = 0; - buf->bs_blksize = mp->m_sb.sb_blocksize; - buf->bs_blocks = be64_to_cpu(dic->di_nblocks); - break; - } + out_free: + kmem_free(buf); + return error; } /* Return 0 on success or positive error */ @@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt( return 0; } -/* - * Return stat information for one inode. - * Return 0 if ok, else errno. - */ -int /* error status */ -xfs_bulkstat_one_int( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ - xfs_daddr_t bno, /* starting bno of inode cluster */ - int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ - int *stat) /* BULKSTAT_RV_... */ -{ - xfs_bstat_t *buf; /* return buffer */ - int error = 0; /* error value */ - xfs_dinode_t *dip; /* dinode inode pointer */ - - dip = (xfs_dinode_t *)dibuff; - *stat = BULKSTAT_RV_NOTHING; - - if (!buffer || xfs_internal_inum(mp, ino)) - return XFS_ERROR(EINVAL); - - buf = kmem_alloc(sizeof(*buf), KM_SLEEP); - - if (dip == NULL) { - /* We're not being passed a pointer to a dinode. This happens - * if BULKSTAT_FG_IGET is selected. Do the iget. - */ - error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); - if (error) - goto out_free; - } else { - xfs_bulkstat_one_dinode(mp, ino, dip, buf); - } - - error = formatter(buffer, ubsize, ubused, buf); - if (error) - goto out_free; - - *stat = BULKSTAT_RV_DIDONE; - - out_free: - kmem_free(buf); - return error; -} - int xfs_bulkstat_one( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ - xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ - void *dibuff, /* on-disk inode buffer */ int *stat) /* BULKSTAT_RV_... */ { return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt, bno, - ubused, dibuff, stat); -} - -/* - * Test to see whether we can use the ondisk inode directly, based - * on the given bulkstat flags, filling in dipp accordingly. - * Returns zero if the inode is dodgey. - */ -STATIC int -xfs_bulkstat_use_dinode( - xfs_mount_t *mp, - int flags, - xfs_buf_t *bp, - int clustidx, - xfs_dinode_t **dipp) -{ - xfs_dinode_t *dip; - unsigned int aformat; - - *dipp = NULL; - if (!bp || (flags & BULKSTAT_FG_IGET)) - return 1; - dip = (xfs_dinode_t *) - xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); - /* - * Check the buffer containing the on-disk inode for di_mode == 0. - * This is to prevent xfs_bulkstat from picking up just reclaimed - * inodes that have their in-core state initialized but not flushed - * to disk yet. This is a temporary hack that would require a proper - * fix in the future. - */ - if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || - !XFS_DINODE_GOOD_VERSION(dip->di_version) || - !dip->di_mode) - return 0; - if (flags & BULKSTAT_FG_QUICK) { - *dipp = dip; - return 1; - } - /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ - aformat = dip->di_aformat; - if ((XFS_DFORK_Q(dip) == 0) || - (aformat == XFS_DINODE_FMT_LOCAL) || - (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { - *dipp = dip; - return 1; - } - return 1; + xfs_bulkstat_one_fmt, ubused, stat); } #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) @@ -342,10 +196,8 @@ xfs_bulkstat( xfs_ino_t *lastinop, /* last inode returned */ int *ubcountp, /* size of buffer/count returned */ bulkstat_one_pf formatter, /* func that'd fill a single buf */ - void *private_data,/* private data for formatter */ size_t statstruct_size, /* sizeof struct filling */ char __user *ubuffer, /* buffer with inode stats */ - int flags, /* defined in xfs_itable.h */ int *done) /* 1 if there are more stats to get */ { xfs_agblock_t agbno=0;/* allocation group block number */ @@ -380,14 +232,12 @@ xfs_bulkstat( int ubelem; /* spaces used in user's buffer */ int ubused; /* bytes used by formatter */ xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ - xfs_dinode_t *dip; /* ptr into bp for specific inode */ /* * Get the last inode value, see if there's nothing to do. */ ino = (xfs_ino_t)*lastinop; lastino = ino; - dip = NULL; agno = XFS_INO_TO_AGNO(mp, ino); agino = XFS_INO_TO_AGINO(mp, ino); if (agno >= mp->m_sb.sb_agcount || @@ -612,37 +462,6 @@ xfs_bulkstat( irbp->ir_startino) + ((chunkidx & nimask) >> mp->m_sb.sb_inopblog); - - if (flags & (BULKSTAT_FG_QUICK | - BULKSTAT_FG_INLINE)) { - int offset; - - ino = XFS_AGINO_TO_INO(mp, agno, - agino); - bno = XFS_AGB_TO_DADDR(mp, agno, - agbno); - - /* - * Get the inode cluster buffer - */ - if (bp) - xfs_buf_relse(bp); - - error = xfs_inotobp(mp, NULL, ino, &dip, - &bp, &offset, - XFS_IGET_BULKSTAT); - - if (!error) - clustidx = offset / mp->m_sb.sb_inodesize; - if (XFS_TEST_ERROR(error != 0, - mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, - XFS_RANDOM_BULKSTAT_READ_CHUNK)) { - bp = NULL; - ubleft = 0; - rval = error; - break; - } - } } ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, agbno); @@ -658,35 +477,13 @@ xfs_bulkstat( * when the chunk is used up. */ irbp->ir_freecount++; - if (!xfs_bulkstat_use_dinode(mp, flags, bp, - clustidx, &dip)) { - lastino = ino; - continue; - } - /* - * If we need to do an iget, cannot hold bp. - * Drop it, until starting the next cluster. - */ - if ((flags & BULKSTAT_FG_INLINE) && !dip) { - if (bp) - xfs_buf_relse(bp); - bp = NULL; - } /* * Get the inode and fill in a single buffer. - * BULKSTAT_FG_QUICK uses dip to fill it in. - * BULKSTAT_FG_IGET uses igets. - * BULKSTAT_FG_INLINE uses dip if we have an - * inline attr fork, else igets. - * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. - * This is also used to count inodes/blks, etc - * in xfs_qm_quotacheck. */ ubused = statstruct_size; - error = formatter(mp, ino, ubufp, - ubleft, private_data, - bno, &ubused, dip, &fmterror); + error = formatter(mp, ino, ubufp, ubleft, + &ubused, &fmterror); if (fmterror == BULKSTAT_RV_NOTHING) { if (error && error != ENOENT && error != EINVAL) { @@ -778,8 +575,7 @@ xfs_bulkstat_single( */ ino = (xfs_ino_t)*lastinop; - error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), - NULL, 0, NULL, NULL, &res); + error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res); if (error) { /* * Special case way failed, do it the "long" way @@ -788,8 +584,7 @@ xfs_bulkstat_single( (*lastinop)--; count = 1; if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, - NULL, sizeof(xfs_bstat_t), buffer, - BULKSTAT_FG_IGET, done)) + sizeof(xfs_bstat_t), buffer, done)) return error; if (count == 0 || (xfs_ino_t)*lastinop != ino) return error == EFSCORRUPTED ? diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 20792bf45946..97295d91d170 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, xfs_ino_t ino, void __user *buffer, int ubsize, - void *private_data, - xfs_daddr_t bno, int *ubused, - void *dip, int *stat); /* @@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, #define BULKSTAT_RV_GIVEUP 2 /* - * Values for bulkstat flag argument. - */ -#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */ -#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */ -#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */ - -/* * Return stat information in bulk (by-inode) for the filesystem. */ int /* error status */ @@ -56,10 +46,8 @@ xfs_bulkstat( xfs_ino_t *lastino, /* last inode returned */ int *count, /* size of buffer/count returned */ bulkstat_one_pf formatter, /* func that'd fill a single buf */ - void *private_data, /* private data for formatter */ size_t statstruct_size,/* sizeof struct that we're filling */ char __user *ubuffer,/* buffer with inode stats */ - int flags, /* flag to control access method */ int *done); /* 1 if there are more stats to get */ int @@ -82,9 +70,7 @@ xfs_bulkstat_one_int( void __user *buffer, int ubsize, bulkstat_one_fmt_pf formatter, - xfs_daddr_t bno, int *ubused, - void *dibuff, int *stat); int @@ -93,10 +79,7 @@ xfs_bulkstat_one( xfs_ino_t ino, void __user *buffer, int ubsize, - void *private_data, - xfs_daddr_t bno, int *ubused, - void *dibuff, int *stat); typedef int (*inumbers_fmt_pf)( diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 14a69aec2c0b..9ac5cfab27b9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -132,15 +132,10 @@ xlog_align( int nbblks, xfs_buf_t *bp) { - xfs_daddr_t offset; - xfs_caddr_t ptr; + xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); - offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1); - ptr = XFS_BUF_PTR(bp) + BBTOB(offset); - - ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp)); - - return ptr; + ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); + return XFS_BUF_PTR(bp) + BBTOB(offset); } @@ -3203,7 +3198,7 @@ xlog_recover_process_one_iunlink( int error; ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); if (error) goto fail; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d7bf38c8cd1c..69f62d8b2816 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count( #if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) - return E2BIG; + return EFBIG; #else /* Limited by UINT_MAX of sectors */ if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) - return E2BIG; + return EFBIG; #endif return 0; } @@ -393,7 +393,7 @@ xfs_mount_validate_sb( xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_fs_mount_cmn_err(flags, "file system too large to be mounted on this system."); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } if (unlikely(sbp->sb_inprogress)) { @@ -413,17 +413,6 @@ xfs_mount_validate_sb( return 0; } -STATIC void -xfs_initialize_perag_icache( - xfs_perag_t *pag) -{ - if (!pag->pag_ici_init) { - rwlock_init(&pag->pag_ici_lock); - INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - pag->pag_ici_init = 1; - } -} - int xfs_initialize_perag( xfs_mount_t *mp, @@ -436,13 +425,8 @@ xfs_initialize_perag( xfs_agino_t agino; xfs_ino_t ino; xfs_sb_t *sbp = &mp->m_sb; - xfs_ino_t max_inum = XFS_MAXINUMBER_32; int error = -ENOMEM; - /* Check to see if the filesystem can overflow 32 bit inodes */ - agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); - ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); - /* * Walk the current per-ag tree so we don't try to initialise AGs * that already exist (growfs case). Allocate and insert all the @@ -456,11 +440,18 @@ xfs_initialize_perag( } if (!first_initialised) first_initialised = index; + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); if (!pag) goto out_unwind; + pag->pag_agno = index; + pag->pag_mount = mp; + rwlock_init(&pag->pag_ici_lock); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + if (radix_tree_preload(GFP_NOFS)) goto out_unwind; + spin_lock(&mp->m_perag_lock); if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { BUG(); @@ -469,25 +460,26 @@ xfs_initialize_perag( error = -EEXIST; goto out_unwind; } - pag->pag_agno = index; - pag->pag_mount = mp; spin_unlock(&mp->m_perag_lock); radix_tree_preload_end(); } - /* Clear the mount flag if no inode can overflow 32 bits - * on this filesystem, or if specifically requested.. + /* + * If we mount with the inode64 option, or no inode overflows + * the legacy 32-bit address space clear the inode32 option. */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { + agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); + ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); + + if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32) mp->m_flags |= XFS_MOUNT_32BITINODES; - } else { + else mp->m_flags &= ~XFS_MOUNT_32BITINODES; - } - /* If we can overflow then setup the ag headers accordingly */ if (mp->m_flags & XFS_MOUNT_32BITINODES) { - /* Calculate how much should be reserved for inodes to - * meet the max inode percentage. + /* + * Calculate how much should be reserved for inodes to meet + * the max inode percentage. */ if (mp->m_maxicount) { __uint64_t icount; @@ -500,30 +492,28 @@ xfs_initialize_perag( } else { max_metadata = agcount; } + for (index = 0; index < agcount; index++) { ino = XFS_AGINO_TO_INO(mp, index, agino); - if (ino > max_inum) { + if (ino > XFS_MAXINUMBER_32) { index++; break; } - /* This ag is preferred for inodes */ pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; if (index < max_metadata) pag->pagf_metadata = 1; - xfs_initialize_perag_icache(pag); xfs_perag_put(pag); } } else { - /* Setup default behavior for smaller filesystems */ for (index = 0; index < agcount; index++) { pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; - xfs_initialize_perag_icache(pag); xfs_perag_put(pag); } } + if (maxagi) *maxagi = index; return 0; @@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { cmn_err(CE_WARN, "XFS: size check 1 failed"); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), @@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp) } else { cmn_err(CE_WARN, "XFS: size check 2 failed"); if (error == ENOSPC) - error = XFS_ERROR(E2BIG); + error = XFS_ERROR(EFBIG); return error; } @@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp) } else { cmn_err(CE_WARN, "XFS: size check 3 failed"); if (error == ENOSPC) - error = XFS_ERROR(E2BIG); + error = XFS_ERROR(EFBIG); return error; } } @@ -1254,7 +1244,7 @@ xfs_mountfs( * Allocate and initialize the per-ag data. */ spin_lock_init(&mp->m_perag_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); @@ -1310,7 +1300,7 @@ xfs_mountfs( * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ - error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); if (error) { cmn_err(CE_WARN, "XFS: failed to read root inode"); goto out_log_dealloc; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6be05f756d59..a2d32ce335aa 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -2247,7 +2247,7 @@ xfs_rtmount_init( cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", (unsigned long long) XFS_BB_TO_FSB(mp, d), (unsigned long long) mp->m_sb.sb_rblocks); - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); } error = xfs_read_buf(mp, mp->m_rtdev_targp, d - XFS_FSB_TO_BB(mp, 1), @@ -2256,7 +2256,7 @@ xfs_rtmount_init( cmn_err(CE_WARN, "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); if (error == ENOSPC) - return XFS_ERROR(E2BIG); + return XFS_ERROR(EFBIG); return error; } xfs_buf_relse(bp); @@ -2277,12 +2277,12 @@ xfs_rtmount_inodes( sbp = &mp->m_sb; if (sbp->sb_rbmino == NULLFSINO) return 0; - error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); if (error) return error; ASSERT(mp->m_rbmip != NULL); ASSERT(sbp->sb_rsumino != NULLFSINO); - error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); if (error) { IRELE(mp->m_rbmip); return error; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index b2d67adb6a08..ff614c29b441 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -147,7 +147,16 @@ xfs_growfs_rt( # define xfs_rtfree_extent(t,b,l) (ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) # define xfs_growfs_rt(mp,in) (ENOSYS) -# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) +static inline int /* error */ +xfs_rtmount_init( + xfs_mount_t *mp) /* file system mount structure */ +{ + if (mp->m_sb.sb_rblocks == 0) + return 0; + + cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); + return ENOSYS; +} # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) # define xfs_rtunmount_inodes(m) #endif /* CONFIG_XFS_RT */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ce558efa2ea0..28547dfce037 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -48,134 +48,489 @@ kmem_zone_t *xfs_trans_zone; + /* - * Reservation functions here avoid a huge stack in xfs_trans_init - * due to register overflow from temporaries in the calculations. + * Various log reservation values. + * + * These are based on the size of the file system block because that is what + * most transactions manipulate. Each adds in an additional 128 bytes per + * item logged to try to account for the overhead of the transaction mechanism. + * + * Note: Most of the reservations underestimate the number of allocation + * groups into which they could free extents in the xfs_bmap_finish() call. + * This is because the number in the worst case is quite high and quite + * unusual. In order to fix this we need to change xfs_bmap_finish() to free + * extents in only a single AG at a time. This will require changes to the + * EFI code as well, however, so that the EFI for the extents not freed is + * logged again in each transaction. See SGI PV #261917. + * + * Reservation functions here avoid a huge stack in xfs_trans_init due to + * register overflow from temporaries in the calculations. + */ + + +/* + * In a write transaction we can allocate a maximum of 2 + * extents. This gives: + * the inode getting the new extents: inode size + * the inode's bmap btree: max depth * block size + * the agfs of the ags from which the extents are allocated: 2 * sector + * the superblock free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + * And the bmap_finish transaction can free bmap blocks in a join: + * the agfs of the ags containing the blocks: 2 * sector size + * the agfls of the ags containing the blocks: 2 * sector size + * the super block free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint -xfs_calc_write_reservation(xfs_mount_t *mp) +xfs_calc_write_reservation( + struct xfs_mount *mp) { - return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + + XFS_ALLOCFREE_LOG_COUNT(mp, 2))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * In truncating a file we free up to two extents at once. We can modify: + * the inode being truncated: inode size + * the inode's bmap btree: (max depth + 1) * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_itruncate_reservation(xfs_mount_t *mp) +xfs_calc_itruncate_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + + 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), + (4 * mp->m_sb.sb_sectsize + + 4 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 4) + + 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + + 128 * 5 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * In renaming a files we can modify: + * the four inodes involved: 4 * inode size + * the two directory btrees: 2 * (max depth + v2) * dir block size + * the two directory bmap btrees: 2 * max depth * block size + * And the bmap_finish transaction can free dir and bmap blocks (two sets + * of bmap blocks) giving: + * the agf for the ags in which the blocks live: 3 * sector size + * the agfl for the ags in which the blocks live: 3 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_rename_reservation(xfs_mount_t *mp) +xfs_calc_rename_reservation( + struct xfs_mount *mp) { - return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((4 * mp->m_sb.sb_inodesize + + 2 * XFS_DIROP_LOG_RES(mp) + + 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), + (3 * mp->m_sb.sb_sectsize + + 3 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 3) + + 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))); } +/* + * For creating a link to an inode: + * the parent directory inode: inode size + * the linked inode: inode size + * the directory btree could split: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free some bmap blocks giving: + * the agf for the ag in which the blocks live: sector size + * the agfl for the ag in which the blocks live: sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_link_reservation(xfs_mount_t *mp) +xfs_calc_link_reservation( + struct xfs_mount *mp) { - return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_DIROP_LOG_RES(mp) + + 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), + (mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * For removing a directory entry we can modify: + * the parent directory inode: inode size + * the removed inode: inode size + * the directory btree could join: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free the dir and bmap blocks giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_remove_reservation(xfs_mount_t *mp) +xfs_calc_remove_reservation( + struct xfs_mount *mp) { - return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_DIROP_LOG_RES(mp) + + 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * For symlink we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: 1 block + * the directory btree: (max depth + v2) * dir block size + * the directory inode's bmap btree: (max depth + v2) * block size + * the blocks for the symlink: 1 kB + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_symlink_reservation(xfs_mount_t *mp) +xfs_calc_symlink_reservation( + struct xfs_mount *mp) { - return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, 1) + + XFS_DIROP_LOG_RES(mp) + + 1024 + + 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), + (2 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + + XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * For create we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: block size + * the superblock for the nlink flag: sector size + * the directory btree: (max depth + v2) * dir block size + * the directory inode's bmap btree: (max depth + v2) * block size + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_create_reservation(xfs_mount_t *mp) +xfs_calc_create_reservation( + struct xfs_mount *mp) { - return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, 1) + + XFS_DIROP_LOG_RES(mp) + + 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), + (3 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + + XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); } +/* + * Making a new directory is the same as creating a new file. + */ STATIC uint -xfs_calc_mkdir_reservation(xfs_mount_t *mp) +xfs_calc_mkdir_reservation( + struct xfs_mount *mp) { - return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return xfs_calc_create_reservation(mp); } +/* + * In freeing an inode we can modify: + * the inode being freed: inode size + * the super block free inode counter: sector size + * the agi hash list and counters: sector size + * the inode btree entry: block size + * the on disk inode before ours in the agi hash list: inode cluster size + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size + */ STATIC uint -xfs_calc_ifree_reservation(xfs_mount_t *mp) +xfs_calc_ifree_reservation( + struct xfs_mount *mp) { - return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, 1) + + MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), + XFS_INODE_CLUSTER_SIZE(mp)) + + 128 * 5 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * When only changing the inode we log the inode and possibly the superblock + * We also add a bit of slop for the transaction stuff. + */ STATIC uint -xfs_calc_ichange_reservation(xfs_mount_t *mp) +xfs_calc_ichange_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + 512; + } +/* + * Growing the data section of the filesystem. + * superblock + * agi and agf + * allocation btrees + */ STATIC uint -xfs_calc_growdata_reservation(xfs_mount_t *mp) +xfs_calc_growdata_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWDATA_LOG_RES(mp); + return mp->m_sb.sb_sectsize * 3 + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Growing the rt section of the filesystem. + * In the first set of transactions (ALLOC) we allocate space to the + * bitmap or summary files. + * superblock: sector size + * agf of the ag from which the extent is allocated: sector size + * bmap btree for bitmap/summary inode: max depth * blocksize + * bitmap/summary inode: inode size + * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize + */ STATIC uint -xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) +xfs_calc_growrtalloc_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTALLOC_LOG_RES(mp); + return 2 * mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + + mp->m_sb.sb_inodesize + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Growing the rt section of the filesystem. + * In the second set of transactions (ZERO) we zero the new metadata blocks. + * one bitmap/summary block: blocksize + */ STATIC uint -xfs_calc_growrtzero_reservation(xfs_mount_t *mp) +xfs_calc_growrtzero_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTZERO_LOG_RES(mp); + return mp->m_sb.sb_blocksize + 128; } +/* + * Growing the rt section of the filesystem. + * In the third set of transactions (FREE) we update metadata without + * allocating any new blocks. + * superblock: sector size + * bitmap inode: inode size + * summary inode: inode size + * one bitmap block: blocksize + * summary blocks: new summary size + */ STATIC uint -xfs_calc_growrtfree_reservation(xfs_mount_t *mp) +xfs_calc_growrtfree_reservation( + struct xfs_mount *mp) { - return XFS_CALC_GROWRTFREE_LOG_RES(mp); + return mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_inodesize + + mp->m_sb.sb_blocksize + + mp->m_rsumsize + + 128 * 5; } +/* + * Logging the inode modification timestamp on a synchronous write. + * inode + */ STATIC uint -xfs_calc_swrite_reservation(xfs_mount_t *mp) +xfs_calc_swrite_reservation( + struct xfs_mount *mp) { - return XFS_CALC_SWRITE_LOG_RES(mp); + return mp->m_sb.sb_inodesize + 128; } +/* + * Logging the inode mode bits when writing a setuid/setgid file + * inode + */ STATIC uint xfs_calc_writeid_reservation(xfs_mount_t *mp) { - return XFS_CALC_WRITEID_LOG_RES(mp); + return mp->m_sb.sb_inodesize + 128; } +/* + * Converting the inode from non-attributed to attributed. + * the inode being converted: inode size + * agf block and superblock (for block allocation) + * the new block (directory sized) + * bmap blocks for the new directory block + * allocation btrees + */ STATIC uint -xfs_calc_addafork_reservation(xfs_mount_t *mp) +xfs_calc_addafork_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize * 2 + + mp->m_dirblksize + + XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + + XFS_ALLOCFREE_LOG_RES(mp, 1) + + 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); } +/* + * Removing the attribute fork of a file + * the inode being truncated: inode size + * the inode's bmap btree: max depth * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_attrinval_reservation(xfs_mount_t *mp) +xfs_calc_attrinval_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRINVAL_LOG_RES(mp); + return MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), + (4 * mp->m_sb.sb_sectsize + + 4 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 4) + + 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))); } +/* + * Setting an attribute. + * the inode getting the attribute + * the superblock for allocations + * the agfs extents are allocated from + * the attribute btree * max depth + * the inode allocation btree + * Since attribute transaction space is dependent on the size of the attribute, + * the calculation is done partially at mount time and partially at runtime. + */ STATIC uint -xfs_calc_attrset_reservation(xfs_mount_t *mp) +xfs_calc_attrset_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + mp->m_sb.sb_inodesize + + mp->m_sb.sb_sectsize + + XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + + 128 * (2 + XFS_DA_NODE_MAXDEPTH); } +/* + * Removing an attribute. + * the inode: inode size + * the attribute btree could join: max depth * block size + * the inode bmap btree could join or split: max depth * block size + * And the bmap_finish transaction can free the attr blocks freed giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ STATIC uint -xfs_calc_attrrm_reservation(xfs_mount_t *mp) +xfs_calc_attrrm_reservation( + struct xfs_mount *mp) { - return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); + return XFS_DQUOT_LOGRES(mp) + + MAX((mp->m_sb.sb_inodesize + + XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + + XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + + 128 * (1 + XFS_DA_NODE_MAXDEPTH + + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), + (2 * mp->m_sb.sb_sectsize + + 2 * mp->m_sb.sb_sectsize + + mp->m_sb.sb_sectsize + + XFS_ALLOCFREE_LOG_RES(mp, 2) + + 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))); } +/* + * Clearing a bad agino number in an agi hash bucket. + */ STATIC uint -xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) +xfs_calc_clear_agi_bucket_reservation( + struct xfs_mount *mp) { - return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); + return mp->m_sb.sb_sectsize + 128; } /* @@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) */ void xfs_trans_init( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_trans_reservations_t *resp; + struct xfs_trans_reservations *resp = &mp->m_reservations; - resp = &(mp->m_reservations); resp->tr_write = xfs_calc_write_reservation(mp); resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); resp->tr_rename = xfs_calc_rename_reservation(mp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 8c69e7824f68..e639e8e9a2a9 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) /* - * Various log reservation values. - * These are based on the size of the file system block - * because that is what most transactions manipulate. - * Each adds in an additional 128 bytes per item logged to - * try to account for the overhead of the transaction mechanism. - * - * Note: - * Most of the reservations underestimate the number of allocation - * groups into which they could free extents in the xfs_bmap_finish() - * call. This is because the number in the worst case is quite high - * and quite unusual. In order to fix this we need to change - * xfs_bmap_finish() to free extents in only a single AG at a time. - * This will require changes to the EFI code as well, however, so that - * the EFI for the extents not freed is logged again in each transaction. - * See bug 261917. - */ - -/* * Per-extent log reservation for the allocation btree changes * involved in freeing or allocating an extent. * 2 trees * (2 blocks/level * max depth - 1) * block size @@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) -/* - * In a write transaction we can allocate a maximum of 2 - * extents. This gives: - * the inode getting the new extents: inode size - * the inode's bmap btree: max depth * block size - * the agfs of the ags from which the extents are allocated: 2 * sector - * the superblock free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - * And the bmap_finish transaction can free bmap blocks in a join: - * the agfs of the ags containing the blocks: 2 * sector size - * the agfls of the ags containing the blocks: 2 * sector size - * the super block free block counter: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_WRITE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) #define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write) - -/* - * In truncating a file we free up to two extents at once. We can modify: - * the inode being truncated: inode size - * the inode's bmap btree: (max depth + 1) * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \ - (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ - ((4 * (mp)->m_sb.sb_sectsize) + \ - (4 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 4) + \ - (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \ - (128 * 5) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) - -/* - * In renaming a files we can modify: - * the four inodes involved: 4 * inode size - * the two directory btrees: 2 * (max depth + v2) * dir block size - * the two directory bmap btrees: 2 * max depth * block size - * And the bmap_finish transaction can free dir and bmap blocks (two sets - * of bmap blocks) giving: - * the agf for the ags in which the blocks live: 3 * sector size - * the agfl for the ags in which the blocks live: 3 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_RENAME_LOG_RES(mp) \ - (MAX( \ - ((4 * (mp)->m_sb.sb_inodesize) + \ - (2 * XFS_DIROP_LOG_RES(mp)) + \ - (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \ - ((3 * (mp)->m_sb.sb_sectsize) + \ - (3 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 3) + \ - (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))))) - #define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename) - -/* - * For creating a link to an inode: - * the parent directory inode: inode size - * the linked inode: inode size - * the directory btree could split: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free some bmap blocks giving: - * the agf for the ag in which the blocks live: sector size - * the agfl for the ag in which the blocks live: sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_LINK_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ - ((mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link) - -/* - * For removing a directory entry we can modify: - * the parent directory inode: inode size - * the removed inode: inode size - * the directory btree could join: (max depth + v2) * dir block size - * the directory bmap btree could join or split: (max depth + v2) * blocksize - * And the bmap_finish transaction can free the dir and bmap blocks giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_REMOVE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) - #define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove) - -/* - * For symlink we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: 1 block - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * the blocks for the symlink: 1 kB - * Or in the first xact we allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_SYMLINK_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B(mp, 1) + \ - XFS_DIROP_LOG_RES(mp) + \ - 1024 + \ - (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ - (2 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ - XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) - -/* - * For create we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: block size - * the superblock for the nlink flag: sector size - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * Or in the first xact we allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_CREATE_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B(mp, 1) + \ - XFS_DIROP_LOG_RES(mp) + \ - (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ - (3 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ - XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) - #define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) - -/* - * Making a new directory is the same as creating a new file. - */ -#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp) - #define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir) - -/* - * In freeing an inode we can modify: - * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -#define XFS_CALC_IFREE_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), 1) + \ - MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ - (128 * 5) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - - #define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree) - -/* - * When only changing the inode we log the inode and possibly the superblock - * We also add a bit of slop for the transaction stuff. - */ -#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + 512) - #define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange) - -/* - * Growing the data section of the filesystem. - * superblock - * agi and agf - * allocation btrees - */ -#define XFS_CALC_GROWDATA_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize * 3 + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata) - -/* - * Growing the rt section of the filesystem. - * In the first set of transactions (ALLOC) we allocate space to the - * bitmap or summary files. - * superblock: sector size - * agf of the ag from which the extent is allocated: sector size - * bmap btree for bitmap/summary inode: max depth * blocksize - * bitmap/summary inode: inode size - * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize - */ -#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \ - (2 * (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ - (mp)->m_sb.sb_inodesize + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * \ - (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc) - -/* - * Growing the rt section of the filesystem. - * In the second set of transactions (ZERO) we zero the new metadata blocks. - * one bitmap/summary block: blocksize - */ -#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \ - ((mp)->m_sb.sb_blocksize + 128) - #define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero) - -/* - * Growing the rt section of the filesystem. - * In the third set of transactions (FREE) we update metadata without - * allocating any new blocks. - * superblock: sector size - * bitmap inode: inode size - * summary inode: inode size - * one bitmap block: blocksize - * summary blocks: new summary size - */ -#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize + \ - 2 * (mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_blocksize + \ - (mp)->m_rsumsize + \ - (128 * 5)) - #define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree) - -/* - * Logging the inode modification timestamp on a synchronous write. - * inode - */ -#define XFS_CALC_SWRITE_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + 128) - #define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - /* * Logging the inode timestamps on an fsync -- same as SWRITE * as long as SWRITE logs the entire inode core */ #define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - -/* - * Logging the inode mode bits when writing a setuid/setgid file - * inode - */ -#define XFS_CALC_WRITEID_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + 128) - #define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) - -/* - * Converting the inode from non-attributed to attributed. - * the inode being converted: inode size - * agf block and superblock (for block allocation) - * the new block (directory sized) - * bmap blocks for the new directory block - * allocation btrees - */ -#define XFS_CALC_ADDAFORK_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize * 2 + \ - (mp)->m_dirblksize + \ - XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \ - XFS_ALLOCFREE_LOG_RES(mp, 1) + \ - (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \ - XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) - #define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) - -/* - * Removing the attribute fork of a file - * the inode being truncated: inode size - * the inode's bmap btree: max depth * block size - * And the bmap_finish transaction can free the blocks and bmap blocks: - * the agf for each of the ags: 4 * sector size - * the agfl for each of the ags: 4 * sector size - * the super block to reflect the freed blocks: sector size - * worst case split in allocation btrees per extent assuming 4 extents: - * 4 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ - (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \ - ((4 * (mp)->m_sb.sb_sectsize) + \ - (4 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 4) + \ - (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))))) - #define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) - -/* - * Setting an attribute. - * the inode getting the attribute - * the superblock for allocations - * the agfs extents are allocated from - * the attribute btree * max depth - * the inode allocation btree - * Since attribute transaction space is dependent on the size of the attribute, - * the calculation is done partially at mount time and partially at runtime. - */ -#define XFS_CALC_ATTRSET_LOG_RES(mp) \ - ((mp)->m_sb.sb_inodesize + \ - (mp)->m_sb.sb_sectsize + \ - XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ - (128 * (2 + XFS_DA_NODE_MAXDEPTH))) - #define XFS_ATTRSET_LOG_RES(mp, ext) \ ((mp)->m_reservations.tr_attrset + \ (ext * (mp)->m_sb.sb_sectsize) + \ (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) - -/* - * Removing an attribute. - * the inode: inode size - * the attribute btree could join: max depth * block size - * the inode bmap btree could join or split: max depth * block size - * And the bmap_finish transaction can free the attr blocks freed giving: - * the agf for the ag in which the blocks live: 2 * sector size - * the agfl for the ag in which the blocks live: 2 * sector size - * the superblock for the free block count: sector size - * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size - */ -#define XFS_CALC_ATTRRM_LOG_RES(mp) \ - (MAX( \ - ((mp)->m_sb.sb_inodesize + \ - XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ - XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ - (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ - ((2 * (mp)->m_sb.sb_sectsize) + \ - (2 * (mp)->m_sb.sb_sectsize) + \ - (mp)->m_sb.sb_sectsize + \ - XFS_ALLOCFREE_LOG_RES(mp, 2) + \ - (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) - #define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) - -/* - * Clearing a bad agino number in an agi hash bucket. - */ -#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \ - ((mp)->m_sb.sb_sectsize + 128) - #define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 785ff101da0a..2559dfec946b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -62,7 +62,7 @@ xfs_trans_iget( { int error; - error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); + error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); if (!error && tp) xfs_trans_ijoin(tp, *ipp, lock_flags); return error; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9d376be0ea38..c1646838898f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -267,7 +267,7 @@ xfs_setattr( if (code) { ASSERT(tp == NULL); lock_flags &= ~XFS_ILOCK_EXCL; - ASSERT(lock_flags == XFS_IOLOCK_EXCL); + ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock); goto error_return; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); @@ -1269,7 +1269,7 @@ xfs_lookup( if (error) goto out; - error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); + error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); if (error) goto out_free_name; |