summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/Kconfig21
-rw-r--r--fs/adfs/dir.c1
-rw-r--r--fs/affs/dir.c1
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/binfmt_flat.c4
-rw-r--r--fs/binfmt_misc.c4
-rw-r--r--fs/bio.c57
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/cifs/CHANGES16
-rw-r--r--fs/cifs/README44
-rw-r--r--fs/cifs/asn1.c263
-rw-r--r--fs/cifs/cifs_debug.c53
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_spnego.c22
-rw-r--r--fs/cifs/cifs_spnego.h2
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/cifsfs.c73
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h3
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h24
-rw-r--r--fs/cifs/cifssmb.c46
-rw-r--r--fs/cifs/connect.c186
-rw-r--r--fs/cifs/dir.c67
-rw-r--r--fs/cifs/dns_resolve.c7
-rw-r--r--fs/cifs/file.c25
-rw-r--r--fs/cifs/inode.c423
-rw-r--r--fs/cifs/sess.c13
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/compat.c8
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c219
-rw-r--r--fs/configfs/symlink.c26
-rw-r--r--fs/cramfs/inode.c84
-rw-r--r--fs/dcache.c102
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/dlm/config.c203
-rw-r--r--fs/dlm/user.c10
-rw-r--r--fs/dquot.c33
-rw-r--r--fs/efs/namei.c3
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/ext3/super.c3
-rw-r--r--fs/ext4/acl.c188
-rw-r--r--fs/ext4/balloc.c14
-rw-r--r--fs/ext4/dir.c20
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/ext4_extents.h4
-rw-r--r--fs/ext4/ext4_jbd2.h8
-rw-r--r--fs/ext4/extents.c168
-rw-r--r--fs/ext4/ialloc.c60
-rw-r--r--fs/ext4/inode.c634
-rw-r--r--fs/ext4/mballoc.c293
-rw-r--r--fs/ext4/mballoc.h10
-rw-r--r--fs/ext4/migrate.c3
-rw-r--r--fs/ext4/resize.c82
-rw-r--r--fs/ext4/super.c320
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fcntl.c140
-rw-r--r--fs/file.c61
-rw-r--r--fs/inode.c1
-rw-r--r--fs/ioprio.c8
-rw-r--r--fs/jbd/commit.c6
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c1
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/jffs2_fs_i.h1
-rw-r--r--fs/jffs2/summary.c40
-rw-r--r--fs/jffs2/summary.h6
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c17
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/nfs4acl.c2
-rw-r--r--fs/nfsd/nfs4proc.c17
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/namei.c89
-rw-r--r--fs/ntfs/usnjrnl.h4
-rw-r--r--fs/ocfs2/aops.c29
-rw-r--r--fs/ocfs2/cluster/netdebug.c26
-rw-r--r--fs/ocfs2/cluster/tcp.c44
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h32
-rw-r--r--fs/ocfs2/dir.c11
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/journal.c192
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/stackglue.c7
-rw-r--r--fs/ocfs2/super.c12
-rw-r--r--fs/omfs/bitmap.c11
-rw-r--r--fs/omfs/dir.c2
-rw-r--r--fs/omfs/file.c39
-rw-r--r--fs/omfs/inode.c6
-rw-r--r--fs/open.c56
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/base.c11
-rw-r--r--fs/proc/generic.c28
-rw-r--r--fs/proc/nommu.c4
-rw-r--r--fs/proc/proc_misc.c7
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/super.c17
-rw-r--r--fs/romfs/inode.c37
-rw-r--r--fs/seq_file.c25
-rw-r--r--fs/splice.c2
-rw-r--r--fs/ubifs/budget.c147
-rw-r--r--fs/ubifs/commit.c3
-rw-r--r--fs/ubifs/debug.c27
-rw-r--r--fs/ubifs/debug.h143
-rw-r--r--fs/ubifs/dir.c25
-rw-r--r--fs/ubifs/file.c28
-rw-r--r--fs/ubifs/find.c27
-rw-r--r--fs/ubifs/gc.c6
-rw-r--r--fs/ubifs/io.c14
-rw-r--r--fs/ubifs/journal.c110
-rw-r--r--fs/ubifs/log.c4
-rw-r--r--fs/ubifs/misc.h63
-rw-r--r--fs/ubifs/orphan.c4
-rw-r--r--fs/ubifs/super.c70
-rw-r--r--fs/ubifs/tnc.c116
-rw-r--r--fs/ubifs/tnc_commit.c37
-rw-r--r--fs/ubifs/ubifs-media.h6
-rw-r--r--fs/ubifs/ubifs.h47
-rw-r--r--fs/ubifs/xattr.c54
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/linux-2.6/kmem.c6
-rw-r--r--fs/xfs/linux-2.6/kmem.h4
-rw-r--r--fs/xfs/linux-2.6/sema.h52
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c30
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c378
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c531
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h15
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c1087
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c72
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h154
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c330
-rw-r--r--fs/xfs/quota/xfs_dquot.c41
-rw-r--r--fs/xfs/quota/xfs_dquot.h31
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c12
-rw-r--r--fs/xfs/quota/xfs_qm.c38
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c7
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c16
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h3
-rw-r--r--fs/xfs/support/ktrace.c4
-rw-r--r--fs/xfs/support/uuid.c8
-rw-r--r--fs/xfs/support/uuid.h1
-rw-r--r--fs/xfs/xfs_acl.c69
-rw-r--r--fs/xfs/xfs_acl.h18
-rw-r--r--fs/xfs/xfs_arch.h68
-rw-r--r--fs/xfs/xfs_attr.c718
-rw-r--r--fs/xfs/xfs_attr.h91
-rw-r--r--fs/xfs/xfs_attr_leaf.c174
-rw-r--r--fs/xfs/xfs_attr_leaf.h31
-rw-r--r--fs/xfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/xfs_bit.c103
-rw-r--r--fs/xfs/xfs_bit.h34
-rw-r--r--fs/xfs/xfs_bmap.c152
-rw-r--r--fs/xfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_btree.c76
-rw-r--r--fs/xfs/xfs_btree.c105
-rw-r--r--fs/xfs/xfs_btree.h8
-rw-r--r--fs/xfs/xfs_buf_item.c12
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_da_btree.c48
-rw-r--r--fs/xfs/xfs_da_btree.h36
-rw-r--r--fs/xfs/xfs_dfrag.c37
-rw-r--r--fs/xfs/xfs_dir2.c125
-rw-r--r--fs/xfs/xfs_dir2.h6
-rw-r--r--fs/xfs/xfs_dir2_block.c56
-rw-r--r--fs/xfs/xfs_dir2_data.c5
-rw-r--r--fs/xfs/xfs_dir2_leaf.c93
-rw-r--r--fs/xfs/xfs_dir2_node.c402
-rw-r--r--fs/xfs/xfs_dir2_sf.c83
-rw-r--r--fs/xfs/xfs_dir2_sf.h6
-rw-r--r--fs/xfs/xfs_dir2_trace.c20
-rw-r--r--fs/xfs/xfs_dmapi.h3
-rw-r--r--fs/xfs/xfs_error.c18
-rw-r--r--fs/xfs/xfs_error.h13
-rw-r--r--fs/xfs/xfs_extfree_item.c6
-rw-r--r--fs/xfs/xfs_filestream.c6
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_ialloc_btree.c30
-rw-r--r--fs/xfs/xfs_iget.c48
-rw-r--r--fs/xfs/xfs_inode.c235
-rw-r--r--fs/xfs/xfs_inode.h49
-rw-r--r--fs/xfs/xfs_inode_item.c18
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_itable.c10
-rw-r--r--fs/xfs/xfs_log.c135
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_priv.h20
-rw-r--r--fs/xfs/xfs_log_recover.c28
-rw-r--r--fs/xfs/xfs_mount.c188
-rw-r--r--fs/xfs/xfs_mount.h32
-rw-r--r--fs/xfs/xfs_mru_cache.c21
-rw-r--r--fs/xfs/xfs_rename.c22
-rw-r--r--fs/xfs/xfs_rtalloc.c21
-rw-r--r--fs/xfs/xfs_rw.c2
-rw-r--r--fs/xfs/xfs_sb.h17
-rw-r--r--fs/xfs/xfs_trans.c79
-rw-r--r--fs/xfs/xfs_trans.h12
-rw-r--r--fs/xfs/xfs_trans_buf.c12
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_item.c74
-rw-r--r--fs/xfs/xfs_utils.c4
-rw-r--r--fs/xfs/xfs_utils.h3
-rw-r--r--fs/xfs/xfs_vfsops.c623
-rw-r--r--fs/xfs/xfs_vfsops.h5
-rw-r--r--fs/xfs/xfs_vnodeops.c908
-rw-r--r--fs/xfs/xfs_vnodeops.h12
242 files changed, 7428 insertions, 6816 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 88e3787c6ea9..e298fe194093 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
const struct file_operations v9fs_dir_operations = {
.read = generic_read_dir,
+ .llseek = generic_file_llseek,
.readdir = v9fs_dir_readdir,
.open = v9fs_file_open,
.release = v9fs_dir_release,
diff --git a/fs/Kconfig b/fs/Kconfig
index d3873583360b..abccb5dab9a8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH
If unsure, say N.
+config CIFS_UPCALL
+ bool "Kerberos/SPNEGO advanced session setup"
+ depends on CIFS && KEYS
+ help
+ Enables an upcall mechanism for CIFS which accesses
+ userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+ Kerberos tickets which are needed to mount to certain secure servers
+ (for which more secure Kerberos authentication is required). If
+ unsure, say N.
+
config CIFS_XATTR
bool "CIFS extended attributes"
depends on CIFS
@@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL
(which is disabled by default). See the file fs/cifs/README
for more details. If unsure, say N.
-config CIFS_UPCALL
- bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
- depends on CIFS_EXPERIMENTAL
- depends on KEYS
- help
- Enables an upcall mechanism for CIFS which accesses
- userspace helper utilities to provide SPNEGO packaged (RFC 4178)
- Kerberos tickets which are needed to mount to certain secure servers
- (for which more secure Kerberos authentication is required). If
- unsure, say N.
-
config CIFS_DFS_UPCALL
bool "DFS feature support (EXPERIMENTAL)"
depends on CIFS_EXPERIMENTAL
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index fc1a8dc64d78..85a30e929800 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,6 +197,7 @@ out:
const struct file_operations adfs_dir_operations = {
.read = generic_read_dir,
+ .llseek = generic_file_llseek,
.readdir = adfs_readdir,
.fsync = file_fsync,
};
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 6e3f282424b0..7b36904dbeac 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t);
const struct file_operations affs_dir_operations = {
.read = generic_read_dir,
+ .llseek = generic_file_llseek,
.readdir = affs_readdir,
.fsync = file_fsync,
};
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c37..78db4953a800 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
}
mntget(newmnt);
- err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+ err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
switch (err) {
case 0:
path_put(&nd->path);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
page = pages[loop];
if (page->index > wb->last)
break;
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
break;
if (!PageDirty(page) ||
page_private(page) != (unsigned long) wb) {
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bcfb2dc0a61b..2a41c2a7fc52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = {
.release = dcache_dir_close,
.read = generic_read_dir,
.readdir = dcache_readdir,
+ .llseek = dcache_dir_lseek,
.ioctl = autofs4_root_ioctl,
};
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = {
.release = dcache_dir_close,
.read = generic_read_dir,
.readdir = dcache_readdir,
+ .llseek = dcache_dir_lseek,
};
const struct inode_operations autofs4_indirect_root_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 02c6e62b72f8..740f53672a8a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep;
static const struct file_operations befs_dir_operations = {
.read = generic_read_dir,
.readdir = befs_readdir,
+ .llseek = generic_file_llseek,
};
static const struct inode_operations befs_dir_inode_operations = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 56372ecf1690..dfc0197905ca 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
/* Stash our initial stack pointer into the mm structure */
current->mm->start_stack = (unsigned long )sp;
-
+#ifdef FLAT_PLAT_INIT
+ FLAT_PLAT_INIT(regs);
+#endif
DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
(int)regs, (int)start_addr, (int)current->mm->start_stack);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 756205314c24..8d7e88e02e0f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (bprm->misc_bang)
goto _ret;
- bprm->misc_bang = 1;
-
/* to keep locking time low, we copy the interpreter string */
read_lock(&entries_lock);
fmt = check_file(bprm);
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (retval < 0)
goto _error;
+ bprm->misc_bang = 1;
+
retval = search_binary_handler (bprm, regs);
if (retval < 0)
goto _error;
diff --git a/fs/bio.c b/fs/bio.c
index 25f1af0d81e5..3cba7ae34d75 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
*/
bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl) {
- struct biovec_slab *bp = bvec_slabs + *idx;
-
- memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
- }
+ if (bvl)
+ memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
return bvl;
}
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
goto out;
}
bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+ bio->bi_max_vecs = bvec_nr_vecs(idx);
}
bio->bi_io_vec = bvl;
}
@@ -472,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
kfree(bmd);
}
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
+static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+ gfp_t gfp_mask)
{
- struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
+ struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
if (!bmd)
return NULL;
- bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
+ bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
if (!bmd->iovecs) {
kfree(bmd);
return NULL;
}
- bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
+ bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
if (bmd->sgvecs)
return bmd;
@@ -494,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
return NULL;
}
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
- int uncopy)
+static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
+ struct sg_iovec *iov, int iov_count, int uncopy)
{
int ret = 0, i;
struct bio_vec *bvec;
@@ -505,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
__bio_for_each_segment(bvec, bio, i, 0) {
char *bv_addr = page_address(bvec->bv_page);
- unsigned int bv_len = bvec->bv_len;
+ unsigned int bv_len = iovecs[i].bv_len;
while (bv_len && iov_idx < iov_count) {
unsigned int bytes;
@@ -557,7 +555,7 @@ int bio_uncopy_user(struct bio *bio)
struct bio_map_data *bmd = bio->bi_private;
int ret;
- ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
+ ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
bio_free_map_data(bmd);
bio_put(bio);
@@ -599,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
len += iov[i].iov_len;
}
- bmd = bio_alloc_map_data(nr_pages, iov_count);
+ bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
if (!bmd)
return ERR_PTR(-ENOMEM);
@@ -636,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
* success
*/
if (!write_to_vm) {
- ret = __bio_copy_iov(bio, iov, iov_count, 0);
+ ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
if (ret)
goto cleanup;
}
@@ -945,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
{
struct bio_vec *bvec;
const int read = bio_data_dir(bio) == READ;
- char *p = bio->bi_private;
+ struct bio_map_data *bmd = bio->bi_private;
int i;
+ char *p = bmd->sgvecs[0].iov_base;
__bio_for_each_segment(bvec, bio, i, 0) {
char *addr = page_address(bvec->bv_page);
+ int len = bmd->iovecs[i].bv_len;
if (read && !err)
- memcpy(p, addr, bvec->bv_len);
+ memcpy(p, addr, len);
__free_page(bvec->bv_page);
- p += bvec->bv_len;
+ p += len;
}
+ bio_free_map_data(bmd);
bio_put(bio);
}
@@ -981,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
const int nr_pages = end - start;
struct bio *bio;
struct bio_vec *bvec;
+ struct bio_map_data *bmd;
int i, ret;
+ struct sg_iovec iov;
+
+ iov.iov_base = data;
+ iov.iov_len = len;
+
+ bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
+ if (!bmd)
+ return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
bio = bio_alloc(gfp_mask, nr_pages);
if (!bio)
- return ERR_PTR(-ENOMEM);
+ goto out_bmd;
while (len) {
struct page *page;
@@ -1019,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
}
}
- bio->bi_private = data;
+ bio->bi_private = bmd;
bio->bi_end_io = bio_copy_kern_endio;
+
+ bio_set_map_data(bmd, bio, &iov, 1);
return bio;
cleanup:
bio_for_each_segment(bvec, bio, i)
__free_page(bvec->bv_page);
bio_put(bio);
+out_bmd:
+ bio_free_map_data(bmd);
return ERR_PTR(ret);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada369..aff54219e049 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
* hooks: /n/, see "layering violations".
*/
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0)
+ if (ret != 0) {
+ bdput(bdev);
return ret;
+ }
ret = -ENXIO;
file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
bdev = ERR_PTR(error);
goto out;
}
+EXPORT_SYMBOL(lookup_bdev);
/**
* open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index ca12a6bb82b1..ac78d4c19b3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
/*
* The buffer's backing address_space's private_lock must be held
*/
-static inline void __remove_assoc_queue(struct buffer_head *bh)
+static void __remove_assoc_queue(struct buffer_head *bh)
{
list_del_init(&bh->b_assoc_buffers);
WARN_ON(!bh->b_assoc_map);
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
*/
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
- } else if (test_set_buffer_locked(bh)) {
+ } else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh)
BUG_ON(!buffer_mapped(bh));
BUG_ON(!bh->b_end_io);
- if (buffer_ordered(bh) && (rw == WRITE))
- rw = WRITE_BARRIER;
+ /*
+ * Mask in barrier bit for a write (could be either a WRITE or a
+ * WRITE_SYNC
+ */
+ if (buffer_ordered(bh) && (rw & WRITE))
+ rw |= WRITE_BARRIER;
/*
- * Only clear out a write error when rewriting, should this
- * include WRITE_SYNC as well?
+ * Only clear out a write error when rewriting
*/
- if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
+ if (test_set_buffer_req(bh) && (rw & WRITE))
clear_buffer_write_io_error(bh);
/*
@@ -3000,7 +3003,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
if (rw == SWRITE || rw == SWRITE_SYNC)
lock_buffer(bh);
- else if (test_set_buffer_locked(bh))
+ else if (!trylock_buffer(bh))
continue;
if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fdf..06e521a945c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,19 @@
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures. Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba). Fix memory leak
+on dns_upcall (resolving DFS referralls). Fix plain text password
+authentication (requires setting SecurityFlags to 0x30030 to enable
+lanman and plain text though). Fix writes to be at correct offset when
+file is open with O_APPEND and file is on a directio (forcediretio) mount.
+
Version 1.53
------------
DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/README b/fs/cifs/README
index 2bd6fe556f88..bd2343d4c6a6 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and
hashing mechanisms (as "must use") on the other hand
does not make much sense. Default flags are
0x07007
- (NTLM, NTLMv2 and packet signing allowed). Maximum
+ (NTLM, NTLMv2 and packet signing allowed). The maximum
allowable flags if you want to allow mounts to servers
using weaker password hashes is 0x37037 (lanman,
- plaintext, ntlm, ntlmv2, signing allowed):
+ plaintext, ntlm, ntlmv2, signing allowed). Some
+ SecurityFlags require the corresponding menuconfig
+ options to be enabled (lanman and plaintext require
+ CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
+ plaintext authentication currently requires also
+ enabling lanman authentication in the security flags
+ because the cifs module only supports sending
+ laintext passwords using the older lanman dialect
+ form of the session setup SMB. (e.g. for authentication
+ using plain text passwords, set the SecurityFlags
+ to 0x30030):
may use packet signing 0x00001
must use packet signing 0x01001
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in
that they represent all for that share, not just those for which the server
returned success.
-Also note that "cat /proc/fs/cifs/DebugData" will display information about
+Also note that "cat /proc/fs/cifs/DebugData" will display information about
the active sessions and the shares that are mounted.
-Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is
-on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and
-LANMAN support do not require this helper.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
+project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+Enabling DFS support (used to access shares transparently in an MS-DFS
+global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In
+addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 6bb440b257b0..1b09f1670061 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
unsigned int cls, con, tag, oidlen, rc;
bool use_ntlmssp = false;
bool use_kerberos = false;
+ bool use_mskerberos = false;
*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
@@ -483,6 +484,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
asn1_open(&ctx, security_blob, length);
+ /* GSSAPI header */
if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
cFYI(1, ("Error decoding negTokenInit header"));
return 0;
@@ -490,156 +492,149 @@ decode_negTokenInit(unsigned char *security_blob, int length,
|| (tag != ASN1_EOC)) {
cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
return 0;
- } else {
- /* remember to free obj->oid */
- rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
- if (rc) {
- if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
- rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
- if (rc) {
- rc = compare_oid(oid, oidlen,
- SPNEGO_OID,
- SPNEGO_OID_LEN);
- kfree(oid);
- }
- } else
- rc = 0;
- }
+ }
- if (!rc) {
- cFYI(1, ("Error decoding negTokenInit header"));
- return 0;
- }
+ /* Check for SPNEGO OID -- remember to free obj->oid */
+ rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+ if (rc) {
+ if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
+ (cls == ASN1_UNI)) {
+ rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
+ if (rc) {
+ rc = compare_oid(oid, oidlen, SPNEGO_OID,
+ SPNEGO_OID_LEN);
+ kfree(oid);
+ }
+ } else
+ rc = 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding negTokenInit"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
- || (tag != ASN1_EOC)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
- cls, con, tag, end, *end));
- return 0;
- }
+ /* SPNEGO OID not present or garbled -- bail out */
+ if (!rc) {
+ cFYI(1, ("Error decoding negTokenInit header"));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding negTokenInit"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+ || (tag != ASN1_EOC)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding 2nd part of negTokenInit"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
- || (tag != ASN1_EOC)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode
- (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
- cFYI(1, ("Error decoding 2nd part of negTokenInit"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1,
- ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
- cls, con, tag, end, *end));
- return 0;
- }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+ || (tag != ASN1_EOC)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- while (!asn1_eoc_decode(&ctx, sequence_end)) {
- rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
- if (!rc) {
- cFYI(1,
- ("Error decoding negTokenInit hdr exit2"));
- return 0;
- }
- if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
- if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
-
- cFYI(1,
- ("OID len = %d oid = 0x%lx 0x%lx "
- "0x%lx 0x%lx",
- oidlen, *oid, *(oid + 1),
- *(oid + 2), *(oid + 3)));
-
- if (compare_oid(oid, oidlen,
- MSKRB5_OID,
- MSKRB5_OID_LEN))
- use_kerberos = true;
- else if (compare_oid(oid, oidlen,
- KRB5_OID,
- KRB5_OID_LEN))
- use_kerberos = true;
- else if (compare_oid(oid, oidlen,
- NTLMSSP_OID,
- NTLMSSP_OID_LEN))
- use_ntlmssp = true;
-
- kfree(oid);
- }
- } else {
- cFYI(1, ("Should be an oid what is going on?"));
- }
- }
+ if (asn1_header_decode
+ (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1,
+ ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+ cls, con, tag, end, *end));
+ return 0;
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit3"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- /* tag = 3 indicating mechListMIC */
+ while (!asn1_eoc_decode(&ctx, sequence_end)) {
+ rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+ if (!rc) {
cFYI(1,
- ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
+ ("Error decoding negTokenInit hdr exit2"));
return 0;
}
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit5"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
+ if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
+ if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
+
+ cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
+ "0x%lx 0x%lx", oidlen, *oid,
+ *(oid + 1), *(oid + 2), *(oid + 3)));
+
+ if (compare_oid(oid, oidlen, MSKRB5_OID,
+ MSKRB5_OID_LEN) &&
+ !use_kerberos)
+ use_mskerberos = true;
+ else if (compare_oid(oid, oidlen, KRB5_OID,
+ KRB5_OID_LEN) &&
+ !use_mskerberos)
+ use_kerberos = true;
+ else if (compare_oid(oid, oidlen, NTLMSSP_OID,
+ NTLMSSP_OID_LEN))
+ use_ntlmssp = true;
+
+ kfree(oid);
+ }
+ } else {
+ cFYI(1, ("Should be an oid what is going on?"));
}
+ }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit 7"));
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- cFYI(1,
- ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
- return 0;
- }
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1,
- ("Error decoding last part negTokenInit exit9"));
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
- || (tag != ASN1_GENSTR)) {
- cFYI(1,
- ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end));
- return 0;
- }
- cFYI(1, ("Need to call asn1_octets_decode() function for %s",
- ctx.pointer)); /* is this UTF-8 or ASCII? */
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit3"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+ /* tag = 3 indicating mechListMIC */
+ cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
+ }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit5"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+ || (tag != ASN1_SEQ)) {
+ cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ }
+
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit 7"));
+ return 0;
+ } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+ cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
+ }
+ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+ cFYI(1, ("Error decoding last part negTokenInit exit9"));
+ return 0;
+ } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
+ || (tag != ASN1_GENSTR)) {
+ cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
+ cls, con, tag, end, *end));
+ return 0;
}
+ cFYI(1, ("Need to call asn1_octets_decode() function for %s",
+ ctx.pointer)); /* is this UTF-8 or ASCII? */
if (use_kerberos)
*secType = Kerberos;
+ else if (use_mskerberos)
+ *secType = MSKerberos;
else if (use_ntlmssp)
*secType = NTLMSSP;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 688a2d42153f..69a12aae91d3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
spin_lock(&GlobalMid_Lock);
list_for_each(tmp, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
- if (mid_entry) {
- cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
- mid_entry->midState,
- (int)mid_entry->command,
- mid_entry->pid,
- mid_entry->tsk,
- mid_entry->mid));
+ cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+ mid_entry->midState,
+ (int)mid_entry->command,
+ mid_entry->pid,
+ mid_entry->tsk,
+ mid_entry->mid));
#ifdef CONFIG_CIFS_STATS2
- cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
- mid_entry->largeBuf,
- mid_entry->resp_buf,
- mid_entry->when_received,
- jiffies));
+ cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+ mid_entry->largeBuf,
+ mid_entry->resp_buf,
+ mid_entry->when_received,
+ jiffies));
#endif /* STATS2 */
- cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
- mid_entry->multiEnd));
- if (mid_entry->resp_buf) {
- cifs_dump_detail(mid_entry->resp_buf);
- cifs_dump_mem("existing buf: ",
- mid_entry->resp_buf, 62);
- }
+ cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+ mid_entry->multiEnd));
+ if (mid_entry->resp_buf) {
+ cifs_dump_detail(mid_entry->resp_buf);
+ cifs_dump_mem("existing buf: ",
+ mid_entry->resp_buf, 62);
}
}
spin_unlock(&GlobalMid_Lock);
@@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
mid_entry = list_entry(tmp1, struct
mid_q_entry,
qhead);
- if (mid_entry) {
- seq_printf(m,
- "State: %d com: %d pid:"
- " %d tsk: %p mid %d\n",
- mid_entry->midState,
- (int)mid_entry->command,
- mid_entry->pid,
- mid_entry->tsk,
- mid_entry->mid);
- }
+ seq_printf(m, "State: %d com: %d pid:"
+ " %d tsk: %p mid %d\n",
+ mid_entry->midState,
+ (int)mid_entry->command,
+ mid_entry->pid,
+ mid_entry->tsk,
+ mid_entry->mid);
}
spin_unlock(&GlobalMid_Lock);
}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e329..d2c8eef84f3c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
int err;
mntget(newmnt);
- err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
+ err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
switch (err) {
case 0:
path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6aed..117ef4bba68e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
.describe = user_describe,
};
-#define MAX_VER_STR_LEN 9 /* length of longest version string e.g.
- strlen(";ver=0xFF") */
+#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
+ strlen("ver=0xFF") */
#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
in future could have strlen(";sec=ntlmsspi") */
#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
struct key *spnego_key;
const char *hostname = server->hostname;
- /* BB: come up with better scheme for determining length */
- /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host=
- hostname sec=mechanism uid=0x uid */
- desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 +
- strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2);
+ /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
+ host=hostname sec=mechanism uid=0xFF user=username */
+ desc_len = MAX_VER_STR_LEN +
+ 6 /* len of "host=" */ + strlen(hostname) +
+ 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
+ MAX_MECH_STR_LEN +
+ 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
+ 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
+
spnego_key = ERR_PTR(-ENOMEM);
description = kzalloc(desc_len, GFP_KERNEL);
if (description == NULL)
@@ -110,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
dp = description + strlen(description);
- /* for now, only sec=krb5 is valid */
+ /* for now, only sec=krb5 and sec=mskrb5 are valid */
if (server->secType == Kerberos)
sprintf(dp, ";sec=krb5");
+ else if (server->secType == MSKerberos)
+ sprintf(dp, ";sec=mskrb5");
else
goto out;
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 05a34b17a1ab..e4041ec4d712 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -23,7 +23,7 @@
#ifndef _CIFS_SPNEGO_H
#define _CIFS_SPNEGO_H
-#define CIFS_SPNEGO_UPCALL_VERSION 1
+#define CIFS_SPNEGO_UPCALL_VERSION 2
/*
* The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION.
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 83fd40dc1ef0..bd5f13d38450 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
if (extended_security & CIFSSEC_MAY_PLNTXT) {
+ memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
memcpy(lnm_session_key, password_with_pad,
CIFS_ENCPWD_SIZE);
return;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1ec7076f7b24..25ecbd5b0404 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -175,6 +175,8 @@ out_no_root:
if (inode)
iput(inode);
+ cifs_umount(sb, cifs_sb);
+
out_mount_failed:
if (cifs_sb) {
#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -930,36 +932,34 @@ static int cifs_oplock_thread(void *dummyarg)
schedule_timeout(39*HZ);
} else {
oplock_item = list_entry(GlobalOplock_Q.next,
- struct oplock_q_entry, qhead);
- if (oplock_item) {
- cFYI(1, ("found oplock item to write out"));
- pTcon = oplock_item->tcon;
- inode = oplock_item->pinode;
- netfid = oplock_item->netfid;
- spin_unlock(&GlobalMid_Lock);
- DeleteOplockQEntry(oplock_item);
- /* can not grab inode sem here since it would
+ struct oplock_q_entry, qhead);
+ cFYI(1, ("found oplock item to write out"));
+ pTcon = oplock_item->tcon;
+ inode = oplock_item->pinode;
+ netfid = oplock_item->netfid;
+ spin_unlock(&GlobalMid_Lock);
+ DeleteOplockQEntry(oplock_item);
+ /* can not grab inode sem here since it would
deadlock when oplock received on delete
since vfs_unlink holds the i_mutex across
the call */
- /* mutex_lock(&inode->i_mutex);*/
- if (S_ISREG(inode->i_mode)) {
- rc =
- filemap_fdatawrite(inode->i_mapping);
- if (CIFS_I(inode)->clientCanCacheRead
- == 0) {
- waitrc = filemap_fdatawait(inode->i_mapping);
- invalidate_remote_inode(inode);
- }
- if (rc == 0)
- rc = waitrc;
- } else
- rc = 0;
- /* mutex_unlock(&inode->i_mutex);*/
- if (rc)
- CIFS_I(inode)->write_behind_rc = rc;
- cFYI(1, ("Oplock flush inode %p rc %d",
- inode, rc));
+ /* mutex_lock(&inode->i_mutex);*/
+ if (S_ISREG(inode->i_mode)) {
+ rc = filemap_fdatawrite(inode->i_mapping);
+ if (CIFS_I(inode)->clientCanCacheRead == 0) {
+ waitrc = filemap_fdatawait(
+ inode->i_mapping);
+ invalidate_remote_inode(inode);
+ }
+ if (rc == 0)
+ rc = waitrc;
+ } else
+ rc = 0;
+ /* mutex_unlock(&inode->i_mutex);*/
+ if (rc)
+ CIFS_I(inode)->write_behind_rc = rc;
+ cFYI(1, ("Oplock flush inode %p rc %d",
+ inode, rc));
/* releasing stale oplock after recent reconnect
of smb session using a now incorrect file
@@ -967,15 +967,13 @@ static int cifs_oplock_thread(void *dummyarg)
not bother sending an oplock release if session
to server still is disconnected since oplock
already released by the server in that case */
- if (pTcon->tidStatus != CifsNeedReconnect) {
- rc = CIFSSMBLock(0, pTcon, netfid,
- 0 /* len */ , 0 /* offset */, 0,
- 0, LOCKING_ANDX_OPLOCK_RELEASE,
- false /* wait flag */);
- cFYI(1, ("Oplock release rc = %d", rc));
- }
- } else
- spin_unlock(&GlobalMid_Lock);
+ if (pTcon->tidStatus != CifsNeedReconnect) {
+ rc = CIFSSMBLock(0, pTcon, netfid,
+ 0 /* len */ , 0 /* offset */, 0,
+ 0, LOCKING_ANDX_OPLOCK_RELEASE,
+ false /* wait flag */);
+ cFYI(1, ("Oplock release rc = %d", rc));
+ }
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1); /* yield in case q were corrupt */
}
@@ -1001,8 +999,7 @@ static int cifs_dnotify_thread(void *dummyarg)
list_for_each(tmp, &GlobalSMBSessionList) {
ses = list_entry(tmp, struct cifsSesInfo,
cifsSessionList);
- if (ses && ses->server &&
- atomic_read(&ses->server->inFlight))
+ if (ses->server && atomic_read(&ses->server->inFlight))
wake_up_all(&ses->server->response_q);
}
read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd15529..135c965c4137 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.53"
+#define CIFS_VERSION "1.54"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7e1cf262effe..8dfd6f24d488 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -80,7 +80,8 @@ enum securityEnum {
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO */
NTLMSSP, /* NTLMSSP via SPNEGO */
- Kerberos /* Kerberos via SPNEGO */
+ Kerberos, /* Kerberos via SPNEGO */
+ MSKerberos, /* MS Kerberos via SPNEGO */
};
enum protocolEnum {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 409abce12732..d2a073edd1b8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -262,7 +262,7 @@
*/
#define CIFS_NO_HANDLE 0xFFFF
-#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL)
+#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
#define NO_CHANGE_32 0xFFFFFFFFUL
/* IPC$ in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f821..a729d083e6f4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
struct kstatfs *FSData);
-extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
+extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
const char *fileName, const FILE_BASIC_INFO *data,
const struct nls_table *nls_codepage,
int remap_special_chars);
-extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
- const FILE_BASIC_INFO *data, __u16 fid);
+extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+ const FILE_BASIC_INFO *data, __u16 fid,
+ __u32 pid_of_opener);
#if 0
extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
__u64 size, __u16 fileHandle, __u32 opener_pid,
bool AllocSizeFlag);
-extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
- char *full_path, __u64 mode, __u64 uid,
- __u64 gid, dev_t dev,
+
+struct cifs_unix_set_info_args {
+ __u64 ctime;
+ __u64 atime;
+ __u64 mtime;
+ __u64 mode;
+ __u64 uid;
+ __u64 gid;
+ dev_t device;
+};
+
+extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+ char *fileName,
+ const struct cifs_unix_set_info_args *args,
const struct nls_table *nls_codepage,
int remap_special_chars);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c621ffa2ca90..994de7c90474 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
write_lock(&GlobalSMBSeslock);
list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
open_file = list_entry(tmp, struct cifsFileInfo, tlist);
- if (open_file)
- open_file->invalidHandle = true;
+ open_file->invalidHandle = true;
}
write_unlock(&GlobalSMBSeslock);
/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -4816,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
time and resort to the original setpathinfo level which takes the ancient
DOS time format with 2 second granularity */
int
-CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
- const FILE_BASIC_INFO *data, __u16 fid)
+CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
+ const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
char *data_offset;
@@ -4830,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
if (rc)
return rc;
- /* At this point there is no need to override the current pid
- with the pid of the opener, but that could change if we someday
- use an existing handle (rather than opening one on the fly) */
- /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
- pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
+ pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+ pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
params = 6;
pSMB->MaxSetupCount = 0;
@@ -4882,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
int
-CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName,
- const FILE_BASIC_INFO *data,
- const struct nls_table *nls_codepage, int remap)
+CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
+ const char *fileName, const FILE_BASIC_INFO *data,
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5013,10 +5009,9 @@ SetAttrLgcyRetry:
#endif /* temporarily unneeded SetAttr legacy function */
int
-CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
- char *fileName, __u64 mode, __u64 uid, __u64 gid,
- dev_t device, const struct nls_table *nls_codepage,
- int remap)
+CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
+ const struct cifs_unix_set_info_args *args,
+ const struct nls_table *nls_codepage, int remap)
{
TRANSACTION2_SPI_REQ *pSMB = NULL;
TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5025,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
int bytes_returned = 0;
FILE_UNIX_BASIC_INFO *data_offset;
__u16 params, param_offset, offset, count, byte_count;
+ __u64 mode = args->mode;
cFYI(1, ("In SetUID/GID/Mode"));
setPermsRetry:
@@ -5080,16 +5076,16 @@ setPermsRetry:
set file size and do not want to truncate file size to zero
accidently as happened on one Samba server beta by putting
zero instead of -1 here */
- data_offset->EndOfFile = NO_CHANGE_64;
- data_offset->NumOfBytes = NO_CHANGE_64;
- data_offset->LastStatusChange = NO_CHANGE_64;
- data_offset->LastAccessTime = NO_CHANGE_64;
- data_offset->LastModificationTime = NO_CHANGE_64;
- data_offset->Uid = cpu_to_le64(uid);
- data_offset->Gid = cpu_to_le64(gid);
+ data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
+ data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
+ data_offset->LastStatusChange = cpu_to_le64(args->ctime);
+ data_offset->LastAccessTime = cpu_to_le64(args->atime);
+ data_offset->LastModificationTime = cpu_to_le64(args->mtime);
+ data_offset->Uid = cpu_to_le64(args->uid);
+ data_offset->Gid = cpu_to_le64(args->gid);
/* better to leave device as zero when it is */
- data_offset->DevMajor = cpu_to_le64(MAJOR(device));
- data_offset->DevMinor = cpu_to_le64(MINOR(device));
+ data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
+ data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
data_offset->Permissions = cpu_to_le64(mode);
if (S_ISREG(mode))
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b51d5777cde6..4c13bcdb92a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
list_for_each(tmp, &GlobalTreeConnectionList) {
tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
- if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
+ if ((tcon->ses) && (tcon->ses->server == server))
tcon->tidStatus = CifsNeedReconnect;
}
read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct
mid_q_entry,
qhead);
- if (mid_entry) {
- if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
+ if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
/* Mark other intransit requests as needing
retry so we do not immediately mark the
session bad again (ie after we reconnect
below) as they timeout too */
- mid_entry->midState = MID_RETRY_NEEDED;
- }
+ mid_entry->midState = MID_RETRY_NEEDED;
}
}
spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
current->flags |= PF_MEMALLOC;
cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
- write_lock(&GlobalSMBSeslock);
- atomic_inc(&tcpSesAllocCount);
- length = tcpSesAllocCount.counter;
- write_unlock(&GlobalSMBSeslock);
- if (length > 1)
+
+ length = atomic_inc_return(&tcpSesAllocCount);
+ if (length > 1)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
GFP_KERNEL);
@@ -745,14 +741,11 @@ multi_t2_fnd:
coming home not much else we can do but free the memory */
}
- write_lock(&GlobalSMBSeslock);
- atomic_dec(&tcpSesAllocCount);
- length = tcpSesAllocCount.counter;
-
/* last chance to mark ses pointers invalid
if there are any pointing to this (e.g
if a crazy root user tried to kill cifsd
kernel thread explicitly this might happen) */
+ write_lock(&GlobalSMBSeslock);
list_for_each(tmp, &GlobalSMBSessionList) {
ses = list_entry(tmp, struct cifsSesInfo,
cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
kfree(server->hostname);
kfree(server);
+
+ length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
GFP_KERNEL);
@@ -3603,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
char ntlm_session_key[CIFS_SESS_KEY_SIZE];
bool ntlmv2_flag = false;
int first_time = 0;
+ struct TCP_Server_Info *server = pSesInfo->server;
/* what if server changes its buffer size after dropping the session? */
- if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ {
+ if (server->maxBuf == 0) /* no need to send on reconnect */ {
rc = CIFSSMBNegotiate(xid, pSesInfo);
- if (rc == -EAGAIN) /* retry only once on 1st time connection */ {
+ if (rc == -EAGAIN) {
+ /* retry only once on 1st time connection */
rc = CIFSSMBNegotiate(xid, pSesInfo);
if (rc == -EAGAIN)
rc = -EHOSTDOWN;
}
if (rc == 0) {
spin_lock(&GlobalMid_Lock);
- if (pSesInfo->server->tcpStatus != CifsExiting)
- pSesInfo->server->tcpStatus = CifsGood;
+ if (server->tcpStatus != CifsExiting)
+ server->tcpStatus = CifsGood;
else
rc = -EHOSTDOWN;
spin_unlock(&GlobalMid_Lock);
@@ -3623,97 +3620,90 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
}
first_time = 1;
}
- if (!rc) {
- pSesInfo->flags = 0;
- pSesInfo->capabilities = pSesInfo->server->capabilities;
- if (linuxExtEnabled == 0)
- pSesInfo->capabilities &= (~CAP_UNIX);
+
+ if (rc)
+ goto ss_err_exit;
+
+ pSesInfo->flags = 0;
+ pSesInfo->capabilities = server->capabilities;
+ if (linuxExtEnabled == 0)
+ pSesInfo->capabilities &= (~CAP_UNIX);
/* pSesInfo->sequence_number = 0;*/
- cFYI(1,
- ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
- pSesInfo->server->secMode,
- pSesInfo->server->capabilities,
- pSesInfo->server->timeAdj));
- if (experimEnabled < 2)
- rc = CIFS_SessSetup(xid, pSesInfo,
- first_time, nls_info);
- else if (extended_security
- && (pSesInfo->capabilities
- & CAP_EXTENDED_SECURITY)
- && (pSesInfo->server->secType == NTLMSSP)) {
- rc = -EOPNOTSUPP;
- } else if (extended_security
- && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
- && (pSesInfo->server->secType == RawNTLMSSP)) {
- cFYI(1, ("NTLMSSP sesssetup"));
- rc = CIFSNTLMSSPNegotiateSessSetup(xid,
- pSesInfo,
- &ntlmv2_flag,
- nls_info);
- if (!rc) {
- if (ntlmv2_flag) {
- char *v2_response;
- cFYI(1, ("more secure NTLM ver2 hash"));
- if (CalcNTLMv2_partial_mac_key(pSesInfo,
- nls_info)) {
- rc = -ENOMEM;
- goto ss_err_exit;
- } else
- v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL);
- if (v2_response) {
- CalcNTLMv2_response(pSesInfo,
- v2_response);
- /* if (first_time)
- cifs_calculate_ntlmv2_mac_key(
- pSesInfo->server->mac_signing_key,
- response, ntlm_session_key,*/
- kfree(v2_response);
+ cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
+ server->secMode, server->capabilities, server->timeAdj));
+
+ if (experimEnabled < 2)
+ rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
+ else if (extended_security
+ && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+ && (server->secType == NTLMSSP)) {
+ rc = -EOPNOTSUPP;
+ } else if (extended_security
+ && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+ && (server->secType == RawNTLMSSP)) {
+ cFYI(1, ("NTLMSSP sesssetup"));
+ rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
+ nls_info);
+ if (!rc) {
+ if (ntlmv2_flag) {
+ char *v2_response;
+ cFYI(1, ("more secure NTLM ver2 hash"));
+ if (CalcNTLMv2_partial_mac_key(pSesInfo,
+ nls_info)) {
+ rc = -ENOMEM;
+ goto ss_err_exit;
+ } else
+ v2_response = kmalloc(16 + 64 /* blob*/,
+ GFP_KERNEL);
+ if (v2_response) {
+ CalcNTLMv2_response(pSesInfo,
+ v2_response);
+ /* if (first_time)
+ cifs_calculate_ntlmv2_mac_key */
+ kfree(v2_response);
/* BB Put dummy sig in SessSetup PDU? */
- } else {
- rc = -ENOMEM;
- goto ss_err_exit;
- }
-
} else {
- SMBNTencrypt(pSesInfo->password,
- pSesInfo->server->cryptKey,
- ntlm_session_key);
-
- if (first_time)
- cifs_calculate_mac_key(
- &pSesInfo->server->mac_signing_key,
- ntlm_session_key,
- pSesInfo->password);
+ rc = -ENOMEM;
+ goto ss_err_exit;
}
+
+ } else {
+ SMBNTencrypt(pSesInfo->password,
+ server->cryptKey,
+ ntlm_session_key);
+
+ if (first_time)
+ cifs_calculate_mac_key(
+ &server->mac_signing_key,
+ ntlm_session_key,
+ pSesInfo->password);
+ }
/* for better security the weaker lanman hash not sent
in AuthSessSetup so we no longer calculate it */
- rc = CIFSNTLMSSPAuthSessSetup(xid,
- pSesInfo,
- ntlm_session_key,
- ntlmv2_flag,
- nls_info);
- }
- } else { /* old style NTLM 0.12 session setup */
- SMBNTencrypt(pSesInfo->password,
- pSesInfo->server->cryptKey,
- ntlm_session_key);
+ rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
+ ntlm_session_key,
+ ntlmv2_flag,
+ nls_info);
+ }
+ } else { /* old style NTLM 0.12 session setup */
+ SMBNTencrypt(pSesInfo->password, server->cryptKey,
+ ntlm_session_key);
- if (first_time)
- cifs_calculate_mac_key(
- &pSesInfo->server->mac_signing_key,
- ntlm_session_key, pSesInfo->password);
+ if (first_time)
+ cifs_calculate_mac_key(&server->mac_signing_key,
+ ntlm_session_key,
+ pSesInfo->password);
- rc = CIFSSessSetup(xid, pSesInfo,
- ntlm_session_key, nls_info);
- }
- if (rc) {
- cERROR(1, ("Send error in SessSetup = %d", rc));
- } else {
- cFYI(1, ("CIFS Session Established successfully"));
+ rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
+ }
+ if (rc) {
+ cERROR(1, ("Send error in SessSetup = %d", rc));
+ } else {
+ cFYI(1, ("CIFS Session Established successfully"));
pSesInfo->status = CifsGood;
- }
}
+
ss_err_exit:
return rc;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c9..e962e75e6f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
/* If Open reported that we actually created a file
then we now have to set the mode if possible */
if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+ struct cifs_unix_set_info_args args = {
+ .mode = mode,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
- (__u64)current->fsuid,
- (__u64)current->fsgid,
- 0 /* dev */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64) current->fsuid;
+ if (inode->i_mode & S_ISGID)
+ args.gid = (__u64) inode->i_gid;
+ else
+ args.gid = (__u64) current->fsgid;
} else {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
- (__u64)-1,
- (__u64)-1,
- 0 /* dev */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else {
/* BB implement mode setting via Windows security
descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_SET_UID)) {
newinode->i_uid = current->fsuid;
- newinode->i_gid = current->fsgid;
+ if (inode->i_mode & S_ISGID)
+ newinode->i_gid =
+ inode->i_gid;
+ else
+ newinode->i_gid =
+ current->fsgid;
}
}
}
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
if (full_path == NULL)
rc = -ENOMEM;
else if (pTcon->unix_ext) {
- mode &= ~current->fs->umask;
+ struct cifs_unix_set_info_args args = {
+ .mode = mode & ~current->fs->umask,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = device_number,
+ };
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode, (__u64)current->fsuid,
- (__u64)current->fsgid,
- device_number, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64) current->fsuid;
+ args.gid = (__u64) current->fsgid;
} else {
- rc = CIFSSMBUnixSetPerms(xid, pTcon,
- full_path, mode, (__u64)-1, (__u64)-1,
- device_number, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
+ &args, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
if (!rc) {
rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index f730ef35499e..a2e0673e1b08 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data,
return rc;
}
+static void
+dns_resolver_destroy(struct key *key)
+{
+ kfree(key->payload.data);
+}
+
struct key_type key_type_dns_resolver = {
.name = "dns_resolver",
.def_datalen = sizeof(struct in_addr),
.describe = user_describe,
.instantiate = dns_resolver_instantiate,
+ .destroy = dns_resolver_destroy,
.match = user_match,
};
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..cbefe1f1f9fe 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
if (pTcon->unix_ext) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- inode->i_mode,
- (__u64)-1, (__u64)-1, 0 /* dev */,
+ struct cifs_unix_set_info_args args = {
+ .mode = inode->i_mode,
+ .uid = NO_CHANGE_64,
+ .gid = NO_CHANGE_64,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- } else {
- /* BB implement via Windows security descriptors eg
- CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
- -1, -1, local_nls);
- in the meantime could set r/o dos attribute when
- perms are eg: mode & 0222 == 0 */
}
}
@@ -832,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
return -EBADF;
open_file = (struct cifsFileInfo *) file->private_data;
+ rc = generic_write_checks(file, poffset, &write_size, 0);
+ if (rc)
+ return rc;
+
xid = GetXid();
if (*poffset > file->f_path.dentry->d_inode->i_size)
@@ -1280,7 +1285,7 @@ retry:
if (first < 0)
lock_page(page);
- else if (TestSetPageLocked(page))
+ else if (!trylock_page(page))
break;
if (unlikely(page->mapping != mapping)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 46e54d39461d..9c548f110102 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode,
if ((inode->i_mode & S_IWUGO) == 0 &&
(attr & ATTR_READONLY) == 0)
inode->i_mode |= (S_IWUGO & default_mode);
- inode->i_mode &= ~S_IFMT;
+
+ inode->i_mode &= ~S_IFMT;
}
/* clear write bits if ATTR_READONLY is set */
if (attr & ATTR_READONLY)
@@ -649,6 +650,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &simple_dir_operations;
inode->i_uid = cifs_sb->mnt_uid;
inode->i_gid = cifs_sb->mnt_gid;
+ } else if (rc) {
_FreeXid(xid);
iget_failed(inode);
return ERR_PTR(rc);
@@ -737,7 +739,7 @@ psx_del_no_retry:
/* ATTRS set to normal clears r/o bit */
pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
if (!(pTcon->ses->flags & CIFS_SES_NT4))
- rc = CIFSSMBSetTimes(xid, pTcon, full_path,
+ rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
pinfo_buf,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
@@ -767,9 +769,10 @@ psx_del_no_retry:
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
if (rc == 0) {
- rc = CIFSSMBSetFileTimes(xid, pTcon,
- pinfo_buf,
- netfid);
+ rc = CIFSSMBSetFileInfo(xid, pTcon,
+ pinfo_buf,
+ netfid,
+ current->tgid);
CIFSSMBClose(xid, pTcon, netfid);
}
}
@@ -984,32 +987,41 @@ mkdir_get_info:
* failed to get it from the server or was set bogus */
if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
direntry->d_inode->i_nlink = 2;
+
mode &= ~current->fs->umask;
+ /* must turn on setgid bit if parent dir has it */
+ if (inode->i_mode & S_ISGID)
+ mode |= S_ISGID;
+
if (pTcon->unix_ext) {
+ struct cifs_unix_set_info_args args = {
+ .mode = mode,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode,
- (__u64)current->fsuid,
- (__u64)current->fsgid,
- 0 /* dev_t */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = (__u64)current->fsuid;
+ if (inode->i_mode & S_ISGID)
+ args.gid = (__u64)inode->i_gid;
+ else
+ args.gid = (__u64)current->fsgid;
} else {
- CIFSSMBUnixSetPerms(xid, pTcon, full_path,
- mode, (__u64)-1,
- (__u64)-1, 0 /* dev_t */,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
}
+ CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else {
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
(mode & S_IWUGO) == 0) {
FILE_BASIC_INFO pInfo;
memset(&pInfo, 0, sizeof(pInfo));
pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
- CIFSSMBSetTimes(xid, pTcon, full_path,
+ CIFSSMBSetPathInfo(xid, pTcon, full_path,
&pInfo, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1036,12 @@ mkdir_get_info:
CIFS_MOUNT_SET_UID) {
direntry->d_inode->i_uid =
current->fsuid;
- direntry->d_inode->i_gid =
- current->fsgid;
+ if (inode->i_mode & S_ISGID)
+ direntry->d_inode->i_gid =
+ inode->i_gid;
+ else
+ direntry->d_inode->i_gid =
+ current->fsgid;
}
}
}
@@ -1310,10 +1326,11 @@ int cifs_revalidate(struct dentry *direntry)
/* if (S_ISDIR(direntry->d_inode->i_mode))
shrink_dcache_parent(direntry); */
if (S_ISREG(direntry->d_inode->i_mode)) {
- if (direntry->d_inode->i_mapping)
+ if (direntry->d_inode->i_mapping) {
wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
if (wbrc)
CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
+ }
/* may eventually have to do this for open files too */
if (list_empty(&(cifsInode->openFileList))) {
/* changed on server - flush read ahead pages */
@@ -1489,30 +1506,228 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
return rc;
}
-int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+static int
+cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
+ char *full_path, __u32 dosattr)
{
+ int rc;
+ int oplock = 0;
+ __u16 netfid;
+ __u32 netpid;
+ bool set_time = false;
+ struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ FILE_BASIC_INFO info_buf;
+
+ if (attrs->ia_valid & ATTR_ATIME) {
+ set_time = true;
+ info_buf.LastAccessTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+ } else
+ info_buf.LastAccessTime = 0;
+
+ if (attrs->ia_valid & ATTR_MTIME) {
+ set_time = true;
+ info_buf.LastWriteTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
+ } else
+ info_buf.LastWriteTime = 0;
+
+ /*
+ * Samba throws this field away, but windows may actually use it.
+ * Do not set ctime unless other time stamps are changed explicitly
+ * (i.e. by utimes()) since we would then have a mix of client and
+ * server times.
+ */
+ if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
+ cFYI(1, ("CIFS - CTIME changed"));
+ info_buf.ChangeTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
+ } else
+ info_buf.ChangeTime = 0;
+
+ info_buf.CreationTime = 0; /* don't change */
+ info_buf.Attributes = cpu_to_le32(dosattr);
+
+ /*
+ * If the file is already open for write, just use that fileid
+ */
+ open_file = find_writable_file(cifsInode);
+ if (open_file) {
+ netfid = open_file->netfid;
+ netpid = open_file->pid;
+ goto set_via_filehandle;
+ }
+
+ /*
+ * NT4 apparently returns success on this call, but it doesn't
+ * really work.
+ */
+ if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
+ rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+ &info_buf, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc != -EOPNOTSUPP && rc != -EINVAL)
+ goto out;
+ }
+
+ cFYI(1, ("calling SetFileInfo since SetPathInfo for "
+ "times not supported by this server"));
+ rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
+ SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+ CREATE_NOT_DIR, &netfid, &oplock,
+ NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc != 0) {
+ if (rc == -EIO)
+ rc = -EINVAL;
+ goto out;
+ }
+
+ netpid = current->tgid;
+
+set_via_filehandle:
+ rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
+ if (open_file == NULL)
+ CIFSSMBClose(xid, pTcon, netfid);
+ else
+ atomic_dec(&open_file->wrtPending);
+out:
+ return rc;
+}
+
+static int
+cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
+{
+ int rc;
int xid;
- struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
char *full_path = NULL;
- int rc = -EACCES;
- FILE_BASIC_INFO time_buf;
- bool set_time = false;
- bool set_dosattr = false;
- __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
- __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
- __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
- struct cifsInodeInfo *cifsInode;
struct inode *inode = direntry->d_inode;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ struct cifs_unix_set_info_args *args = NULL;
+
+ cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
+ direntry->d_name.name, attrs->ia_valid));
+
+ xid = GetXid();
+
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
+ /* check if we have permission to change attrs */
+ rc = inode_change_ok(inode, attrs);
+ if (rc < 0)
+ goto out;
+ else
+ rc = 0;
+ }
+
+ full_path = build_path_from_dentry(direntry);
+ if (full_path == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
+ /*
+ Flush data before changing file size or changing the last
+ write time of the file on the server. If the
+ flush returns error, store it to report later and continue.
+ BB: This should be smarter. Why bother flushing pages that
+ will be truncated anyway? Also, should we error out here if
+ the flush returns error?
+ */
+ rc = filemap_write_and_wait(inode->i_mapping);
+ if (rc != 0) {
+ cifsInode->write_behind_rc = rc;
+ rc = 0;
+ }
+ }
+
+ if (attrs->ia_valid & ATTR_SIZE) {
+ rc = cifs_set_file_size(inode, attrs, xid, full_path);
+ if (rc != 0)
+ goto out;
+ }
+
+ /* skip mode change if it's just for clearing setuid/setgid */
+ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attrs->ia_valid &= ~ATTR_MODE;
+
+ args = kmalloc(sizeof(*args), GFP_KERNEL);
+ if (args == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* set up the struct */
+ if (attrs->ia_valid & ATTR_MODE)
+ args->mode = attrs->ia_mode;
+ else
+ args->mode = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_UID)
+ args->uid = attrs->ia_uid;
+ else
+ args->uid = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_GID)
+ args->gid = attrs->ia_gid;
+ else
+ args->gid = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_ATIME)
+ args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
+ else
+ args->atime = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_MTIME)
+ args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
+ else
+ args->mtime = NO_CHANGE_64;
+
+ if (attrs->ia_valid & ATTR_CTIME)
+ args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
+ else
+ args->ctime = NO_CHANGE_64;
+
+ args->device = 0;
+ rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (!rc)
+ rc = inode_setattr(inode, attrs);
+out:
+ kfree(args);
+ kfree(full_path);
+ FreeXid(xid);
+ return rc;
+}
+
+static int
+cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
+{
+ int xid;
+ struct inode *inode = direntry->d_inode;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ char *full_path = NULL;
+ int rc = -EACCES;
+ __u32 dosattr = 0;
+ __u64 mode = NO_CHANGE_64;
xid = GetXid();
cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
direntry->d_name.name, attrs->ia_valid));
- cifs_sb = CIFS_SB(inode->i_sb);
- pTcon = cifs_sb->tcon;
-
if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
/* check if we have permission to change attrs */
rc = inode_change_ok(inode, attrs);
@@ -1528,7 +1743,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
FreeXid(xid);
return -ENOMEM;
}
- cifsInode = CIFS_I(inode);
if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
/*
@@ -1559,21 +1773,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
* CIFSACL support + proper Windows to Unix idmapping, we may be
* able to support this in the future.
*/
- if (!pTcon->unix_ext &&
- !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
- } else {
- if (attrs->ia_valid & ATTR_UID) {
- cFYI(1, ("UID changed to %d", attrs->ia_uid));
- uid = attrs->ia_uid;
- }
- if (attrs->ia_valid & ATTR_GID) {
- cFYI(1, ("GID changed to %d", attrs->ia_gid));
- gid = attrs->ia_gid;
- }
- }
-
- time_buf.Attributes = 0;
/* skip mode change if it's just for clearing setuid/setgid */
if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1584,13 +1785,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
mode = attrs->ia_mode;
}
- if ((pTcon->unix_ext)
- && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
- rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
- 0 /* dev_t */, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- else if (attrs->ia_valid & ATTR_MODE) {
+ if (attrs->ia_valid & ATTR_MODE) {
rc = 0;
#ifdef CONFIG_CIFS_EXPERIMENTAL
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1599,24 +1794,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
#endif
if (((mode & S_IWUGO) == 0) &&
(cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
- set_dosattr = true;
- time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
- ATTR_READONLY);
+
+ dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
+
/* fix up mode if we're not using dynperm */
if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
attrs->ia_mode = inode->i_mode & ~S_IWUGO;
} else if ((mode & S_IWUGO) &&
(cifsInode->cifsAttrs & ATTR_READONLY)) {
- /* If file is readonly on server, we would
- not be able to write to it - so if any write
- bit is enabled for user or group or other we
- need to at least try to remove r/o dos attr */
- set_dosattr = true;
- time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
- (~ATTR_READONLY));
- /* Windows ignores set to zero */
- if (time_buf.Attributes == 0)
- time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
+
+ dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
+ /* Attributes of 0 are ignored */
+ if (dosattr == 0)
+ dosattr |= ATTR_NORMAL;
/* reset local inode permissions to normal */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1634,82 +1824,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
}
}
- if (attrs->ia_valid & ATTR_ATIME) {
- set_time = true;
- time_buf.LastAccessTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
- } else
- time_buf.LastAccessTime = 0;
-
- if (attrs->ia_valid & ATTR_MTIME) {
- set_time = true;
- time_buf.LastWriteTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
- } else
- time_buf.LastWriteTime = 0;
- /* Do not set ctime explicitly unless other time
- stamps are changed explicitly (i.e. by utime()
- since we would then have a mix of client and
- server times */
+ if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
+ ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
+ rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
+ /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
- if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
- set_time = true;
- /* Although Samba throws this field away
- it may be useful to Windows - but we do
- not want to set ctime unless some other
- timestamp is changing */
- cFYI(1, ("CIFS - CTIME changed"));
- time_buf.ChangeTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
- } else
- time_buf.ChangeTime = 0;
-
- if (set_time || set_dosattr) {
- time_buf.CreationTime = 0; /* do not change */
- /* In the future we should experiment - try setting timestamps
- via Handle (SetFileInfo) instead of by path */
- if (!(pTcon->ses->flags & CIFS_SES_NT4))
- rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- else
- rc = -EOPNOTSUPP;
-
- if (rc == -EOPNOTSUPP) {
- int oplock = 0;
- __u16 netfid;
-
- cFYI(1, ("calling SetFileInfo since SetPathInfo for "
- "times not supported by this server"));
- /* BB we could scan to see if we already have it open
- and pass in pid of opener to function */
- rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
- SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
- CREATE_NOT_DIR, &netfid, &oplock,
- NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
- netfid);
- CIFSSMBClose(xid, pTcon, netfid);
- } else {
- /* BB For even older servers we could convert time_buf
- into old DOS style which uses two second
- granularity */
-
- /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
- &time_buf, cifs_sb->local_nls); */
- }
- }
/* Even if error on time set, no sense failing the call if
the server would set the time to a reasonable value anyway,
and this check ensures that we are not being called from
sys_utimes in which case we ought to fail the call back to
the user when the server rejects the call */
if ((rc) && (attrs->ia_valid &
- (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
+ (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
rc = 0;
}
@@ -1723,6 +1849,21 @@ cifs_setattr_exit:
return rc;
}
+int
+cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+ struct inode *inode = direntry->d_inode;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+
+ if (pTcon->unix_ext)
+ return cifs_setattr_unix(direntry, attrs);
+
+ return cifs_setattr_nounix(direntry, attrs);
+
+ /* BB: add cifs_setattr_legacy for really old servers */
+}
+
#if 0
void cifs_delete_inode(struct inode *inode)
{
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index ed150efbe27c..252fdc0567f1 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
#ifdef CONFIG_CIFS_WEAK_PW_HASH
char lnm_session_key[CIFS_SESS_KEY_SIZE];
+ pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
+
/* no capabilities flags in old lanman negotiation */
pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
} else
ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
- } else if (type == Kerberos) {
+ } else if (type == Kerberos || type == MSKerberos) {
#ifdef CONFIG_CIFS_UPCALL
struct cifs_spnego_msg *msg;
spnego_key = cifs_get_spnego_key(ses);
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
}
msg = spnego_key->payload.data;
+ /* check version field to make sure that cifs.upcall is
+ sending us a response in an expected form */
+ if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+ cERROR(1, ("incorrect version of cifs.upcall (expected"
+ " %d but got %d)",
+ CIFS_SPNEGO_UPCALL_VERSION, msg->version));
+ rc = -EKEYREJECTED;
+ goto ssetup_exit;
+ }
/* bail out if key is too long */
if (msg->sesskey_len >
sizeof(ses->server->mac_signing_key.data.krb5)) {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98a..e286db9f5ee2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
cFYI(1, ("Sending smb: total_len %d", total_len));
dump_smb(smb_buffer, len);
+ i = 0;
while (total_len) {
rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
n_vec - first_vec, total_len);
diff --git a/fs/compat.c b/fs/compat.c
index c9d1472e65c5..075d0509970d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen,
if (buf->result)
return -EINVAL;
d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+ if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+ buf->result = -EOVERFLOW;
return -EOVERFLOW;
+ }
buf->result++;
dirent = buf->dirent;
if (!access_ok(VERIFY_WRITE, dirent,
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
if (reclen > buf->count)
return -EINVAL;
d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+ if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+ buf->error = -EOVERFLOW;
return -EOVERFLOW;
+ }
dirent = buf->previous;
if (dirent) {
if (__put_user(offset, &dirent->d_off))
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
#define CONFIGFS_USET_DEFAULT 0x0080
#define CONFIGFS_USET_DROPPING 0x0100
#define CONFIGFS_USET_IN_MKDIR 0x0200
+#define CONFIGFS_USET_CREATING 0x0400
#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
+extern struct mutex configfs_symlink_mutex;
extern spinlock_t configfs_dirent_lock;
extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
extern int configfs_make_dirent(struct configfs_dirent *,
struct dentry *, void *, umode_t, int);
+extern int configfs_dirent_is_ready(struct configfs_dirent *);
extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..8e93341f3e82 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
if (!error)
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
- CONFIGFS_DIR);
+ CONFIGFS_DIR | CONFIGFS_USET_CREATING);
if (!error) {
error = configfs_create(d, mode, init_dir);
if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
* configfs_create_dir - create a directory for an config_item.
* @item: config_itemwe're creating directory for.
* @dentry: config_item's dentry.
+ *
+ * Note: user-created entries won't be allowed under this new directory
+ * until it is validated by configfs_dir_set_ready()
*/
static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
return error;
}
+/*
+ * Allow userspace to create new entries under a new directory created with
+ * configfs_create_dir(), and under all of its chidlren directories recursively.
+ * @sd configfs_dirent of the new directory to validate
+ *
+ * Caller must hold configfs_dirent_lock.
+ */
+static void configfs_dir_set_ready(struct configfs_dirent *sd)
+{
+ struct configfs_dirent *child_sd;
+
+ sd->s_type &= ~CONFIGFS_USET_CREATING;
+ list_for_each_entry(child_sd, &sd->s_children, s_sibling)
+ if (child_sd->s_type & CONFIGFS_USET_CREATING)
+ configfs_dir_set_ready(child_sd);
+}
+
+/*
+ * Check that a directory does not belong to a directory hierarchy being
+ * attached and not validated yet.
+ * @sd configfs_dirent of the directory to check
+ *
+ * @return non-zero iff the directory was validated
+ *
+ * Note: takes configfs_dirent_lock, so the result may change from false to true
+ * in two consecutive calls, but never from true to false.
+ */
+int configfs_dirent_is_ready(struct configfs_dirent *sd)
+{
+ int ret;
+
+ spin_lock(&configfs_dirent_lock);
+ ret = !(sd->s_type & CONFIGFS_USET_CREATING);
+ spin_unlock(&configfs_dirent_lock);
+
+ return ret;
+}
+
int configfs_create_link(struct configfs_symlink *sl,
struct dentry *parent,
struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
* The only thing special about this is that we remove any files in
* the directory before we remove the directory, and we've inlined
* what used to be configfs_rmdir() below, instead of calling separately.
+ *
+ * Caller holds the mutex of the item's inode
*/
static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
struct configfs_dirent * sd;
int found = 0;
- int err = 0;
+ int err;
+
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ *
+ * This forbids userspace to read/write attributes of items which may
+ * not complete their initialization, since the dentries of the
+ * attributes won't be instantiated.
+ */
+ err = -ENOENT;
+ if (!configfs_dirent_is_ready(parent_sd))
+ goto out;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
return simple_lookup(dir, dentry, nd);
}
+out:
return ERR_PTR(err);
}
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
struct configfs_dirent *sd;
int ret;
+ /* Mark that we're trying to drop the group */
+ parent_sd->s_type |= CONFIGFS_USET_DROPPING;
+
ret = -EBUSY;
if (!list_empty(&parent_sd->s_links))
goto out;
ret = 0;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (sd->s_type & CONFIGFS_NOT_PINNED)
+ if (!sd->s_element ||
+ (sd->s_type & CONFIGFS_NOT_PINNED))
continue;
if (sd->s_type & CONFIGFS_USET_DEFAULT) {
/* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
return -EAGAIN;
}
- /* Mark that we're trying to drop the group */
- sd->s_type |= CONFIGFS_USET_DROPPING;
/*
* Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
struct configfs_dirent *parent_sd = dentry->d_fsdata;
struct configfs_dirent *sd;
- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+ parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
+
+ list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
+ if (sd->s_type & CONFIGFS_USET_DEFAULT)
configfs_detach_rollback(sd->s_dentry);
- sd->s_type &= ~CONFIGFS_USET_DROPPING;
- }
- }
}
static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
static int populate_groups(struct config_group *group)
{
struct config_group *new_group;
- struct dentry *dentry = group->cg_item.ci_dentry;
int ret = 0;
int i;
if (group->default_groups) {
- /*
- * FYI, we're faking mkdir here
- * I'm not sure we need this semaphore, as we're called
- * from our parent's mkdir. That holds our parent's
- * i_mutex, so afaik lookup cannot continue through our
- * parent to find us, let alone mess with our tree.
- * That said, taking our i_mutex is closer to mkdir
- * emulation, and shouldn't hurt.
- */
- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
-
for (i = 0; group->default_groups[i]; i++) {
new_group = group->default_groups[i];
ret = create_default_group(group, new_group);
- if (ret)
+ if (ret) {
+ detach_groups(group);
break;
+ }
}
-
- mutex_unlock(&dentry->d_inode->i_mutex);
}
- if (ret)
- detach_groups(group);
-
return ret;
}
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
if (!ret) {
ret = populate_attrs(item);
if (ret) {
+ /*
+ * We are going to remove an inode and its dentry but
+ * the VFS may already have hit and used them. Thus,
+ * we must lock them as rmdir() would.
+ */
+ mutex_lock(&dentry->d_inode->i_mutex);
configfs_remove_dir(item);
+ dentry->d_inode->i_flags |= S_DEAD;
+ mutex_unlock(&dentry->d_inode->i_mutex);
d_delete(dentry);
}
}
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
return ret;
}
+/* Caller holds the mutex of the item's inode */
static void configfs_detach_item(struct config_item *item)
{
detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
sd = dentry->d_fsdata;
sd->s_type |= CONFIGFS_USET_DIR;
+ /*
+ * FYI, we're faking mkdir in populate_groups()
+ * We must lock the group's inode to avoid races with the VFS
+ * which can already hit the inode and try to add/remove entries
+ * under it.
+ *
+ * We must also lock the inode to remove it safely in case of
+ * error, as rmdir() would.
+ */
+ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
ret = populate_groups(to_config_group(item));
if (ret) {
configfs_detach_item(item);
- d_delete(dentry);
+ dentry->d_inode->i_flags |= S_DEAD;
}
+ mutex_unlock(&dentry->d_inode->i_mutex);
+ if (ret)
+ d_delete(dentry);
}
return ret;
}
+/* Caller holds the mutex of the group's inode */
static void configfs_detach_group(struct config_item *item)
{
detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct configfs_subsystem *subsys;
struct configfs_dirent *sd;
struct config_item_type *type;
- struct module *owner = NULL;
+ struct module *subsys_owner = NULL, *new_item_owner = NULL;
char *name;
if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
sd = dentry->d_parent->d_fsdata;
+
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ if (!configfs_dirent_is_ready(sd)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
if (!(sd->s_type & CONFIGFS_USET_DIR)) {
ret = -EPERM;
goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
goto out_put;
}
+ /*
+ * The subsystem may belong to a different module than the item
+ * being created. We don't want to safely pin the new item but
+ * fail to pin the subsystem it sits under.
+ */
+ if (!subsys->su_group.cg_item.ci_type) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+ subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+ if (!try_module_get(subsys_owner)) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
if (!name) {
ret = -ENOMEM;
- goto out_put;
+ goto out_subsys_put;
}
snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
kfree(name);
if (ret) {
/*
- * If item == NULL, then link_obj() was never called.
+ * If ret != 0, then link_obj() was never called.
* There are no extra references to clean up.
*/
- goto out_put;
+ goto out_subsys_put;
}
/*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
goto out_unlink;
}
- owner = type->ct_owner;
- if (!try_module_get(owner)) {
+ new_item_owner = type->ct_owner;
+ if (!try_module_get(new_item_owner)) {
ret = -EINVAL;
goto out_unlink;
}
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
spin_lock(&configfs_dirent_lock);
sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
+ if (!ret)
+ configfs_dir_set_ready(dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
mutex_unlock(&subsys->su_mutex);
if (module_got)
- module_put(owner);
+ module_put(new_item_owner);
}
+out_subsys_put:
+ if (ret)
+ module_put(subsys_owner);
+
out_put:
/*
* link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
struct config_item *item;
struct configfs_subsystem *subsys;
struct configfs_dirent *sd;
- struct module *owner = NULL;
+ struct module *subsys_owner = NULL, *dead_item_owner = NULL;
int ret;
if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,14 +1303,26 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
return -EINVAL;
}
- spin_lock(&configfs_dirent_lock);
+ /* configfs_mkdir() shouldn't have allowed this */
+ BUG_ON(!subsys->su_group.cg_item.ci_type);
+ subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+
+ /*
+ * Ensure that no racing symlink() will make detach_prep() fail while
+ * the new link is temporarily attached
+ */
do {
struct mutex *wait_mutex;
+ mutex_lock(&configfs_symlink_mutex);
+ spin_lock(&configfs_dirent_lock);
ret = configfs_detach_prep(dentry, &wait_mutex);
- if (ret) {
+ if (ret)
configfs_detach_rollback(dentry);
- spin_unlock(&configfs_dirent_lock);
+ spin_unlock(&configfs_dirent_lock);
+ mutex_unlock(&configfs_symlink_mutex);
+
+ if (ret) {
if (ret != -EAGAIN) {
config_item_put(parent_item);
return ret;
@@ -1223,11 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
/* Wait until the racing operation terminates */
mutex_lock(wait_mutex);
mutex_unlock(wait_mutex);
-
- spin_lock(&configfs_dirent_lock);
}
} while (ret == -EAGAIN);
- spin_unlock(&configfs_dirent_lock);
/* Get a working ref for the duration of this function */
item = configfs_get_config_item(dentry);
@@ -1236,7 +1341,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
config_item_put(parent_item);
if (item->ci_type)
- owner = item->ci_type->ct_owner;
+ dead_item_owner = item->ci_type->ct_owner;
if (sd->s_type & CONFIGFS_USET_DIR) {
configfs_detach_group(item);
@@ -1258,7 +1363,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
/* Drop our reference from above */
config_item_put(item);
- module_put(owner);
+ module_put(dead_item_owner);
+ module_put(subsys_owner);
return 0;
}
@@ -1314,13 +1420,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
{
struct dentry * dentry = file->f_path.dentry;
struct configfs_dirent * parent_sd = dentry->d_fsdata;
+ int err;
mutex_lock(&dentry->d_inode->i_mutex);
- file->private_data = configfs_new_dirent(parent_sd, NULL);
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ err = -ENOENT;
+ if (configfs_dirent_is_ready(parent_sd)) {
+ file->private_data = configfs_new_dirent(parent_sd, NULL);
+ if (IS_ERR(file->private_data))
+ err = PTR_ERR(file->private_data);
+ else
+ err = 0;
+ }
mutex_unlock(&dentry->d_inode->i_mutex);
- return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0;
-
+ return err;
}
static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1608,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
if (err) {
d_delete(dentry);
dput(dentry);
+ } else {
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
}
}
@@ -1517,11 +1638,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
I_MUTEX_PARENT);
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
}
spin_unlock(&configfs_dirent_lock);
+ mutex_unlock(&configfs_symlink_mutex);
configfs_detach_group(&group->cg_item);
dentry->d_inode->i_flags |= S_DEAD;
mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
#include <linux/configfs.h>
#include "configfs_internal.h"
+/* Protects attachments of new symlinks */
+DEFINE_MUTEX(configfs_symlink_mutex);
+
static int item_depth(struct config_item * item)
{
struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
struct configfs_symlink *sl;
int ret;
+ ret = -ENOENT;
+ if (!configfs_dirent_is_ready(target_sd))
+ goto out;
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
+ if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
+ spin_unlock(&configfs_dirent_lock);
+ config_item_put(item);
+ kfree(sl);
+ return -ENOENT;
+ }
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
}
}
+out:
return ret;
}
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
{
int ret;
struct nameidata nd;
+ struct configfs_dirent *sd;
struct config_item *parent_item;
struct config_item *target_item;
struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
if (dentry->d_parent == configfs_sb->s_root)
goto out;
+ sd = dentry->d_parent->d_fsdata;
+ /*
+ * Fake invisibility if dir belongs to a group/default groups hierarchy
+ * being attached
+ */
+ ret = -ENOENT;
+ if (!configfs_dirent_is_ready(sd))
+ goto out;
+
parent_item = configfs_get_config_item(dentry->d_parent);
type = parent_item->ci_type;
+ ret = -EPERM;
if (!type || !type->ct_item_ops ||
!type->ct_item_ops->allow_link)
goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
ret = type->ct_item_ops->allow_link(parent_item, target_item);
if (!ret) {
+ mutex_lock(&configfs_symlink_mutex);
ret = create_link(parent_item, target_item, dentry);
+ mutex_unlock(&configfs_symlink_mutex);
if (ret && type->ct_item_ops->drop_link)
type->ct_item_ops->drop_link(parent_item,
target_item);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0c3b618c15b3..f40423eb1a14 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex);
static int cramfs_iget5_test(struct inode *inode, void *opaque)
{
struct cramfs_inode *cramfs_inode = opaque;
-
- if (inode->i_ino != CRAMINO(cramfs_inode))
- return 0; /* does not match */
-
- if (inode->i_ino != 1)
- return 1;
-
- /* all empty directories, char, block, pipe, and sock, share inode #1 */
-
- if ((inode->i_mode != cramfs_inode->mode) ||
- (inode->i_gid != cramfs_inode->gid) ||
- (inode->i_uid != cramfs_inode->uid))
- return 0; /* does not match */
-
- if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
- (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
- return 0; /* does not match */
-
- return 1; /* matches */
+ return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
}
static int cramfs_iget5_set(struct inode *inode, void *opaque)
{
- static struct timespec zerotime;
struct cramfs_inode *cramfs_inode = opaque;
- inode->i_mode = cramfs_inode->mode;
- inode->i_uid = cramfs_inode->uid;
- inode->i_size = cramfs_inode->size;
- inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
- inode->i_gid = cramfs_inode->gid;
- /* Struct copy intentional */
- inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
inode->i_ino = CRAMINO(cramfs_inode);
- /* inode->i_nlink is left 1 - arguably wrong for directories,
- but it's the best we can do without reading the directory
- contents. 1 yields the right result in GNU find, even
- without -noleaf option. */
- if (S_ISREG(inode->i_mode)) {
- inode->i_fop = &generic_ro_fops;
- inode->i_data.a_ops = &cramfs_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &cramfs_dir_inode_operations;
- inode->i_fop = &cramfs_directory_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &page_symlink_inode_operations;
- inode->i_data.a_ops = &cramfs_aops;
- } else {
- inode->i_size = 0;
- inode->i_blocks = 0;
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(cramfs_inode->size));
- }
return 0;
}
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
cramfs_iget5_test, cramfs_iget5_set,
cramfs_inode);
+ static struct timespec zerotime;
+
if (inode && (inode->i_state & I_NEW)) {
+ inode->i_mode = cramfs_inode->mode;
+ inode->i_uid = cramfs_inode->uid;
+ inode->i_size = cramfs_inode->size;
+ inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+ inode->i_gid = cramfs_inode->gid;
+ /* Struct copy intentional */
+ inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+ /* inode->i_nlink is left 1 - arguably wrong for directories,
+ but it's the best we can do without reading the directory
+ contents. 1 yields the right result in GNU find, even
+ without -noleaf option. */
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = &generic_ro_fops;
+ inode->i_data.a_ops = &cramfs_aops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &cramfs_dir_inode_operations;
+ inode->i_fop = &cramfs_directory_operations;
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = &page_symlink_inode_operations;
+ inode->i_data.a_ops = &cramfs_aops;
+ } else {
+ inode->i_size = 0;
+ inode->i_blocks = 0;
+ init_special_inode(inode, inode->i_mode,
+ old_decode_dev(cramfs_inode->size));
+ }
unlock_new_inode(inode);
}
return inode;
}
+static void cramfs_drop_inode(struct inode *inode)
+{
+ if (inode->i_ino == 1)
+ generic_delete_inode(inode);
+ else
+ generic_drop_inode(inode);
+}
+
/*
* We have our own block cache: don't fill up the buffer cache
* with the rom-image, because the way the filesystem is set
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = {
.put_super = cramfs_put_super,
.remount_fs = cramfs_remount,
.statfs = cramfs_statfs,
+ .drop_inode = cramfs_drop_inode,
};
static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb45..80e93956aced 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
return new;
}
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode: the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name: the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names to the
+ * same inode, only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * For a case-insensitive lookup match and if the the case-exact dentry
+ * already exists in in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact case name, allocate new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
+ struct qstr *name)
+{
+ int error;
+ struct dentry *found;
+ struct dentry *new;
+
+ /* Does a dentry matching the name exist already? */
+ found = d_hash_and_lookup(dentry->d_parent, name);
+ /* If not, create it now and return */
+ if (!found) {
+ new = d_alloc(dentry->d_parent, name);
+ if (!new) {
+ error = -ENOMEM;
+ goto err_out;
+ }
+ found = d_splice_alias(inode, new);
+ if (found) {
+ dput(new);
+ return found;
+ }
+ return new;
+ }
+ /* Matching dentry exists, check if it is negative. */
+ if (found->d_inode) {
+ if (unlikely(found->d_inode != inode)) {
+ /* This can't happen because bad inodes are unhashed. */
+ BUG_ON(!is_bad_inode(inode));
+ BUG_ON(!is_bad_inode(found->d_inode));
+ }
+ /*
+ * Already have the inode and the dentry attached, decrement
+ * the reference count to balance the iget() done
+ * earlier on. We found the dentry using d_lookup() so it
+ * cannot be disconnected and thus we do not need to worry
+ * about any NFS/disconnectedness issues here.
+ */
+ iput(inode);
+ return found;
+ }
+ /*
+ * Negative dentry: instantiate it unless the inode is a directory and
+ * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
+ * in which case d_move() that in place of the found dentry.
+ */
+ if (!S_ISDIR(inode->i_mode)) {
+ /* Not a directory; everything is easy. */
+ d_instantiate(found, inode);
+ return found;
+ }
+ spin_lock(&dcache_lock);
+ if (list_empty(&inode->i_dentry)) {
+ /*
+ * Directory without a 'disconnected' dentry; we need to do
+ * d_instantiate() by hand because it takes dcache_lock which
+ * we already hold.
+ */
+ list_add(&found->d_alias, &inode->i_dentry);
+ found->d_inode = inode;
+ spin_unlock(&dcache_lock);
+ security_d_instantiate(found, inode);
+ return found;
+ }
+ /*
+ * Directory with a 'disconnected' dentry; get a reference to the
+ * 'disconnected' dentry.
+ */
+ new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ dget_locked(new);
+ spin_unlock(&dcache_lock);
+ /* Do security vodoo. */
+ security_d_instantiate(found, inode);
+ /* Move new in place of found. */
+ d_move(new, found);
+ /* Balance the iget() we did above. */
+ iput(inode);
+ /* Throw away found. */
+ dput(found);
+ /* Use new as the actual dentry. */
+ return new;
+
+err_out:
+ iput(inode);
+ return ERR_PTR(error);
+}
/**
* d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
EXPORT_SYMBOL(d_validate);
EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06e..488eb424f662 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
#define DEVPTS_DEFAULT_MODE 0600
extern int pty_limit; /* Config limit on Unix98 ptys */
-static DEFINE_IDR(allocated_ptys);
+static DEFINE_IDA(allocated_ptys);
static DEFINE_MUTEX(allocated_ptys_lock);
static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
int devpts_new_index(void)
{
int index;
- int idr_ret;
+ int ida_ret;
retry:
- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+ if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
return -ENOMEM;
}
mutex_lock(&allocated_ptys_lock);
- idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
- if (idr_ret < 0) {
+ ida_ret = ida_get_new(&allocated_ptys, &index);
+ if (ida_ret < 0) {
mutex_unlock(&allocated_ptys_lock);
- if (idr_ret == -EAGAIN)
+ if (ida_ret == -EAGAIN)
goto retry;
return -EIO;
}
if (index >= pty_limit) {
- idr_remove(&allocated_ptys, index);
+ ida_remove(&allocated_ptys, index);
mutex_unlock(&allocated_ptys_lock);
return -EIO;
}
@@ -208,7 +208,7 @@ retry:
void devpts_kill_index(int idx)
{
mutex_lock(&allocated_ptys_lock);
- idr_remove(&allocated_ptys, idx);
+ ida_remove(&allocated_ptys, idx);
mutex_unlock(&allocated_ptys_lock);
}
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index c4e7d721bd8d..89d2fb7b991a 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -30,16 +30,16 @@
static struct config_group *space_list;
static struct config_group *comm_list;
-static struct comm *local_comm;
+static struct dlm_comm *local_comm;
-struct clusters;
-struct cluster;
-struct spaces;
-struct space;
-struct comms;
-struct comm;
-struct nodes;
-struct node;
+struct dlm_clusters;
+struct dlm_cluster;
+struct dlm_spaces;
+struct dlm_space;
+struct dlm_comms;
+struct dlm_comm;
+struct dlm_nodes;
+struct dlm_node;
static struct config_group *make_cluster(struct config_group *, const char *);
static void drop_cluster(struct config_group *, struct config_item *);
@@ -68,17 +68,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len);
-static ssize_t comm_nodeid_read(struct comm *cm, char *buf);
-static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t comm_local_read(struct comm *cm, char *buf);
-static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len);
-static ssize_t node_nodeid_read(struct node *nd, char *buf);
-static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
-static ssize_t node_weight_read(struct node *nd, char *buf);
-static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
-
-struct cluster {
+static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf);
+static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
+ size_t len);
+static ssize_t comm_local_read(struct dlm_comm *cm, char *buf);
+static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
+ size_t len);
+static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
+ size_t len);
+static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
+static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
+ size_t len);
+static ssize_t node_weight_read(struct dlm_node *nd, char *buf);
+static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
+ size_t len);
+
+struct dlm_cluster {
struct config_group group;
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
@@ -109,11 +114,11 @@ enum {
struct cluster_attribute {
struct configfs_attribute attr;
- ssize_t (*show)(struct cluster *, char *);
- ssize_t (*store)(struct cluster *, const char *, size_t);
+ ssize_t (*show)(struct dlm_cluster *, char *);
+ ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
};
-static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
int *info_field, int check_zero,
const char *buf, size_t len)
{
@@ -134,12 +139,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
}
#define CLUSTER_ATTR(name, check_zero) \
-static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \
{ \
return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
check_zero, buf, len); \
} \
-static ssize_t name##_read(struct cluster *cl, char *buf) \
+static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
} \
@@ -181,8 +186,8 @@ enum {
struct comm_attribute {
struct configfs_attribute attr;
- ssize_t (*show)(struct comm *, char *);
- ssize_t (*store)(struct comm *, const char *, size_t);
+ ssize_t (*show)(struct dlm_comm *, char *);
+ ssize_t (*store)(struct dlm_comm *, const char *, size_t);
};
static struct comm_attribute comm_attr_nodeid = {
@@ -222,8 +227,8 @@ enum {
struct node_attribute {
struct configfs_attribute attr;
- ssize_t (*show)(struct node *, char *);
- ssize_t (*store)(struct node *, const char *, size_t);
+ ssize_t (*show)(struct dlm_node *, char *);
+ ssize_t (*store)(struct dlm_node *, const char *, size_t);
};
static struct node_attribute node_attr_nodeid = {
@@ -248,26 +253,26 @@ static struct configfs_attribute *node_attrs[] = {
NULL,
};
-struct clusters {
+struct dlm_clusters {
struct configfs_subsystem subsys;
};
-struct spaces {
+struct dlm_spaces {
struct config_group ss_group;
};
-struct space {
+struct dlm_space {
struct config_group group;
struct list_head members;
struct mutex members_lock;
int members_count;
};
-struct comms {
+struct dlm_comms {
struct config_group cs_group;
};
-struct comm {
+struct dlm_comm {
struct config_item item;
int nodeid;
int local;
@@ -275,11 +280,11 @@ struct comm {
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};
-struct nodes {
+struct dlm_nodes {
struct config_group ns_group;
};
-struct node {
+struct dlm_node {
struct config_item item;
struct list_head list; /* space->members */
int nodeid;
@@ -372,38 +377,40 @@ static struct config_item_type node_type = {
.ct_owner = THIS_MODULE,
};
-static struct cluster *to_cluster(struct config_item *i)
+static struct dlm_cluster *to_cluster(struct config_item *i)
{
- return i ? container_of(to_config_group(i), struct cluster, group):NULL;
+ return i ? container_of(to_config_group(i), struct dlm_cluster, group) :
+ NULL;
}
-static struct space *to_space(struct config_item *i)
+static struct dlm_space *to_space(struct config_item *i)
{
- return i ? container_of(to_config_group(i), struct space, group) : NULL;
+ return i ? container_of(to_config_group(i), struct dlm_space, group) :
+ NULL;
}
-static struct comm *to_comm(struct config_item *i)
+static struct dlm_comm *to_comm(struct config_item *i)
{
- return i ? container_of(i, struct comm, item) : NULL;
+ return i ? container_of(i, struct dlm_comm, item) : NULL;
}
-static struct node *to_node(struct config_item *i)
+static struct dlm_node *to_node(struct config_item *i)
{
- return i ? container_of(i, struct node, item) : NULL;
+ return i ? container_of(i, struct dlm_node, item) : NULL;
}
static struct config_group *make_cluster(struct config_group *g,
const char *name)
{
- struct cluster *cl = NULL;
- struct spaces *sps = NULL;
- struct comms *cms = NULL;
+ struct dlm_cluster *cl = NULL;
+ struct dlm_spaces *sps = NULL;
+ struct dlm_comms *cms = NULL;
void *gps = NULL;
- cl = kzalloc(sizeof(struct cluster), GFP_KERNEL);
+ cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL);
gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
- sps = kzalloc(sizeof(struct spaces), GFP_KERNEL);
- cms = kzalloc(sizeof(struct comms), GFP_KERNEL);
+ sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL);
+ cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL);
if (!cl || !gps || !sps || !cms)
goto fail;
@@ -443,7 +450,7 @@ static struct config_group *make_cluster(struct config_group *g,
static void drop_cluster(struct config_group *g, struct config_item *i)
{
- struct cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = to_cluster(i);
struct config_item *tmp;
int j;
@@ -461,20 +468,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
- struct cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = to_cluster(i);
kfree(cl->group.default_groups);
kfree(cl);
}
static struct config_group *make_space(struct config_group *g, const char *name)
{
- struct space *sp = NULL;
- struct nodes *nds = NULL;
+ struct dlm_space *sp = NULL;
+ struct dlm_nodes *nds = NULL;
void *gps = NULL;
- sp = kzalloc(sizeof(struct space), GFP_KERNEL);
+ sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL);
gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL);
- nds = kzalloc(sizeof(struct nodes), GFP_KERNEL);
+ nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL);
if (!sp || !gps || !nds)
goto fail;
@@ -500,7 +507,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
static void drop_space(struct config_group *g, struct config_item *i)
{
- struct space *sp = to_space(i);
+ struct dlm_space *sp = to_space(i);
struct config_item *tmp;
int j;
@@ -517,16 +524,16 @@ static void drop_space(struct config_group *g, struct config_item *i)
static void release_space(struct config_item *i)
{
- struct space *sp = to_space(i);
+ struct dlm_space *sp = to_space(i);
kfree(sp->group.default_groups);
kfree(sp);
}
static struct config_item *make_comm(struct config_group *g, const char *name)
{
- struct comm *cm;
+ struct dlm_comm *cm;
- cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
+ cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL);
if (!cm)
return ERR_PTR(-ENOMEM);
@@ -539,7 +546,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
static void drop_comm(struct config_group *g, struct config_item *i)
{
- struct comm *cm = to_comm(i);
+ struct dlm_comm *cm = to_comm(i);
if (local_comm == cm)
local_comm = NULL;
dlm_lowcomms_close(cm->nodeid);
@@ -550,16 +557,16 @@ static void drop_comm(struct config_group *g, struct config_item *i)
static void release_comm(struct config_item *i)
{
- struct comm *cm = to_comm(i);
+ struct dlm_comm *cm = to_comm(i);
kfree(cm);
}
static struct config_item *make_node(struct config_group *g, const char *name)
{
- struct space *sp = to_space(g->cg_item.ci_parent);
- struct node *nd;
+ struct dlm_space *sp = to_space(g->cg_item.ci_parent);
+ struct dlm_node *nd;
- nd = kzalloc(sizeof(struct node), GFP_KERNEL);
+ nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
if (!nd)
return ERR_PTR(-ENOMEM);
@@ -578,8 +585,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
static void drop_node(struct config_group *g, struct config_item *i)
{
- struct space *sp = to_space(g->cg_item.ci_parent);
- struct node *nd = to_node(i);
+ struct dlm_space *sp = to_space(g->cg_item.ci_parent);
+ struct dlm_node *nd = to_node(i);
mutex_lock(&sp->members_lock);
list_del(&nd->list);
@@ -591,11 +598,11 @@ static void drop_node(struct config_group *g, struct config_item *i)
static void release_node(struct config_item *i)
{
- struct node *nd = to_node(i);
+ struct dlm_node *nd = to_node(i);
kfree(nd);
}
-static struct clusters clusters_root = {
+static struct dlm_clusters clusters_root = {
.subsys = {
.su_group = {
.cg_item = {
@@ -625,7 +632,7 @@ void dlm_config_exit(void)
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->show ? cla->show(cl, buf) : 0;
@@ -635,7 +642,7 @@ static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->store ? cla->store(cl, buf, len) : -EINVAL;
@@ -644,7 +651,7 @@ static ssize_t store_cluster(struct config_item *i,
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct comm *cm = to_comm(i);
+ struct dlm_comm *cm = to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->show ? cma->show(cm, buf) : 0;
@@ -653,29 +660,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct comm *cm = to_comm(i);
+ struct dlm_comm *cm = to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->store ? cma->store(cm, buf, len) : -EINVAL;
}
-static ssize_t comm_nodeid_read(struct comm *cm, char *buf)
+static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf)
{
return sprintf(buf, "%d\n", cm->nodeid);
}
-static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len)
+static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf,
+ size_t len)
{
cm->nodeid = simple_strtol(buf, NULL, 0);
return len;
}
-static ssize_t comm_local_read(struct comm *cm, char *buf)
+static ssize_t comm_local_read(struct dlm_comm *cm, char *buf)
{
return sprintf(buf, "%d\n", cm->local);
}
-static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len)
+static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
+ size_t len)
{
cm->local= simple_strtol(buf, NULL, 0);
if (cm->local && !local_comm)
@@ -683,7 +692,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len)
return len;
}
-static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
+static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
{
struct sockaddr_storage *addr;
@@ -705,7 +714,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct node *nd = to_node(i);
+ struct dlm_node *nd = to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->show ? nda->show(nd, buf) : 0;
@@ -714,29 +723,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct node *nd = to_node(i);
+ struct dlm_node *nd = to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->store ? nda->store(nd, buf, len) : -EINVAL;
}
-static ssize_t node_nodeid_read(struct node *nd, char *buf)
+static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
{
return sprintf(buf, "%d\n", nd->nodeid);
}
-static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len)
+static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
+ size_t len)
{
nd->nodeid = simple_strtol(buf, NULL, 0);
return len;
}
-static ssize_t node_weight_read(struct node *nd, char *buf)
+static ssize_t node_weight_read(struct dlm_node *nd, char *buf)
{
return sprintf(buf, "%d\n", nd->weight);
}
-static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
+static ssize_t node_weight_write(struct dlm_node *nd, const char *buf,
+ size_t len)
{
nd->weight = simple_strtol(buf, NULL, 0);
return len;
@@ -746,7 +757,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
* Functions for the dlm to get the info that's been configured
*/
-static struct space *get_space(char *name)
+static struct dlm_space *get_space(char *name)
{
struct config_item *i;
@@ -760,15 +771,15 @@ static struct space *get_space(char *name)
return to_space(i);
}
-static void put_space(struct space *sp)
+static void put_space(struct dlm_space *sp)
{
config_item_put(&sp->group.cg_item);
}
-static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
+static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{
struct config_item *i;
- struct comm *cm = NULL;
+ struct dlm_comm *cm = NULL;
int found = 0;
if (!comm_list)
@@ -801,7 +812,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
return cm;
}
-static void put_comm(struct comm *cm)
+static void put_comm(struct dlm_comm *cm)
{
config_item_put(&cm->item);
}
@@ -810,8 +821,8 @@ static void put_comm(struct comm *cm)
int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
int **new_out, int *new_count_out)
{
- struct space *sp;
- struct node *nd;
+ struct dlm_space *sp;
+ struct dlm_node *nd;
int i = 0, rv = 0, ids_count = 0, new_count = 0;
int *ids, *new;
@@ -874,8 +885,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
int dlm_node_weight(char *lsname, int nodeid)
{
- struct space *sp;
- struct node *nd;
+ struct dlm_space *sp;
+ struct dlm_node *nd;
int w = -EEXIST;
sp = get_space(lsname);
@@ -897,7 +908,7 @@ int dlm_node_weight(char *lsname, int nodeid)
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
{
- struct comm *cm = get_comm(nodeid, NULL);
+ struct dlm_comm *cm = get_comm(nodeid, NULL);
if (!cm)
return -EEXIST;
if (!cm->addr_count)
@@ -909,7 +920,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
{
- struct comm *cm = get_comm(0, addr);
+ struct dlm_comm *cm = get_comm(0, addr);
if (!cm)
return -EEXIST;
*nodeid = cm->nodeid;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 929e48ae7591..34f14a14fb4e 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -527,8 +527,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
k32buf = (struct dlm_write_request32 *)kbuf;
kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) -
sizeof(struct dlm_write_request32)), GFP_KERNEL);
- if (!kbuf)
+ if (!kbuf) {
+ kfree(k32buf);
return -ENOMEM;
+ }
if (proc)
set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
@@ -539,8 +541,10 @@ static ssize_t device_write(struct file *file, const char __user *buf,
/* do we really need this? can a write happen after a close? */
if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
- (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)))
- return -EINVAL;
+ (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
+ error = -EINVAL;
+ goto out_free;
+ }
sigfillset(&allsigs);
sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74ce..8ec4d6cc7633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
return ret;
}
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+ struct path *path)
+{
+ int error = security_quota_on(path->dentry);
+ if (error)
+ return error;
+ /* Quota file not on the same filesystem? */
+ if (path->mnt->mnt_sb != sb)
+ error = -EXDEV;
+ else
+ error = vfs_quota_on_inode(path->dentry->d_inode, type,
+ format_id);
+ return error;
+}
+
/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
return vfs_quota_on_remount(sb, type);
error = path_lookup(path, LOOKUP_FOLLOW, &nd);
- if (error < 0)
- return error;
- error = security_quota_on(nd.path.dentry);
- if (error)
- goto out_path;
- /* Quota file not on the same filesystem? */
- if (nd.path.mnt->mnt_sb != sb)
- error = -EXDEV;
- else
- error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
- format_id);
-out_path:
- path_put(&nd.path);
+ if (!error) {
+ error = vfs_quota_on_path(sb, type, format_id, &nd.path);
+ path_put(&nd.path);
+ }
return error;
}
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
EXPORT_SYMBOL(dq_data_lock);
EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(vfs_quota_on_path);
EXPORT_SYMBOL(vfs_quota_on_mount);
EXPORT_SYMBOL(vfs_quota_off);
EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 3a404e7fad53..291abb11e20e 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
}
unlock_kernel();
- d_add(dentry, inode);
- return NULL;
+ return d_splice_alias(inode, dentry);
}
static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0c87474f7917..7cc0eb756b55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,10 +1041,7 @@ retry:
}
/*
- * It opens an eventpoll file descriptor. The "size" parameter is there
- * for historical reasons, when epoll was using an hash instead of an
- * RB tree. With the current implementation, the "size" parameter is ignored
- * (besides sanity checks).
+ * Open an eventpoll file descriptor.
*/
asmlinkage long sys_epoll_create1(int flags)
{
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced384674..f38a5afc39a1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
journal_unlock_updates(EXT3_SB(sb)->s_journal);
}
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, remount);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e165446..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
- for (n=0; n < count; n++) {
+ for (n = 0; n < count; n++) {
ext4_acl_entry *entry =
(ext4_acl_entry *)value;
if ((char *)value + sizeof(ext4_acl_entry_short) > end)
goto fail;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- value = (char *)value +
- sizeof(ext4_acl_entry_short);
- acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
- break;
-
- case ACL_USER:
- case ACL_GROUP:
- value = (char *)value + sizeof(ext4_acl_entry);
- if ((char *)value > end)
- goto fail;
- acl->a_entries[n].e_id =
- le32_to_cpu(entry->e_id);
- break;
-
- default:
+
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ value = (char *)value +
+ sizeof(ext4_acl_entry_short);
+ acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ value = (char *)value + sizeof(ext4_acl_entry);
+ if ((char *)value > end)
goto fail;
+ acl->a_entries[n].e_id =
+ le32_to_cpu(entry->e_id);
+ break;
+
+ default:
+ goto fail;
}
}
if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
e = (char *)ext_acl + sizeof(ext4_acl_header);
- for (n=0; n < acl->a_count; n++) {
+ for (n = 0; n < acl->a_count; n++) {
ext4_acl_entry *entry = (ext4_acl_entry *)e;
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
- switch(acl->a_entries[n].e_tag) {
- case ACL_USER:
- case ACL_GROUP:
- entry->e_id =
- cpu_to_le32(acl->a_entries[n].e_id);
- e += sizeof(ext4_acl_entry);
- break;
-
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- e += sizeof(ext4_acl_entry_short);
- break;
-
- default:
- goto fail;
+ switch (acl->a_entries[n].e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+ e += sizeof(ext4_acl_entry);
+ break;
+
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ e += sizeof(ext4_acl_entry_short);
+ break;
+
+ default:
+ goto fail;
}
}
return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
if (!test_opt(inode->i_sb, POSIX_ACL))
return NULL;
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = ext4_iget_acl(inode, &ei->i_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- acl = ext4_iget_acl(inode, &ei->i_default_acl);
- if (acl != EXT4_ACL_NOT_CACHED)
- return acl;
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- break;
-
- default:
- return ERR_PTR(-EINVAL);
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ acl = ext4_iget_acl(inode, &ei->i_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ acl = ext4_iget_acl(inode, &ei->i_default_acl);
+ if (acl != EXT4_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+
+ default:
+ return ERR_PTR(-EINVAL);
}
retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
kfree(value);
if (!IS_ERR(acl)) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- switch(type) {
- case ACL_TYPE_ACCESS:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
- return error;
- else {
- inode->i_mode = mode;
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ return error;
+ else {
+ inode->i_mode = mode;
+ ext4_mark_inode_dirty(handle, inode);
+ if (error == 0)
+ acl = NULL;
}
- break;
+ }
+ break;
- case ACL_TYPE_DEFAULT:
- name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
+ case ACL_TYPE_DEFAULT:
+ name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
- default:
- return -EINVAL;
+ default:
+ return -EINVAL;
}
if (acl) {
value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
kfree(value);
if (!error) {
- switch(type) {
- case ACL_TYPE_ACCESS:
- ext4_iset_acl(inode, &ei->i_acl, acl);
- break;
-
- case ACL_TYPE_DEFAULT:
- ext4_iset_acl(inode, &ei->i_default_acl, acl);
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ext4_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext4_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
}
}
return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
if (unlikely(!bh)) {
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __func__,
"Cannot read block bitmap - "
- "block_group = %d, block_bitmap = %llu",
- (int)block_group, (unsigned long long)bitmap_blk);
+ "block_group = %lu, block_bitmap = %llu",
+ block_group, bitmap_blk);
return NULL;
}
ext4_valid_block_bitmap(sb, desc, block_group, bh);
@@ -1623,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
free_blocks =
percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
#endif
+ if (free_blocks <= root_blocks)
+ /* we don't have free space */
+ return 0;
if (free_blocks - root_blocks < nblocks)
return free_blocks - root_blocks;
return nblocks;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
get_dtype(sb, fname->file_type));
if (error) {
filp->f_pos = curr_pos;
- info->extra_fname = fname->next;
+ info->extra_fname = fname;
return error;
}
fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
* If there are any leftover names on the hash collision
* chain, return them first.
*/
- if (info->extra_fname &&
- call_filldir(filp, dirent, filldir, info->extra_fname))
- goto finished;
+ if (info->extra_fname) {
+ if (call_filldir(filp, dirent, filldir, info->extra_fname))
+ goto finished;
- if (!info->curr_node)
+ info->extra_fname = NULL;
+ info->curr_node = rb_next(info->curr_node);
+ if (!info->curr_node) {
+ if (info->next_hash == ~0) {
+ filp->f_pos = EXT4_HTREE_EOF;
+ goto finished;
+ }
+ info->curr_hash = info->next_hash;
+ info->curr_minor_hash = 0;
+ }
+ } else if (!info->curr_node)
info->curr_node = rb_first(&info->root);
while (1) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
/* inode.c */
-void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1073,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1228,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* extents.c */
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
+ int chunk);
extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+ int num,
+ struct ext4_ext_path *path);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
EXT4_XATTR_TRANS_BLOCKS - 2 + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+/*
+ * Define the number of metadata blocks we need to account to modify data.
+ *
+ * This include super block, inode block, quota blocks and xattr blocks
+ */
+#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
+ 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+
/* Delete operations potentially hit one directory's namespace plus an
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
* generous. We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
- if (err)
+ if (err <= 0)
return err;
return ext4_journal_restart(handle, needed);
}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
/*
* get the next allocated block if the extent in the path
- * is before the requested block(s)
+ * is before the requested block(s)
*/
if (b2 < b1) {
b2 = ext4_ext_next_allocated_block(path);
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
}
/*
- * ext4_ext_calc_credits_for_insert:
- * This routine returns max. credits that the extent tree can consume.
- * It should be OK for low-performance paths like ->writepage()
- * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under i_data_sem and
- * pass the actual path.
+ * ext4_ext_calc_credits_for_single_extent:
+ * This routine returns max. credits that needed to insert an extent
+ * to the extent tree.
+ * When pass the actual path, the caller should calculate credits
+ * under i_data_sem.
*/
-int ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
struct ext4_ext_path *path)
{
- int depth, needed;
-
if (path) {
+ int depth = ext_depth(inode);
+ int ret = 0;
+
/* probably there is space in leaf? */
- depth = ext_depth(inode);
if (le16_to_cpu(path[depth].p_hdr->eh_entries)
- < le16_to_cpu(path[depth].p_hdr->eh_max))
- return 1;
- }
-
- /*
- * given 32-bit logical block (4294967296 blocks), max. tree
- * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
- * Let's also add one more level for imbalance.
- */
- depth = 5;
+ < le16_to_cpu(path[depth].p_hdr->eh_max)) {
- /* allocation of new data block(s) */
- needed = 2;
+ /*
+ * There are some space in the leaf tree, no
+ * need to account for leaf block credit
+ *
+ * bitmaps and block group descriptor blocks
+ * and other metadat blocks still need to be
+ * accounted.
+ */
+ /* 1 bitmap, 1 block group descriptor */
+ ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+ }
+ }
- /*
- * tree can be full, so it would need to grow in depth:
- * we need one credit to modify old root, credits for
- * new root will be added in split accounting
- */
- needed += 1;
+ return ext4_chunk_trans_blocks(inode, nrblocks);
+}
- /*
- * Index split can happen, we would need:
- * allocate intermediate indexes (bitmap + group)
- * + change two blocks at each level, but root (already included)
- */
- needed += (depth * 2) + (depth * 2);
+/*
+ * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ *
+ * if nrblocks are fit in a single extent (chunk flag is 1), then
+ * in the worse case, each tree level index/leaf need to be changed
+ * if the tree split due to insert a new extent, then the old tree
+ * index/leaf need to be updated too
+ *
+ * If the nrblocks are discontiguous, they could cause
+ * the whole tree split more than once, but this is really rare.
+ */
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+ int index;
+ int depth = ext_depth(inode);
- /* any allocation modifies superblock */
- needed += 1;
+ if (chunk)
+ index = depth * 2;
+ else
+ index = depth * 3;
- return needed;
+ return index;
}
static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1910,16 +1917,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
BUG_ON(b != ex_ee_block + ex_ee_len - 1);
}
- /* at present, extent can't cross block group: */
- /* leaf + bitmap + group desc + sb + inode */
- credits = 5;
+ /*
+ * 3 for leaf, sb, and inode plus 2 (bmap and group
+ * descriptor) for each block group; assume two block
+ * groups plus ex_ee_len/blocks_per_block_group for
+ * the worst case
+ */
+ credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
if (ex == EXT_FIRST_EXTENT(eh)) {
correct_index = 1;
credits += (ext_depth(inode)) + 1;
}
-#ifdef CONFIG_QUOTA
credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
err = ext4_ext_journal_restart(handle, credits);
if (err)
@@ -2323,7 +2332,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
if (allocated <= EXT4_EXT_ZERO_LEN) {
- /* Mark first half uninitialized.
+ /*
+ * iblock == ee_block is handled by the zerouout
+ * at the beginning.
+ * Mark first half uninitialized.
* Mark second half initialized and zero out the
* initialized extent
*/
@@ -2346,7 +2358,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2374,6 +2386,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = PTR_ERR(path);
return err;
}
+ /* get the second half extent details */
ex = path[depth].p_ext;
err = ext4_ext_get_access(handle, inode,
path + depth);
@@ -2403,6 +2416,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
+ /* blocks available from iblock */
return allocated;
} else if (err)
@@ -2418,23 +2432,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
orig_ex.ee_len = cpu_to_le16(ee_len -
ext4_ext_get_actual_len(ex3));
- if (newdepth != depth) {
- depth = newdepth;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, iblock, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
- if (ex2 != &newex)
- ex2 = ex;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
+ depth = newdepth;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
}
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ if (ex2 != &newex)
+ ex2 = ex;
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
allocated = max_blocks;
/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2465,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
+ /* blocks available from iblock */
return allocated;
}
}
@@ -2796,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
/*
* probably first extent we're gonna free will be last in block
*/
- err = ext4_writepage_trans_blocks(inode) + 3;
+ err = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, err);
if (IS_ERR(handle))
return;
@@ -2810,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
ext4_ext_invalidate_cache(inode);
- ext4_mb_discard_inode_preallocations(inode);
+ ext4_discard_reservation(inode);
/*
* TODO: optimization is possible here.
@@ -2849,27 +2863,6 @@ out_stop:
ext4_journal_stop(handle);
}
-/*
- * ext4_ext_writepage_trans_blocks:
- * calculate max number of blocks we could modify
- * in order to allocate new block for an inode
- */
-int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
-{
- int needed;
-
- needed = ext4_ext_calc_credits_for_insert(inode, NULL);
-
- /* caller wants to allocate num blocks, but note it includes sb */
- needed = needed * num - (num - 1);
-
-#ifdef CONFIG_QUOTA
- needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
- return needed;
-}
-
static void ext4_falloc_update_inode(struct inode *inode,
int mode, loff_t new_size, int update_ctime)
{
@@ -2930,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- block;
/*
- * credits to insert 1 extent into extent tree + buffers to be able to
- * modify 1 super block, 1 block bitmap and 1 group descriptor.
+ * credits to insert 1 extent into extent tree
*/
- credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+ credits = ext4_chunk_trans_blocks(inode, max_blocks);
mutex_lock(&inode->i_mutex);
retry:
while (ret >= 0 && ret < max_blocks) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
* Return buffer_head of bitmap on success or NULL.
*/
static struct buffer_head *
-read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
+ ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
- goto error_out;
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
- if (!buffer_uptodate(bh)) {
- lock_buffer(bh);
- if (!buffer_uptodate(bh)) {
- ext4_init_inode_bitmap(sb, bh, block_group,
- desc);
- set_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
- }
- } else {
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ return NULL;
+ bitmap_blk = ext4_inode_bitmap(sb, desc);
+ bh = sb_getblk(sb, bitmap_blk);
+ if (unlikely(!bh)) {
+ ext4_error(sb, __func__,
+ "Cannot read inode bitmap - "
+ "block_group = %lu, inode_bitmap = %llu",
+ block_group, bitmap_blk);
+ return NULL;
}
- if (!bh)
- ext4_error(sb, "read_inode_bitmap",
+ if (bh_uptodate_or_lock(bh))
+ return bh;
+
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ ext4_init_inode_bitmap(sb, bh, block_group, desc);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ return bh;
+ }
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (bh_submit_read(bh) < 0) {
+ put_bh(bh);
+ ext4_error(sb, __func__,
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %llu",
- block_group, ext4_inode_bitmap(sb, desc));
-error_out:
+ block_group, bitmap_blk);
+ return NULL;
+ }
return bh;
}
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
@@ -341,7 +351,7 @@ find_close_to_parent:
goto found_flexbg;
}
- if (best_flex < 0 ||
+ if (flex_group[best_flex].free_inodes == 0 ||
(flex_group[i].free_blocks >
flex_group[best_flex].free_blocks &&
flex_group[i].free_inodes))
@@ -623,7 +633,7 @@ got_group:
goto fail;
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail;
@@ -728,7 +738,7 @@ got:
/* When marking the block group with
* ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unsed even though
+ * on the value of bg_itable_unused even though
* mke2fs could have initialized the same for us.
* Instead we calculated the value below
*/
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
- bitmap_bh = read_inode_bitmap(sb, block_group);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh) {
ext4_warning(sb, __func__,
"inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
brelse(bitmap_bh);
- bitmap_bh = read_inode_bitmap(sb, i);
+ bitmap_bh = ext4_read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c235..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
#include "acl.h"
#include "ext4_extents.h"
+#define MPAGE_DA_EXTENT_TAIL 0x01
+
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
@@ -191,6 +193,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
void ext4_delete_inode (struct inode * inode)
{
handle_t *handle;
+ int err;
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +202,9 @@ void ext4_delete_inode (struct inode * inode)
if (is_bad_inode(inode))
goto no_delete;
- handle = start_transaction(inode);
+ handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
+ ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
* If we're going to skip the normal cleanup, we still need to
* make sure that the in-core orphan linked list is properly
@@ -213,8 +217,34 @@ void ext4_delete_inode (struct inode * inode)
if (IS_SYNC(inode))
handle->h_sync = 1;
inode->i_size = 0;
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't mark inode dirty (err %d)", err);
+ goto stop_handle;
+ }
if (inode->i_blocks)
ext4_truncate(inode);
+
+ /*
+ * ext4_ext_truncate() doesn't reserve any slop when it
+ * restarts journal transactions; therefore there may not be
+ * enough credits left in the handle to remove the inode from
+ * the orphan list and set the dtime field.
+ */
+ if (handle->h_buffer_credits < 3) {
+ err = ext4_journal_extend(handle, 3);
+ if (err > 0)
+ err = ext4_journal_restart(handle, 3);
+ if (err != 0) {
+ ext4_warning(inode->i_sb, __func__,
+ "couldn't extend journal (err %d)", err);
+ stop_handle:
+ ext4_journal_stop(handle);
+ goto no_delete;
+ }
+ }
+
/*
* Kill off the orphan record which ext4_truncate created.
* AKPM: I think this can be inside the above `if'.
@@ -952,23 +982,74 @@ out:
return err;
}
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
/*
- * Number of credits we need for writing DIO_MAX_BLOCKS:
- * We need sb + group descriptor + bitmap + inode -> 4
- * For B blocks with A block pointers per block we need:
- * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
- * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
*/
-#define DIO_CREDITS 25
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ int ind_blks, dind_blks, tind_blks;
+
+ /* number of new indirect blocks needed */
+ ind_blks = (blocks + icap - 1) / icap;
+
+ dind_blks = (ind_blks + icap - 1) / icap;
+ tind_blks = 1;
+
+ return ind_blks + dind_blks + tind_blks;
+}
/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ if (!blocks)
+ return 0;
+
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_calc_metadata_amount(inode, blocks);
+
+ return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, mdb_free;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ /* recalculate the number of metablocks still need to be reserved */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used;
+ mdb = ext4_calc_metadata_amount(inode, total);
+
+ /* figure out how many metablocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* update fs free blocks counter for truncate case */
+ percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+
+ /* update per-inode reservations */
+ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
+/*
+ * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * and returns if the blocks are already mapped.
*
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
* Otherwise it takes the write lock of the i_data_sem and allocate blocks
* and store the allocated blocks in the result buffer head and mark it
* mapped.
@@ -1069,26 +1150,30 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
* which were deferred till now
*/
if ((retval > 0) && buffer_delay(bh))
- ext4_da_release_space(inode, retval, 0);
+ ext4_da_update_reserve_space(inode, retval);
}
up_write((&EXT4_I(inode)->i_data_sem));
return retval;
}
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
static int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
handle_t *handle = ext4_journal_current_handle();
int ret = 0, started = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int dio_credits;
if (create && !handle) {
/* Direct IO write... */
if (max_blocks > DIO_MAX_BLOCKS)
max_blocks = DIO_MAX_BLOCKS;
- handle = ext4_journal_start(inode, DIO_CREDITS +
- 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+ dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+ handle = ext4_journal_start(inode, dio_credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -1336,12 +1421,8 @@ static int ext4_ordered_write_end(struct file *file,
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
- unsigned from, to;
int ret = 0, ret2;
- from = pos & (PAGE_CACHE_SIZE - 1);
- to = from + len;
-
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
@@ -1437,36 +1518,6 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
-
- dind_blks = (ind_blks + icap - 1) / icap;
-
- tind_blks = 1;
-
- return ind_blks + dind_blks + tind_blks;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
-
- return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
@@ -1490,7 +1541,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC;
}
-
/* reduce fs free blocks counter */
percpu_counter_sub(&sbi->s_freeblocks_counter, total);
@@ -1501,35 +1551,49 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
return 0; /* success */
}
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free, release;
+ if (!to_free)
+ return; /* Nothing to release, exit */
+
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ /*
+ * if there is no reserved blocks, but we try to free some
+ * then the counter is messed up somewhere.
+ * but since this function is called from invalidate
+ * page, it's harmless to return without any action
+ */
+ printk(KERN_INFO "ext4 delalloc try to release %d reserved "
+ "blocks for inode %lu, but there is no reserved "
+ "data blocks\n", to_free, inode->i_ino);
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return;
+ }
+
/* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
release = to_free + mdb_free;
/* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, release);
/* update per-inode reservations */
- BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= to_free;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1551,7 +1615,7 @@ static void ext4_da_page_release_reservation(struct page *page,
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- ext4_da_release_space(page->mapping->host, 0, to_release);
+ ext4_da_release_space(page->mapping->host, to_release);
}
/*
@@ -1564,11 +1628,13 @@ struct mpage_da_data {
unsigned long first_page, next_page; /* extent of pages */
get_block_t *get_block;
struct writeback_control *wbc;
+ int io_done;
+ long pages_written;
};
/*
* mpage_da_submit_io - walks through extent of pages and try to write
- * them with __mpage_writepage()
+ * them with writepage() call back
*
* @mpd->inode: inode
* @mpd->first_page: first page of the extent
@@ -1583,18 +1649,11 @@ struct mpage_da_data {
static int mpage_da_submit_io(struct mpage_da_data *mpd)
{
struct address_space *mapping = mpd->inode->i_mapping;
- struct mpage_data mpd_pp = {
- .bio = NULL,
- .last_block_in_bio = 0,
- .get_block = mpd->get_block,
- .use_writepage = 1,
- };
int ret = 0, err, nr_pages, i;
unsigned long index, end;
struct pagevec pvec;
BUG_ON(mpd->next_page <= mpd->first_page);
-
pagevec_init(&pvec, 0);
index = mpd->first_page;
end = mpd->next_page - 1;
@@ -1612,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
break;
index++;
- err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
-
+ err = mapping->a_ops->writepage(page, mpd->wbc);
+ if (!err)
+ mpd->pages_written++;
/*
* In error case, we have to continue because
* remaining pages are still locked
@@ -1624,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
}
pagevec_release(&pvec);
}
- if (mpd_pp.bio)
- mpage_bio_submit(WRITE, mpd_pp.bio);
-
return ret;
}
@@ -1649,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
int blocks = exbh->b_size >> inode->i_blkbits;
sector_t pblock = exbh->b_blocknr, cur_logical;
struct buffer_head *head, *bh;
- unsigned long index, end;
+ pgoff_t index, end;
struct pagevec pvec;
int nr_pages, i;
@@ -1692,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
if (buffer_delay(bh)) {
bh->b_blocknr = pblock;
clear_buffer_delay(bh);
+ bh->b_bdev = inode->i_sb->s_bdev;
+ } else if (buffer_unwritten(bh)) {
+ bh->b_blocknr = pblock;
+ clear_buffer_unwritten(bh);
+ set_buffer_mapped(bh);
+ set_buffer_new(bh);
+ bh->b_bdev = inode->i_sb->s_bdev;
} else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
@@ -1727,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
*
* The function skips space we know is already mapped to disk blocks.
*
- * The function ignores errors ->get_block() returns, thus real
- * error handling is postponed to __mpage_writepage()
*/
static void mpage_da_map_blocks(struct mpage_da_data *mpd)
{
+ int err = 0;
struct buffer_head *lbh = &mpd->lbh;
- int err = 0, remain = lbh->b_size;
sector_t next = lbh->b_blocknr;
struct buffer_head new;
@@ -1743,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
if (buffer_mapped(lbh) && !buffer_delay(lbh))
return;
- while (remain) {
- new.b_state = lbh->b_state;
- new.b_blocknr = 0;
- new.b_size = remain;
- err = mpd->get_block(mpd->inode, next, &new, 1);
- if (err) {
- /*
- * Rather than implement own error handling
- * here, we just leave remaining blocks
- * unallocated and try again with ->writepage()
- */
- break;
- }
- BUG_ON(new.b_size == 0);
+ new.b_state = lbh->b_state;
+ new.b_blocknr = 0;
+ new.b_size = lbh->b_size;
- if (buffer_new(&new))
- __unmap_underlying_blocks(mpd->inode, &new);
+ /*
+ * If we didn't accumulate anything
+ * to write simply return
+ */
+ if (!new.b_size)
+ return;
+ err = mpd->get_block(mpd->inode, next, &new, 1);
+ if (err)
+ return;
+ BUG_ON(new.b_size == 0);
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if (buffer_delay(lbh))
- mpage_put_bnr_to_bhs(mpd, next, &new);
+ if (buffer_new(&new))
+ __unmap_underlying_blocks(mpd->inode, &new);
- /* go for the remaining blocks */
- next += new.b_size >> mpd->inode->i_blkbits;
- remain -= new.b_size;
- }
+ /*
+ * If blocks are delayed marked, we need to
+ * put actual blocknr and drop delayed bit
+ */
+ if (buffer_delay(lbh) || buffer_unwritten(lbh))
+ mpage_put_bnr_to_bhs(mpd, next, &new);
+
+ return;
}
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
+ (1 << BH_Delay) | (1 << BH_Unwritten))
/*
* mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -1788,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
sector_t logical, struct buffer_head *bh)
{
- struct buffer_head *lbh = &mpd->lbh;
sector_t next;
+ size_t b_size = bh->b_size;
+ struct buffer_head *lbh = &mpd->lbh;
+ int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
- next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
-
+ /* check if thereserved journal credits might overflow */
+ if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+ if (nrblocks >= EXT4_MAX_TRANS_DATA) {
+ /*
+ * With non-extent format we are limited by the journal
+ * credit available. Total credit needed to insert
+ * nrblocks contiguous blocks is dependent on the
+ * nrblocks. So limit nrblocks.
+ */
+ goto flush_it;
+ } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
+ EXT4_MAX_TRANS_DATA) {
+ /*
+ * Adding the new buffer_head would make it cross the
+ * allowed limit for which we have journal credit
+ * reserved. So limit the new bh->b_size
+ */
+ b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
+ mpd->inode->i_blkbits;
+ /* we will do mpage_da_submit_io in the next loop */
+ }
+ }
/*
* First block in the extent
*/
if (lbh->b_size == 0) {
lbh->b_blocknr = logical;
- lbh->b_size = bh->b_size;
+ lbh->b_size = b_size;
lbh->b_state = bh->b_state & BH_FLAGS;
return;
}
+ next = lbh->b_blocknr + nrblocks;
/*
* Can we merge the block to our big extent?
*/
if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
- lbh->b_size += bh->b_size;
+ lbh->b_size += b_size;
return;
}
+flush_it:
/*
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
mpage_da_map_blocks(mpd);
-
- /*
- * Now start a new extent
- */
- lbh->b_size = bh->b_size;
- lbh->b_state = bh->b_state & BH_FLAGS;
- lbh->b_blocknr = logical;
+ mpage_da_submit_io(mpd);
+ mpd->io_done = 1;
+ return;
}
/*
@@ -1842,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page,
struct buffer_head *bh, *head, fake;
sector_t logical;
+ if (mpd->io_done) {
+ /*
+ * Rest of the page in the page_vec
+ * redirty then and skip then. We will
+ * try to to write them again after
+ * starting a new transaction
+ */
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return MPAGE_DA_EXTENT_TAIL;
+ }
/*
* Can we merge this page to current extent?
*/
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
- * and start IO on them using __mpage_writepage()
+ * and start IO on them using writepage()
*/
if (mpd->next_page != mpd->first_page) {
mpage_da_map_blocks(mpd);
mpage_da_submit_io(mpd);
+ /*
+ * skip rest of the page in the page_vec
+ */
+ mpd->io_done = 1;
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return MPAGE_DA_EXTENT_TAIL;
}
/*
@@ -1883,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page,
set_buffer_dirty(bh);
set_buffer_uptodate(bh);
mpage_add_bh_to_extent(mpd, logical, bh);
+ if (mpd->io_done)
+ return MPAGE_DA_EXTENT_TAIL;
} else {
/*
* Page with regular buffer heads, just add all dirty ones
@@ -1891,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page,
bh = head;
do {
BUG_ON(buffer_locked(bh));
- if (buffer_dirty(bh))
+ if (buffer_dirty(bh) &&
+ (!buffer_mapped(bh) || buffer_delay(bh))) {
mpage_add_bh_to_extent(mpd, logical, bh);
+ if (mpd->io_done)
+ return MPAGE_DA_EXTENT_TAIL;
+ }
logical++;
} while ((bh = bh->b_this_page) != head);
}
@@ -1911,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page,
*
* This is a library function, which implements the writepages()
* address_space_operation.
- *
- * In order to avoid duplication of logic that deals with partial pages,
- * multiple bio per page, etc, we find non-allocated blocks, allocate
- * them with minimal calls to ->get_block() and re-use __mpage_writepage()
- *
- * It's important that we call __mpage_writepage() only once for each
- * involved page, otherwise we'd have to implement more complicated logic
- * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
- *
- * See comments to mpage_writepages()
*/
static int mpage_da_writepages(struct address_space *mapping,
struct writeback_control *wbc,
get_block_t get_block)
{
struct mpage_da_data mpd;
+ long to_write;
int ret;
if (!get_block)
@@ -1940,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping,
mpd.first_page = 0;
mpd.next_page = 0;
mpd.get_block = get_block;
+ mpd.io_done = 0;
+ mpd.pages_written = 0;
+
+ to_write = wbc->nr_to_write;
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
/*
* Handle last extent of pages
*/
- if (mpd.next_page != mpd.first_page) {
+ if (!mpd.io_done && mpd.next_page != mpd.first_page) {
mpage_da_map_blocks(&mpd);
mpage_da_submit_io(&mpd);
}
+ wbc->nr_to_write = to_write - mpd.pages_written;
return ret;
}
@@ -2155,63 +2255,95 @@ static int ext4_da_writepage(struct page *page,
}
/*
- * For now just follow the DIO way to estimate the max credits
- * needed to write out EXT4_MAX_WRITEBACK_PAGES.
- * todo: need to calculate the max credits need for
- * extent based files, currently the DIO credits is based on
- * indirect-blocks mapping way.
- *
- * Probably should have a generic way to calculate credits
- * for DIO, writepages, and truncate
+ * This is called via ext4_da_writepages() to
+ * calulate the total number of credits to reserve to fit
+ * a single extent allocation into a single transaction,
+ * ext4_da_writpeages() will loop calling this before
+ * the block allocation.
*/
-#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS
-#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS
+
+static int ext4_da_writepages_trans_blocks(struct inode *inode)
+{
+ int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+
+ /*
+ * With non-extent format the journal credit needed to
+ * insert nrblocks contiguous block is dependent on
+ * number of contiguous block. So we will limit
+ * number of contiguous block to a sane value
+ */
+ if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+ (max_blocks > EXT4_MAX_TRANS_DATA))
+ max_blocks = EXT4_MAX_TRANS_DATA;
+
+ return ext4_chunk_trans_blocks(inode, max_blocks);
+}
static int ext4_da_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+ struct writeback_control *wbc)
{
- struct inode *inode = mapping->host;
handle_t *handle = NULL;
- int needed_blocks;
- int ret = 0;
- long to_write;
loff_t range_start = 0;
+ struct inode *inode = mapping->host;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+ long to_write, pages_skipped = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
/*
* No pages to write? This is mainly a kludge to avoid starting
* a transaction for special inodes like journal inode on last iput()
* because that could violate lock ordering on umount
*/
- if (!mapping->nrpages)
+ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
-
/*
- * Estimate the worse case needed credits to write out
- * EXT4_MAX_BUF_BLOCKS pages
+ * Make sure nr_to_write is >= sbi->s_mb_stream_request
+ * This make sure small files blocks are allocated in
+ * single attempt. This ensure that small files
+ * get less fragmented.
*/
- needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
+ if (wbc->nr_to_write < sbi->s_mb_stream_request) {
+ nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+ wbc->nr_to_write = sbi->s_mb_stream_request;
+ }
- to_write = wbc->nr_to_write;
- if (!wbc->range_cyclic) {
+ if (!wbc->range_cyclic)
/*
* If range_cyclic is not set force range_cont
* and save the old writeback_index
*/
wbc->range_cont = 1;
- range_start = wbc->range_start;
- }
- while (!ret && to_write) {
+ range_start = wbc->range_start;
+ pages_skipped = wbc->pages_skipped;
+
+restart_loop:
+ to_write = wbc->nr_to_write;
+ while (!ret && to_write > 0) {
+
+ /*
+ * we insert one extent at a time. So we need
+ * credit needed for single extent allocation.
+ * journalled mode is currently not supported
+ * by delalloc
+ */
+ BUG_ON(ext4_should_journal_data(inode));
+ needed_blocks = ext4_da_writepages_trans_blocks(inode);
+
/* start a new transaction*/
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
+ printk(KERN_EMERG "%s: jbd2_start: "
+ "%ld pages, ino %lu; err %d\n", __func__,
+ wbc->nr_to_write, inode->i_ino, ret);
+ dump_stack();
goto out_writepages;
}
if (ext4_should_order_data(inode)) {
/*
* With ordered mode we need to add
- * the inode to the journal handle
+ * the inode to the journal handl
* when we do block allocation.
*/
ret = ext4_jbd2_file_inode(handle, inode);
@@ -2219,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping,
ext4_journal_stop(handle);
goto out_writepages;
}
-
}
- /*
- * set the max dirty pages could be write at a time
- * to fit into the reserved transaction credits
- */
- if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
- wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
to_write -= wbc->nr_to_write;
ret = mpage_da_writepages(mapping, wbc,
- ext4_da_get_block_write);
+ ext4_da_get_block_write);
ext4_journal_stop(handle);
- if (wbc->nr_to_write) {
+ if (ret == MPAGE_DA_EXTENT_TAIL) {
+ /*
+ * got one extent now try with
+ * rest of the pages
+ */
+ to_write += wbc->nr_to_write;
+ ret = 0;
+ } else if (wbc->nr_to_write) {
/*
* There is no more writeout needed
* or we requested for a noblocking writeout
@@ -2244,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->nr_to_write = to_write;
}
-out_writepages:
- wbc->nr_to_write = to_write;
- if (range_start)
+ if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+ /* We skipped pages in this loop */
wbc->range_start = range_start;
+ wbc->nr_to_write = to_write +
+ wbc->pages_skipped - pages_skipped;
+ wbc->pages_skipped = pages_skipped;
+ goto restart_loop;
+ }
+
+out_writepages:
+ wbc->nr_to_write = to_write - nr_to_writebump;
+ wbc->range_start = range_start;
return ret;
}
@@ -2280,8 +2420,11 @@ retry:
}
page = __grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ ext4_journal_stop(handle);
+ ret = -ENOMEM;
+ goto out;
+ }
*pagep = page;
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3434,6 +3577,9 @@ void ext4_truncate(struct inode *inode)
* modify the block allocation tree.
*/
down_write(&ei->i_data_sem);
+
+ ext4_discard_reservation(inode);
+
/*
* The orphan list entry will now protect us from any crash which
* occurs before the truncate completes, so it is now safe to propagate
@@ -3503,8 +3649,6 @@ do_indirects:
;
}
- ext4_discard_reservation(inode);
-
up_write(&ei->i_data_sem);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
@@ -3590,6 +3734,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
@@ -4262,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
+ int chunk)
+{
+ int indirects;
+
+ /* if nrblocks are contiguous */
+ if (chunk) {
+ /*
+ * With N contiguous data blocks, it need at most
+ * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
+ * 2 dindirect blocks
+ * 1 tindirect block
+ */
+ indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ return indirects + 3;
+ }
+ /*
+ * if nrblocks are not contiguous, worse case, each block touch
+ * a indirect block, and each indirect block touch a double indirect
+ * block, plus a triple indirect block
+ */
+ indirects = nrblocks * 2 + 1;
+ return indirects;
+}
+
+static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+ return ext4_indirect_trans_blocks(inode, nrblocks, 0);
+ return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
+}
/*
- * How many blocks doth make a writepage()?
+ * Account for index blocks, block groups bitmaps and block group
+ * descriptor blocks if modify datablocks and index blocks
+ * worse case, the indexs blocks spread over different block groups
*
- * With N blocks per page, it may be:
- * N data blocks
- * 2 indirect block
- * 2 dindirect
- * 1 tindirect
- * N+5 bitmap blocks (from the above)
- * N+5 group descriptor summary blocks
- * 1 inode block
- * 1 superblock.
- * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
+ * If datablocks are discontiguous, they are possible to spread over
+ * different block groups too. If they are contiugous, with flexbg,
+ * they could still across block group boundary.
*
- * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
- *
- * With ordered or writeback data it's the same, less the N data blocks.
+ * Also account for superblock, inode, quota and xattr blocks
+ */
+int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+ int groups, gdpblocks;
+ int idxblocks;
+ int ret = 0;
+
+ /*
+ * How many index blocks need to touch to modify nrblocks?
+ * The "Chunk" flag indicating whether the nrblocks is
+ * physically contiguous on disk
+ *
+ * For Direct IO and fallocate, they calls get_block to allocate
+ * one single extent at a time, so they could set the "Chunk" flag
+ */
+ idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
+
+ ret = idxblocks;
+
+ /*
+ * Now let's see how many group bitmaps and group descriptors need
+ * to account
+ */
+ groups = idxblocks;
+ if (chunk)
+ groups += 1;
+ else
+ groups += nrblocks;
+
+ gdpblocks = groups;
+ if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
+ groups = EXT4_SB(inode->i_sb)->s_groups_count;
+ if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
+ gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
+
+ /* bitmaps and block group descriptor blocks */
+ ret += groups + gdpblocks;
+
+ /* Blocks for super block, inode, quota and xattr blocks */
+ ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
+
+ return ret;
+}
+
+/*
+ * Calulate the total number of credits to reserve to fit
+ * the modification of a single pages into a single transaction,
+ * which may include multiple chunks of block allocations.
*
- * If the inode's direct blocks can hold an integral number of pages then a
- * page cannot straddle two indirect blocks, and we can only touch one indirect
- * and dindirect block, and the "5" above becomes "3".
+ * This could be called via ext4_write_begin()
*
- * This still overestimates under most circumstances. If we were to pass the
- * start and end offsets in here as well we could do block_to_path() on each
- * block and work out the exact number of indirects which are touched. Pah.
+ * We need to consider the worse case, when
+ * one new block per extent.
*/
-
int ext4_writepage_trans_blocks(struct inode *inode)
{
int bpp = ext4_journal_blocks_per_page(inode);
- int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
int ret;
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_writepage_trans_blocks(inode, bpp);
+ ret = ext4_meta_trans_blocks(inode, bpp, 0);
+ /* Account for data blocks for journalled mode */
if (ext4_should_journal_data(inode))
- ret = 3 * (bpp + indirects) + 2;
- else
- ret = 2 * (bpp + indirects) + 2;
-
-#ifdef CONFIG_QUOTA
- /* We know that structure was already allocated during DQUOT_INIT so
- * we will be updating only the data blocks + inodes */
- ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
+ ret += bpp;
return ret;
}
/*
+ * Calculate the journal credits for a chunk of data modification.
+ *
+ * This is called from DIO, fallocate or whoever calling
+ * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ *
+ * journal buffers for data blocks are not included here, as DIO
+ * and fallocate do no need to journal data buffers.
+ */
+int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
+{
+ return ext4_meta_trans_blocks(inode, nrblocks, 1);
+}
+
+/*
* The caller must have previously called ext4_reserve_inode_write().
* Give this, we know that the caller already has write access to iloc->bh.
*/
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if (bh_uptodate_or_lock(bh[i]))
continue;
+ spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i],
first_group + i, desc);
set_buffer_uptodate(bh[i]);
unlock_buffer(bh[i]);
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
continue;
}
+ spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
get_bh(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
+ unsigned i, j;
unsigned offset;
unsigned max;
int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
- i = sizeof(struct ext4_locality_group) * NR_CPUS;
+ i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
if (sbi->s_locality_groups == NULL) {
clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
kfree(sbi->s_mb_maxs);
return -ENOMEM;
}
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
struct ext4_locality_group *lg;
lg = &sbi->s_locality_groups[i];
mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
+ for (j = 0; j < PREALLOC_TB_SIZE; j++)
+ INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
spin_lock_init(&lg->lg_prealloc_lock);
}
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa)
{
unsigned int len = ac->ac_o_ex.fe_len;
+
ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
&ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
@@ -3277,14 +3282,45 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
}
/*
+ * Return the prealloc space that have minimal distance
+ * from the goal block. @cpa is the prealloc
+ * space that is having currently known minimal distance
+ * from the goal block.
+ */
+static struct ext4_prealloc_space *
+ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
+ struct ext4_prealloc_space *pa,
+ struct ext4_prealloc_space *cpa)
+{
+ ext4_fsblk_t cur_distance, new_distance;
+
+ if (cpa == NULL) {
+ atomic_inc(&pa->pa_count);
+ return pa;
+ }
+ cur_distance = abs(goal_block - cpa->pa_pstart);
+ new_distance = abs(goal_block - pa->pa_pstart);
+
+ if (cur_distance < new_distance)
+ return cpa;
+
+ /* drop the previous reference */
+ atomic_dec(&cpa->pa_count);
+ atomic_inc(&pa->pa_count);
+ return pa;
+}
+
+/*
* search goal blocks in preallocated space
*/
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
+ int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
- struct ext4_prealloc_space *pa;
+ struct ext4_prealloc_space *pa, *cpa = NULL;
+ ext4_fsblk_t goal_block;
/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3322,22 +3358,38 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
if (lg == NULL)
return 0;
+ order = fls(ac->ac_o_ex.fe_len) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+
+ goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
+ ac->ac_g_ex.fe_start +
+ le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+ /*
+ * search for the prealloc space that is having
+ * minimal distance from the goal block.
+ */
+ for (i = order; i < PREALLOC_TB_SIZE; i++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (pa->pa_deleted == 0 &&
+ pa->pa_free >= ac->ac_o_ex.fe_len) {
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_group_pa(ac, pa);
+ cpa = ext4_mb_check_group_pa(goal_block,
+ pa, cpa);
+ }
spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 20;
- rcu_read_unlock();
- return 1;
}
- spin_unlock(&pa->pa_lock);
+ rcu_read_unlock();
+ }
+ if (cpa) {
+ ext4_mb_use_group_pa(ac, cpa);
+ ac->ac_criteria = 20;
+ return 1;
}
- rcu_read_unlock();
-
return 0;
}
@@ -3560,6 +3612,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
@@ -3580,10 +3633,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- spin_lock(pa->pa_obj_lock);
- list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
-
+ /*
+ * We will later add the new pa to the right bucket
+ * after updating the pa_free in ext4_mb_release_context
+ */
return 0;
}
@@ -3733,20 +3786,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) {
- /* error handling here */
- ext4_mb_release_desc(&e4b);
- BUG_ON(bitmap_bh == NULL);
+ ext4_error(sb, __func__, "Error in reading block "
+ "bitmap for %lu\n", group);
+ return 0;
}
err = ext4_mb_load_buddy(sb, group, &e4b);
- BUG_ON(err != 0); /* error handling here */
+ if (err) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ put_bh(bitmap_bh);
+ return 0;
+ }
if (needed == 0)
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
- grp = ext4_get_group_info(sb, group);
INIT_LIST_HEAD(&list);
-
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
repeat:
ext4_lock_group(sb, group);
@@ -3903,13 +3959,18 @@ repeat:
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
err = ext4_mb_load_buddy(sb, group, &e4b);
- BUG_ON(err != 0); /* error handling here */
+ if (err) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) {
- /* error handling here */
+ ext4_error(sb, __func__, "Error in reading block "
+ "bitmap for %lu\n", group);
ext4_mb_release_desc(&e4b);
- BUG_ON(bitmap_bh == NULL);
+ continue;
}
ext4_lock_group(sb, group);
@@ -4112,22 +4173,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
}
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+ struct ext4_locality_group *lg,
+ int order, int total_entries)
+{
+ ext4_group_t group = 0;
+ struct ext4_buddy e4b;
+ struct list_head discard_list;
+ struct ext4_prealloc_space *pa, *tmp;
+ struct ext4_allocation_context *ac;
+
+ mb_debug("discard locality group preallocation\n");
+
+ INIT_LIST_HEAD(&discard_list);
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ spin_lock(&lg->lg_prealloc_lock);
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ /*
+ * This is the pa that we just used
+ * for block allocation. So don't
+ * free that
+ */
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ /* only lg prealloc space */
+ BUG_ON(!pa->pa_linear);
+
+ /* seems this one can be freed ... */
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+ list_del_rcu(&pa->pa_inode_list);
+ list_add(&pa->u.pa_tmp_list, &discard_list);
+
+ total_entries--;
+ if (total_entries <= 5) {
+ /*
+ * we want to keep only 5 entries
+ * allowing it to grow to 8. This
+ * mak sure we don't call discard
+ * soon for this list.
+ */
+ break;
+ }
+ }
+ spin_unlock(&lg->lg_prealloc_lock);
+
+ list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+ if (ext4_mb_load_buddy(sb, group, &e4b)) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_unlock_group(sb, group);
+
+ ext4_mb_release_desc(&e4b);
+ list_del(&pa->u.pa_tmp_list);
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
+}
+
+/*
+ * We have incremented pa_count. So it cannot be freed at this
+ * point. Also we hold lg_mutex. So no parallel allocation is
+ * possible from this lg. That means pa_free cannot be updated.
+ *
+ * A parallel ext4_mb_discard_group_preallocations is possible.
+ * which can cause the lg_prealloc_list to be updated.
+ */
+
+static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
+{
+ int order, added = 0, lg_prealloc_count = 1;
+ struct super_block *sb = ac->ac_sb;
+ struct ext4_locality_group *lg = ac->ac_lg;
+ struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
+
+ order = fls(pa->pa_free) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+ /* Add the prealloc space to lg */
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ spin_lock(&tmp_pa->pa_lock);
+ if (tmp_pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (!added && pa->pa_free < tmp_pa->pa_free) {
+ /* Add to the tail of the previous entry */
+ list_add_tail_rcu(&pa->pa_inode_list,
+ &tmp_pa->pa_inode_list);
+ added = 1;
+ /*
+ * we want to count the total
+ * number of entries in the list
+ */
+ }
+ spin_unlock(&tmp_pa->pa_lock);
+ lg_prealloc_count++;
+ }
+ if (!added)
+ list_add_tail_rcu(&pa->pa_inode_list,
+ &lg->lg_prealloc_list[order]);
+ rcu_read_unlock();
+
+ /* Now trim the list to be not more than 8 elements */
+ if (lg_prealloc_count > 8) {
+ ext4_mb_discard_lg_preallocations(sb, lg,
+ order, lg_prealloc_count);
+ return;
+ }
+ return ;
+}
+
/*
* release all resource we used in allocation
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
- if (ac->ac_pa) {
- if (ac->ac_pa->pa_linear) {
+ struct ext4_prealloc_space *pa = ac->ac_pa;
+ if (pa) {
+ if (pa->pa_linear) {
/* see comment in ext4_mb_use_group_pa() */
- spin_lock(&ac->ac_pa->pa_lock);
- ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
- spin_unlock(&ac->ac_pa->pa_lock);
+ spin_lock(&pa->pa_lock);
+ pa->pa_pstart += ac->ac_b_ex.fe_len;
+ pa->pa_lstart += ac->ac_b_ex.fe_len;
+ pa->pa_free -= ac->ac_b_ex.fe_len;
+ pa->pa_len -= ac->ac_b_ex.fe_len;
+ spin_unlock(&pa->pa_lock);
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big.
+ */
+ if (likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
}
- ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa);
+ ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4627,15 @@ do_more:
count -= overflow;
}
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh)
+ if (!bitmap_bh) {
+ err = -EIO;
goto error_return;
+ }
gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!gdp)
+ if (!gdp) {
+ err = -EIO;
goto error_return;
+ }
if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
* Locality group:
* we try to group all related changes together
* so that writeback can flush/allocate them together as well
+ * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ * (512). We store prealloc space into the hash based on the pa_free blocks
+ * order value.ie, fls(pa_free)-1;
*/
+#define PREALLOC_TB_SIZE 10
struct ext4_locality_group {
/* for allocator */
- struct mutex lg_mutex; /* to serialize allocates */
- struct list_head lg_prealloc_list;/* list of preallocations */
+ /* to serialize allocates */
+ struct mutex lg_mutex;
+ /* list of preallocations */
+ struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
spinlock_t lg_prealloc_lock;
};
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
* credit. But below we try to not accumalate too much
* of them by restarting the journal.
*/
- needed = ext4_ext_calc_credits_for_insert(inode, path);
+ needed = ext4_ext_calc_credits_for_single_extent(inode,
+ lb->last_block - lb->first_block + 1, path);
/*
* Make sure the credit we accumalated is not really high
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
"Inode bitmap not in group (block %llu)",
(unsigned long long)input->inode_bitmap);
else if (outside(input->inode_table, start, end) ||
- outside(itend - 1, start, end))
+ outside(itend - 1, start, end))
ext4_warning(sb, __func__,
"Inode table not in group (blocks %llu-%llu)",
(unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
(unsigned long long)input->inode_bitmap,
start, metaend - 1);
else if (inside(input->inode_table, start, metaend) ||
- inside(itend - 1, start, metaend))
+ inside(itend - 1, start, metaend))
ext4_warning(sb, __func__,
"Inode table (%llu-%llu) overlaps"
"GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
if (err) {
if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
return err;
- if ((err = ext4_journal_get_write_access(handle, bh)))
+ if ((err = ext4_journal_get_write_access(handle, bh)))
return err;
- }
+ }
return 0;
}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
- * because the user tools have no way of handling this. Probably a
- * bad time to do it anyways.
- */
+ /*
+ * If we are not using the primary superblock/GDT copy don't resize,
+ * because the user tools have no way of handling this. Probably a
+ * bad time to do it anyways.
+ */
if (EXT4_SB(sb)->s_sbh->b_blocknr !=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return 0;
exit_inode:
- //ext4_journal_release_buffer(handle, iloc.bh);
+ /* ext4_journal_release_buffer(handle, iloc.bh); */
brelse(iloc.bh);
exit_dindj:
- //ext4_journal_release_buffer(handle, dind);
+ /* ext4_journal_release_buffer(handle, dind); */
exit_primary:
- //ext4_journal_release_buffer(handle, *primary);
+ /* ext4_journal_release_buffer(handle, *primary); */
exit_sbh:
- //ext4_journal_release_buffer(handle, *primary);
+ /* ext4_journal_release_buffer(handle, *primary); */
exit_dind:
brelse(dind);
exit_bh:
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
if (reserved_gdb || gdb_off == 0) {
if (!EXT4_HAS_COMPAT_FEATURE(sb,
- EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+ EXT4_FEATURE_COMPAT_RESIZE_INODE)
+ || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext4_warning(sb, __func__,
"No reserved GDT blocks, can't resize");
return -EPERM;
@@ -818,12 +819,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
goto exit_journal;
- /*
- * We will only either add reserved group blocks to a backup group
- * or remove reserved blocks for the first group in a new group block.
- * Doing both would be mean more complex code, and sane people don't
- * use non-sparse filesystems anymore. This is already checked above.
- */
+ /*
+ * We will only either add reserved group blocks to a backup group
+ * or remove reserved blocks for the first group in a new group block.
+ * Doing both would be mean more complex code, and sane people don't
+ * use non-sparse filesystems anymore. This is already checked above.
+ */
if (gdb_off) {
primary = sbi->s_group_desc[gdb_num];
if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +836,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
} else if ((err = add_new_gdb(handle, inode, input, &primary)))
goto exit_journal;
- /*
- * OK, now we've set up the new group. Time to make it active.
- *
- * Current kernels don't lock all allocations via lock_super(),
- * so we have to be safe wrt. concurrent accesses the group
- * data. So we need to be careful to set all of the relevant
- * group descriptor data etc. *before* we enable the group.
- *
- * The key field here is sbi->s_groups_count: as long as
- * that retains its old value, nobody is going to access the new
- * group.
- *
- * So first we update all the descriptor metadata for the new
- * group; then we update the total disk blocks count; then we
- * update the groups count to enable the group; then finally we
- * update the free space counts so that the system can start
- * using the new disk blocks.
- */
+ /*
+ * OK, now we've set up the new group. Time to make it active.
+ *
+ * Current kernels don't lock all allocations via lock_super(),
+ * so we have to be safe wrt. concurrent accesses the group
+ * data. So we need to be careful to set all of the relevant
+ * group descriptor data etc. *before* we enable the group.
+ *
+ * The key field here is sbi->s_groups_count: as long as
+ * that retains its old value, nobody is going to access the new
+ * group.
+ *
+ * So first we update all the descriptor metadata for the new
+ * group; then we update the total disk blocks count; then we
+ * update the groups count to enable the group; then finally we
+ * update the free space counts so that the system can start
+ * using the new disk blocks.
+ */
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +947,8 @@ exit_put:
return err;
} /* ext4_group_add */
-/* Extend the filesystem to the new number of blocks specified. This entry
+/*
+ * Extend the filesystem to the new number of blocks specified. This entry
* point is only used to extend the current filesystem to the end of the last
* existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
* for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1026,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
o_blocks_count + add, add);
/* See if the device is actually as big as what was requested */
- bh = sb_bread(sb, o_blocks_count + add -1);
+ bh = sb_bread(sb, o_blocks_count + add - 1);
if (!bh) {
ext4_warning(sb, __func__,
"can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff14..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
unsigned int);
-static void ext4_commit_super (struct super_block * sb,
- struct ext4_super_block * es,
- int sync);
-static void ext4_mark_recovery_complete(struct super_block * sb,
- struct ext4_super_block * es);
-static void ext4_clear_journal_err(struct super_block * sb,
- struct ext4_super_block * es);
+static void ext4_commit_super(struct super_block *sb,
+ struct ext4_super_block *es, int sync);
+static void ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es);
+static void ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16]);
-static int ext4_remount (struct super_block * sb, int * flags, char * data);
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext4_remount(struct super_block *sb, int *flags, char *data);
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static void ext4_unlockfs(struct super_block *sb);
-static void ext4_write_super (struct super_block * sb);
+static void ext4_write_super(struct super_block *sb);
static void ext4_write_super_lockfs(struct super_block *sb);
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return;
- if (!test_opt (sb, ERRORS_CONT)) {
+ if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
if (journal)
jbd2_journal_abort(journal, -EIO);
}
- if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT "Remounting filesystem read-only\n");
+ if (test_opt(sb, ERRORS_RO)) {
+ printk(KERN_CRIT "Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
}
ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
sb->s_id);
}
-void ext4_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
ext4_handle_error(sb);
}
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16])
{
char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
/* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */
-void __ext4_std_error (struct super_block * sb, const char * function,
- int errno)
+void __ext4_std_error(struct super_block *sb, const char *function, int errno)
{
char nbuf[16];
const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
return;
errstr = ext4_decode_error(sb, errno, nbuf);
- printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
- sb->s_id, function, errstr);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
+ sb->s_id, function, errstr);
ext4_handle_error(sb);
}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
* case we take the easy way out and panic immediately.
*/
-void ext4_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
- printk (KERN_CRIT "ext4_abort called.\n");
+ printk(KERN_CRIT "ext4_abort called.\n");
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
-void ext4_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext4_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
va_list args;
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
}
}
-static void ext4_put_super (struct super_block * sb)
+static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
@@ -570,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
#endif
ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1;
+ ei->vfs_inode.i_data.writeback_index = 0;
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
@@ -647,7 +646,8 @@ static void ext4_clear_inode(struct inode *inode)
&EXT4_I(inode)->jinode);
}
-static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
+static inline void ext4_show_quota_options(struct seq_file *seq,
+ struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +822,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
-#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
+#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
static int ext4_dquot_initialize(struct inode *inode, int type);
static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +991,12 @@ static ext4_fsblk_t get_sb_block(void **data)
return sb_block;
}
-static int parse_options (char *options, struct super_block *sb,
- unsigned int *inum, unsigned long *journal_devnum,
- ext4_fsblk_t *n_blocks_count, int is_remount)
+static int parse_options(char *options, struct super_block *sb,
+ unsigned int *inum, unsigned long *journal_devnum,
+ ext4_fsblk_t *n_blocks_count, int is_remount)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char * p;
+ char *p;
substring_t args[MAX_OPT_ARGS];
int data_opt = 0;
int option;
@@ -1009,7 +1009,7 @@ static int parse_options (char *options, struct super_block *sb,
if (!options)
return 1;
- while ((p = strsep (&options, ",")) != NULL) {
+ while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
@@ -1017,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb,
token = match_token(p, tokens, args);
switch (token) {
case Opt_bsd_df:
- clear_opt (sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sbi->s_mount_opt, MINIX_DF);
break;
case Opt_minix_df:
- set_opt (sbi->s_mount_opt, MINIX_DF);
+ set_opt(sbi->s_mount_opt, MINIX_DF);
break;
case Opt_grpid:
- set_opt (sbi->s_mount_opt, GRPID);
+ set_opt(sbi->s_mount_opt, GRPID);
break;
case Opt_nogrpid:
- clear_opt (sbi->s_mount_opt, GRPID);
+ clear_opt(sbi->s_mount_opt, GRPID);
break;
case Opt_resuid:
if (match_int(&args[0], &option))
@@ -1043,41 +1043,41 @@ static int parse_options (char *options, struct super_block *sb,
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt (sbi->s_mount_opt, ERRORS_CONT);
- clear_opt (sbi->s_mount_opt, ERRORS_RO);
- set_opt (sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sbi->s_mount_opt, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt (sbi->s_mount_opt, ERRORS_CONT);
- clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
- set_opt (sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sbi->s_mount_opt, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt (sbi->s_mount_opt, ERRORS_RO);
- clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
- set_opt (sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sbi->s_mount_opt, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt (sbi->s_mount_opt, NO_UID32);
+ set_opt(sbi->s_mount_opt, NO_UID32);
break;
case Opt_nocheck:
- clear_opt (sbi->s_mount_opt, CHECK);
+ clear_opt(sbi->s_mount_opt, CHECK);
break;
case Opt_debug:
- set_opt (sbi->s_mount_opt, DEBUG);
+ set_opt(sbi->s_mount_opt, DEBUG);
break;
case Opt_oldalloc:
- set_opt (sbi->s_mount_opt, OLDALLOC);
+ set_opt(sbi->s_mount_opt, OLDALLOC);
break;
case Opt_orlov:
- clear_opt (sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sbi->s_mount_opt, OLDALLOC);
break;
#ifdef CONFIG_EXT4DEV_FS_XATTR
case Opt_user_xattr:
- set_opt (sbi->s_mount_opt, XATTR_USER);
+ set_opt(sbi->s_mount_opt, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt (sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sbi->s_mount_opt, XATTR_USER);
break;
#else
case Opt_user_xattr:
@@ -1115,7 +1115,7 @@ static int parse_options (char *options, struct super_block *sb,
"journal on remount\n");
return 0;
}
- set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
break;
case Opt_journal_inum:
if (is_remount) {
@@ -1145,7 +1145,7 @@ static int parse_options (char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt (sbi->s_mount_opt, NOLOAD);
+ set_opt(sbi->s_mount_opt, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
@@ -1331,7 +1331,7 @@ set_qf_format:
"on this filesystem, use tune2fs\n");
return 0;
}
- set_opt (sbi->s_mount_opt, EXTENTS);
+ set_opt(sbi->s_mount_opt, EXTENTS);
break;
case Opt_noextents:
/*
@@ -1348,7 +1348,7 @@ set_qf_format:
"-o noextents options\n");
return 0;
}
- clear_opt (sbi->s_mount_opt, EXTENTS);
+ clear_opt(sbi->s_mount_opt, EXTENTS);
break;
case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1374,9 @@ set_qf_format:
set_opt(sbi->s_mount_opt, DELALLOC);
break;
default:
- printk (KERN_ERR
- "EXT4-fs: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ printk(KERN_ERR
+ "EXT4-fs: Unrecognized mount option \"%s\" "
+ "or missing value\n", p);
return 0;
}
}
@@ -1423,31 +1423,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int res = 0;
if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
- printk (KERN_ERR "EXT4-fs warning: revision level too high, "
- "forcing read-only mode\n");
+ printk(KERN_ERR "EXT4-fs warning: revision level too high, "
+ "forcing read-only mode\n");
res = MS_RDONLY;
}
if (read_only)
return res;
if (!(sbi->s_mount_state & EXT4_VALID_FS))
- printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
+ "running e2fsck is recommended\n");
else if ((sbi->s_mount_state & EXT4_ERROR_FS))
- printk (KERN_WARNING
- "EXT4-fs warning: mounting fs with errors, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: mounting fs with errors, "
+ "running e2fsck is recommended\n");
else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
le16_to_cpu(es->s_mnt_count) >=
(unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
- printk (KERN_WARNING
- "EXT4-fs warning: maximal mount count reached, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: maximal mount count reached, "
+ "running e2fsck is recommended\n");
else if (le32_to_cpu(es->s_checkinterval) &&
(le32_to_cpu(es->s_lastcheck) +
le32_to_cpu(es->s_checkinterval) <= get_seconds()))
- printk (KERN_WARNING
- "EXT4-fs warning: checktime reached, "
- "running e2fsck is recommended\n");
+ printk(KERN_WARNING
+ "EXT4-fs warning: checktime reached, "
+ "running e2fsck is recommended\n");
#if 0
/* @@@ We _will_ want to clear the valid bit if we find
* inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1506,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
groups_per_flex;
- sbi->s_flex_groups = kmalloc(flex_group_count *
+ sbi->s_flex_groups = kzalloc(flex_group_count *
sizeof(struct flex_groups), GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- printk(KERN_ERR "EXT4-fs: not enough memory\n");
+ printk(KERN_ERR "EXT4-fs: not enough memory for "
+ "%lu flex groups\n", flex_group_count);
goto failed;
}
- memset(sbi->s_flex_groups, 0, flex_group_count *
- sizeof(struct flex_groups));
gdp = ext4_get_group_desc(sb, 1, &bh);
block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1596,14 @@ static int ext4_check_descriptors(struct super_block *sb)
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
block_bitmap = ext4_block_bitmap(sb, gdp);
- if (block_bitmap < first_block || block_bitmap > last_block)
- {
+ if (block_bitmap < first_block || block_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Block bitmap for group %lu not in group "
"(block %llu)!", i, block_bitmap);
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
- if (inode_bitmap < first_block || inode_bitmap > last_block)
- {
+ if (inode_bitmap < first_block || inode_bitmap > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode bitmap for group %lu not in group "
"(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1611,28 @@ static int ext4_check_descriptors(struct super_block *sb)
}
inode_table = ext4_inode_table(sb, gdp);
if (inode_table < first_block ||
- inode_table + sbi->s_itb_per_group - 1 > last_block)
- {
+ inode_table + sbi->s_itb_per_group - 1 > last_block) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Inode table for group %lu not in group "
"(block %llu)!", i, inode_table);
return 0;
}
+ spin_lock(sb_bgl_lock(sbi, i));
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Checksum for group %lu failed (%u!=%u)\n",
i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- return 0;
+ if (!(sb->s_flags & MS_RDONLY))
+ return 0;
}
+ spin_unlock(sb_bgl_lock(sbi, i));
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
- sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
@@ -1654,8 +1653,8 @@ static int ext4_check_descriptors(struct super_block *sb)
* e2fsck was run on this filesystem, and it must have already done the orphan
* inode cleanup for us, so we can safely abort without any further action.
*/
-static void ext4_orphan_cleanup (struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_orphan_cleanup(struct super_block *sb,
+ struct ext4_super_block *es)
{
unsigned int s_flags = sb->s_flags;
int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1731,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
iput(inode); /* The delete magic happens here! */
}
-#define PLURAL(x) (x), ((x)==1) ? "" : "s"
+#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
if (nr_orphans)
printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1898,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
return 0;
}
-static int ext4_fill_super (struct super_block *sb, void *data, int silent)
+static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
{
- struct buffer_head * bh;
+ struct buffer_head *bh;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi;
ext4_fsblk_t block;
@@ -1953,7 +1952,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
}
if (!(bh = sb_bread(sb, logical_sb_block))) {
- printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
+ printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
goto out_fail;
}
/*
@@ -2026,8 +2025,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, DELALLOC);
- if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
- NULL, 0))
+ if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
+ NULL, 0))
goto failed_mount;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2101,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- brelse (bh);
+ brelse(bh);
logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
offset = do_div(logical_sb_block, blocksize);
bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2113,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
- printk (KERN_ERR
- "EXT4-fs: Magic mismatch, very weird !\n");
+ printk(KERN_ERR
+ "EXT4-fs: Magic mismatch, very weird !\n");
goto failed_mount;
}
}
@@ -2132,9 +2131,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
- printk (KERN_ERR
- "EXT4-fs: unsupported inode size: %d\n",
- sbi->s_inode_size);
+ printk(KERN_ERR
+ "EXT4-fs: unsupported inode size: %d\n",
+ sbi->s_inode_size);
goto failed_mount;
}
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2165,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
- for (i=0; i < 4; i++)
+ for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
if (sbi->s_blocks_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #blocks per group too big: %lu\n",
- sbi->s_blocks_per_group);
+ printk(KERN_ERR
+ "EXT4-fs: #blocks per group too big: %lu\n",
+ sbi->s_blocks_per_group);
goto failed_mount;
}
if (sbi->s_inodes_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #inodes per group too big: %lu\n",
- sbi->s_inodes_per_group);
+ printk(KERN_ERR
+ "EXT4-fs: #inodes per group too big: %lu\n",
+ sbi->s_inodes_per_group);
goto failed_mount;
}
@@ -2213,10 +2212,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_groups_count = blocks_count;
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+ sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
- printk (KERN_ERR "EXT4-fs: not enough memory\n");
+ printk(KERN_ERR "EXT4-fs: not enough memory\n");
goto failed_mount;
}
@@ -2226,13 +2225,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
block = descriptor_loc(sb, logical_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
if (!sbi->s_group_desc[i]) {
- printk (KERN_ERR "EXT4-fs: "
- "can't read group descriptor %d\n", i);
+ printk(KERN_ERR "EXT4-fs: "
+ "can't read group descriptor %d\n", i);
db_count = i;
goto failed_mount2;
}
}
- if (!ext4_check_descriptors (sb)) {
+ if (!ext4_check_descriptors(sb)) {
printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
goto failed_mount2;
}
@@ -2308,11 +2307,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
EXT4_SB(sb)->s_journal->j_failed_commit) {
printk(KERN_CRIT "EXT4-fs error (device %s): "
"ext4_fill_super: Journal transaction "
- "%u is corrupt\n", sb->s_id,
+ "%u is corrupt\n", sb->s_id,
EXT4_SB(sb)->s_journal->j_failed_commit);
- if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT
- "Mounting filesystem read-only\n");
+ if (test_opt(sb, ERRORS_RO)) {
+ printk(KERN_CRIT
+ "Mounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2331,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount3;
} else {
if (!silent)
- printk (KERN_ERR
- "ext4: No journal on filesystem on %s\n",
- sb->s_id);
+ printk(KERN_ERR
+ "ext4: No journal on filesystem on %s\n",
+ sb->s_id);
goto failed_mount3;
}
@@ -2418,7 +2417,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount4;
}
- ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+ ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2456,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
if (needs_recovery)
- printk (KERN_INFO "EXT4-fs: recovery complete.\n");
+ printk(KERN_INFO "EXT4-fs: recovery complete.\n");
ext4_mark_recovery_complete(sb, es);
- printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
- test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
- test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
+ printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+ "writeback");
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2574,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
static journal_t *ext4_get_dev_journal(struct super_block *sb,
dev_t j_dev)
{
- struct buffer_head * bh;
+ struct buffer_head *bh;
journal_t *journal;
ext4_fsblk_t start;
ext4_fsblk_t len;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
- struct ext4_super_block * es;
+ struct ext4_super_block *es;
struct block_device *bdev;
bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2696,8 @@ static int ext4_load_journal(struct super_block *sb,
"unavailable, cannot proceed.\n");
return -EROFS;
}
- printk (KERN_INFO "EXT4-fs: write access will "
- "be enabled during recovery.\n");
+ printk(KERN_INFO "EXT4-fs: write access will "
+ "be enabled during recovery.\n");
}
}
@@ -2751,8 +2750,8 @@ static int ext4_load_journal(struct super_block *sb,
return 0;
}
-static int ext4_create_journal(struct super_block * sb,
- struct ext4_super_block * es,
+static int ext4_create_journal(struct super_block *sb,
+ struct ext4_super_block *es,
unsigned int journal_inum)
{
journal_t *journal;
@@ -2793,9 +2792,8 @@ static int ext4_create_journal(struct super_block * sb,
return 0;
}
-static void ext4_commit_super (struct super_block * sb,
- struct ext4_super_block * es,
- int sync)
+static void ext4_commit_super(struct super_block *sb,
+ struct ext4_super_block *es, int sync)
{
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
@@ -2816,8 +2814,8 @@ static void ext4_commit_super (struct super_block * sb,
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
journal_t *journal = EXT4_SB(sb)->s_journal;
@@ -2839,8 +2837,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block * sb,
- struct ext4_super_block * es)
+static void ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
@@ -2865,7 +2863,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super (sb, es, 1);
+ ext4_commit_super(sb, es, 1);
jbd2_journal_clear_err(journal);
}
@@ -2898,7 +2896,7 @@ int ext4_force_commit(struct super_block *sb)
* This implicitly triggers the writebehind on sync().
*/
-static void ext4_write_super (struct super_block * sb)
+static void ext4_write_super(struct super_block *sb)
{
if (mutex_trylock(&sb->s_lock) != 0)
BUG();
@@ -2954,13 +2952,14 @@ static void ext4_unlockfs(struct super_block *sb)
}
}
-static int ext4_remount (struct super_block * sb, int * flags, char * data)
+static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
- struct ext4_super_block * es;
+ struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
+ ext4_group_t g;
int err;
#ifdef CONFIG_QUOTA
int i;
@@ -3039,6 +3038,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
}
/*
+ * Make sure the group descriptor checksums
+ * are sane. If they aren't, refuse to
+ * remount r/w.
+ */
+ for (g = 0; g < sbi->s_groups_count; g++) {
+ struct ext4_group_desc *gdp =
+ ext4_get_group_desc(sb, g, NULL);
+
+ if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
+ printk(KERN_ERR
+ "EXT4-fs: ext4_remount: "
+ "Checksum for group %lu failed (%u!=%u)\n",
+ g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ }
+
+ /*
* If we have an unprocessed orphan list hanging
* around from a previously readonly bdev mount,
* require a full umount/remount for now.
@@ -3063,7 +3082,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((err = ext4_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
- if (!ext4_setup_super (sb, es, 0))
+ if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
}
}
@@ -3093,7 +3112,7 @@ restore_opts:
return err;
}
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3350,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
}
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
- /* Quotafile not of fs root? */
+ /* Quotafile not in fs root? */
if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
printk(KERN_WARNING
"EXT4-fs: Quota file not on filesystem root. "
"Journaled quota will not work.\n");
- }
+ }
/*
* When we journal data on quota file, we have to flush journal to see
@@ -3352,8 +3371,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
}
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, remount);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
char *name = entry->e_name;
int n;
- for (n=0; n < entry->e_name_len; n++) {
+ for (n = 0; n < entry->e_name_len; n++) {
hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
*name++;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8707a8cfa02c..ddde37025ca6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
return 0;
}
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
int fat_setattr(struct dentry *dentry, struct iattr *attr)
{
struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
/* Check for setting the inode time. */
ia_valid = attr->ia_valid;
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+ if (ia_valid & TIMES_SET_FLAGS) {
if (fat_allow_set_time(sbi, inode))
- attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+ attr->ia_valid &= ~TIMES_SET_FLAGS;
}
error = inode_change_ok(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6d266d793e2c..80ff3381fa21 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
struct buffer_head *bh;
struct msdos_dir_entry *raw_entry;
loff_t i_pos;
- int err = 0;
+ int err;
retry:
i_pos = MSDOS_I(inode)->i_pos;
if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
return 0;
- lock_super(sb);
bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
if (!bh) {
printk(KERN_ERR "FAT: unable to read inode block "
"for updating (i_pos %lld)\n", i_pos);
- err = -EIO;
- goto out;
+ return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
if (i_pos != MSDOS_I(inode)->i_pos) {
spin_unlock(&sbi->inode_hash_lock);
brelse(bh);
- unlock_super(sb);
goto retry;
}
@@ -607,11 +604,10 @@ retry:
}
spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh);
+ err = 0;
if (wait)
err = sync_dirty_buffer(bh);
brelse(bh);
-out:
- unlock_super(sb);
return err;
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d625136813..ac4f7db9f134 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
return res;
}
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary. Must be called with the
- * file_lock held for write.
- */
-
-static int locate_fd(unsigned int orig_start, int cloexec)
-{
- struct files_struct *files = current->files;
- unsigned int newfd;
- unsigned int start;
- int error;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
-repeat:
- fdt = files_fdtable(files);
- /*
- * Someone might have closed fd's in the range
- * orig_start..fdt->next_fd
- */
- start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
-
- newfd = start;
- if (start < fdt->max_fds)
- newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
- fdt->max_fds, start);
-
- error = expand_files(files, newfd);
- if (error < 0)
- goto out;
-
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- if (error)
- goto repeat;
-
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
-
- FD_SET(newfd, fdt->open_fds);
- if (cloexec)
- FD_SET(newfd, fdt->close_on_exec);
- else
- FD_CLR(newfd, fdt->close_on_exec);
- error = newfd;
-
-out:
- spin_unlock(&files->file_lock);
- return error;
-}
-
-static int dupfd(struct file *file, unsigned int start, int cloexec)
-{
- int fd = locate_fd(start, cloexec);
- if (fd >= 0)
- fd_install(fd, file);
- else
- fput(file);
-
- return fd;
-}
-
asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
@@ -130,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
return -EINVAL;
spin_lock(&files->file_lock);
- if (!(file = fcheck(oldfd)))
- goto out_unlock;
- get_file(file); /* We are now finished with oldfd */
-
err = expand_files(files, newfd);
+ file = fcheck(oldfd);
+ if (unlikely(!file))
+ goto Ebadf;
if (unlikely(err < 0)) {
if (err == -EMFILE)
- err = -EBADF;
- goto out_fput;
+ goto Ebadf;
+ goto out_unlock;
}
-
- /* To avoid races with open() and dup(), we will mark the fd as
- * in-use in the open-file bitmap throughout the entire dup2()
- * process. This is quite safe: do_close() uses the fd array
- * entry, not the bitmap, to decide what work needs to be
- * done. --sct */
- /* Doesn't work. open() might be there first. --AV */
-
- /* Yes. It's a race. In user space. Nothing sane to do */
+ /*
+ * We need to detect attempts to do dup2() over allocated but still
+ * not finished descriptor. NB: OpenBSD avoids that at the price of
+ * extra work in their equivalent of fget() - they insert struct
+ * file immediately after grabbing descriptor, mark it larval if
+ * more work (e.g. actual opening) is needed and make sure that
+ * fget() treats larval files as absent. Potentially interesting,
+ * but while extra work in fget() is trivial, locking implications
+ * and amount of surgery on open()-related paths in VFS are not.
+ * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+ * deadlocks in rather amusing ways, AFAICS. All of that is out of
+ * scope of POSIX or SUS, since neither considers shared descriptor
+ * tables and this condition does not arise without those.
+ */
err = -EBUSY;
fdt = files_fdtable(files);
tofree = fdt->fd[newfd];
if (!tofree && FD_ISSET(newfd, fdt->open_fds))
- goto out_fput;
-
+ goto out_unlock;
+ get_file(file);
rcu_assign_pointer(fdt->fd[newfd], file);
FD_SET(newfd, fdt->open_fds);
if (flags & O_CLOEXEC)
@@ -165,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
if (tofree)
filp_close(tofree, files);
- err = newfd;
-out:
- return err;
-out_unlock:
- spin_unlock(&files->file_lock);
- goto out;
-out_fput:
+ return newfd;
+
+Ebadf:
+ err = -EBADF;
+out_unlock:
spin_unlock(&files->file_lock);
- fput(file);
- goto out;
+ return err;
}
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
@@ -194,10 +128,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
asmlinkage long sys_dup(unsigned int fildes)
{
int ret = -EBADF;
- struct file * file = fget(fildes);
-
- if (file)
- ret = dupfd(file, 0, 0);
+ struct file *file = fget(fildes);
+
+ if (file) {
+ ret = get_unused_fd();
+ if (ret >= 0)
+ fd_install(ret, file);
+ else
+ fput(file);
+ }
return ret;
}
@@ -322,8 +261,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_DUPFD_CLOEXEC:
if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
break;
- get_file(filp);
- err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
+ err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
+ if (err >= 0) {
+ get_file(filp);
+ fd_install(err, filp);
+ }
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe47..f313314f996f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
* Manage the dynamic fd arrays in the process files_struct.
*/
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
},
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
};
+
+/*
+ * allocate a file descriptor, mark it busy.
+ */
+int alloc_fd(unsigned start, unsigned flags)
+{
+ struct files_struct *files = current->files;
+ unsigned int fd;
+ int error;
+ struct fdtable *fdt;
+
+ spin_lock(&files->file_lock);
+repeat:
+ fdt = files_fdtable(files);
+ fd = start;
+ if (fd < files->next_fd)
+ fd = files->next_fd;
+
+ if (fd < fdt->max_fds)
+ fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fds, fd);
+
+ error = expand_files(files, fd);
+ if (error < 0)
+ goto out;
+
+ /*
+ * If we needed to expand the fs array we
+ * might have blocked - try again.
+ */
+ if (error)
+ goto repeat;
+
+ if (start <= files->next_fd)
+ files->next_fd = fd + 1;
+
+ FD_SET(fd, fdt->open_fds);
+ if (flags & O_CLOEXEC)
+ FD_SET(fd, fdt->close_on_exec);
+ else
+ FD_CLR(fd, fdt->close_on_exec);
+ error = fd;
+#if 1
+ /* Sanity check */
+ if (rcu_dereference(fdt->fd[fd]) != NULL) {
+ printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ }
+#endif
+
+out:
+ spin_unlock(&files->file_lock);
+ return error;
+}
+
+int get_unused_fd(void)
+{
+ return alloc_fd(0, 0);
+}
+EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/inode.c b/fs/inode.c
index b6726f644530..0487ddba1397 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb)
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
+ mapping->writeback_index = 0;
/*
* If the block_device provides a backing_dev_info for client
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c4a1c3c65aac..da3cc460d4df 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
pgrp = task_pgrp(current);
else
pgrp = find_vpid(who);
- do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+ do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
ret = set_task_ioprio(p, ioprio);
if (ret)
break;
- } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+ } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
if (!who)
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
pgrp = task_pgrp(current);
else
pgrp = find_vpid(who);
- do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+ do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
tmpio = get_task_ioprio(p);
if (tmpio < 0)
continue;
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
ret = tmpio;
else
ret = ioprio_best(ret, tmpio);
- } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+ } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
if (!who)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d48..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
goto nope;
/* OK, it's a truncated page */
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto nope;
page_cache_get(page);
@@ -221,7 +221,7 @@ write_out_data:
* blocking lock_buffer().
*/
if (buffer_dirty(bh)) {
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
/* Write out all data to prevent deadlocks */
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_list_lock);
}
if (unlikely(!buffer_uptodate(bh))) {
- if (TestSetPageLocked(bh->b_page)) {
+ if (!trylock_page(bh->b_page)) {
spin_unlock(&journal->j_list_lock);
lock_page(bh->b_page);
spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8dee32007500..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
goto out;
}
- lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_map_acquire(&handle->h_lockdep_map);
out:
return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
- lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+ lock_map_release(&handle->h_lockdep_map);
jbd_free_handle(handle);
return err;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
goto nope;
/* OK, it's a truncated page */
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto nope;
page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
- if (!ret)
- ret = err;
+ if (err) {
+ /*
+ * Because AS_EIO is cleared by
+ * wait_on_page_writeback_range(), set it again so
+ * that user process can get -EIO from fsync().
+ */
+ set_bit(AS_EIO,
+ &jinode->i_vfs_inode->i_mapping->flags);
+
+ if (!ret)
+ ret = err;
+ }
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
* commit block, which happens below in such setting.
*/
err = journal_finish_inode_data_buffers(journal, commit_transaction);
- if (err)
- jbd2_journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
+
+ printk(KERN_WARNING
+ "JBD2: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_create);
EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy);
-EXPORT_SYMBOL(jbd2_journal_update_superblock);
EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19fa..e5d540588fa9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
goto out;
}
- lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_map_acquire(&handle->h_lockdep_map);
out:
return handle;
}
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
- lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+ lock_map_release(&handle->h_lockdep_map);
jbd2_free_handle(handle);
return err;
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
#ifndef _JFFS2_FS_I
#define _JFFS2_FS_I
-#include <linux/version.h>
#include <linux/rbtree.h>
#include <linux/posix_acl.h>
#include <linux/mutex.h>
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ade..6caf1e1ee26d 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
int jffs2_sum_init(struct jffs2_sb_info *c)
{
+ uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
+
c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
return -ENOMEM;
}
- c->summary->sum_buf = vmalloc(c->sector_size);
+ c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
if (!c->summary->sum_buf) {
JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
jffs2_sum_disable_collecting(c->summary);
- vfree(c->summary->sum_buf);
+ kfree(c->summary->sum_buf);
c->summary->sum_buf = NULL;
kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
- uint32_t infosize, uint32_t datasize, int padsize)
+ uint32_t infosize, uint32_t datasize, int padsize)
{
struct jffs2_raw_summary isum;
union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
int ret;
size_t retlen;
+ if (padsize + datasize > MAX_SUMMARY_SIZE) {
+ /* It won't fit in the buffer. Abort summary for this jeb */
+ jffs2_sum_disable_collecting(c->summary);
+
+ JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
+ datasize, padsize, jeb->offset);
+ /* Non-fatal */
+ return 0;
+ }
+ /* Is there enough space for summary? */
+ if (padsize < 0) {
+ /* don't try to write out summary for this jeb */
+ jffs2_sum_disable_collecting(c->summary);
+
+ JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
+ padsize);
+ /* Non-fatal */
+ return 0;
+ }
+
memset(c->summary->sum_buf, 0xff, datasize);
memset(&isum, 0, sizeof(isum));
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
{
int datasize, infosize, padsize;
struct jffs2_eraseblock *jeb;
- int ret;
+ int ret = 0;
dbg_summary("called\n");
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
infosize += padsize;
datasize += padsize;
- /* Is there enough space for summary? */
- if (padsize < 0) {
- /* don't try to write out summary for this jeb */
- jffs2_sum_disable_collecting(c->summary);
-
- JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
- spin_lock(&c->erase_completion_lock);
- return 0;
- }
-
ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
spin_lock(&c->erase_completion_lock);
return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5ce..60207a2ae952 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
#ifndef JFFS2_SUMMARY_H
#define JFFS2_SUMMARY_H
+/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
+ is larger than that, we have to just ditch it and avoid using summary
+ for the eraseblock in question... and it probably doesn't hurt us much
+ anyway. */
+#define MAX_SUMMARY_SIZE 65536
+
#include <linux/uio.h>
#include <linux/jffs2.h>
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cde..1add676a19df 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
s->s_flags = MS_NOUSER;
s->s_maxbytes = ~0ULL;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
s->s_op = ops ? ops : &simple_super_operations;
s->s_time_gran = 1;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399444639337..4a714f64515b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: TEST4 called\n");
resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: LOCK called\n");
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72d..76262c1986f2 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: TEST called\n");
resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
- int rc = rpc_success;
+ __be32 rc = rpc_success;
dprintk("lockd: LOCK called\n");
diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b80128..4ea63ed5e791 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
return retval;
return security_inode_permission(inode,
- mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
+ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
}
/**
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
* 3. We should have write and exec permissions on dir
* 4. We can't do it if dir is immutable (done in permission())
*/
-static inline int may_create(struct inode *dir, struct dentry *child,
- struct nameidata *nd)
+static inline int may_create(struct inode *dir, struct dentry *child)
{
if (child->d_inode)
return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
- int error = may_create(dir, dentry, nd);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
{
- int error = may_create(dir, dentry, NULL);
+ int error = may_create(dir, dentry);
if (error)
return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (!inode)
return -ENOENT;
- error = may_create(dir, new_dentry, NULL);
+ error = may_create(dir, new_dentry);
if (error)
return error;
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return error;
if (!new_dentry->d_inode)
- error = may_create(new_dir, new_dentry, NULL);
+ error = may_create(new_dir, new_dentry);
else
error = may_delete(new_dir, new_dentry, is_dir);
if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8bb..6e283c93b50d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- return do_add_mount(mnt, nd, mnt_flags, NULL);
+ return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
}
/*
* add a mount into a namespace's mount tree
* - provide the option of adding the new mount to an expiration list
*/
-int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+int do_add_mount(struct vfsmount *newmnt, struct path *path,
int mnt_flags, struct list_head *fslist)
{
int err;
down_write(&namespace_sem);
/* Something was mounted here while we slept */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path.mnt, &nd->path.dentry))
+ while (d_mountpoint(path->dentry) &&
+ follow_down(&path->mnt, &path->dentry))
;
err = -EINVAL;
- if (!check_mnt(nd->path.mnt))
+ if (!check_mnt(path->mnt))
goto unlock;
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
- if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
- nd->path.mnt->mnt_root == nd->path.dentry)
+ if (path->mnt->mnt_sb == newmnt->mnt_sb &&
+ path->mnt->mnt_root == path->dentry)
goto unlock;
err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
goto unlock;
newmnt->mnt_flags = mnt_flags;
- if ((err = graft_tree(newmnt, &nd->path)))
+ if ((err = graft_tree(newmnt, path)))
goto unlock;
if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399..66df08dd1caf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
goto out_err;
mntget(mnt);
- err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+ err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
&nfs_automount_list);
if (err < 0) {
mntput(mnt);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..46763d1cd397 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
Opt_err
};
-static match_table_t __initconst tokens = {
+static match_table_t __initdata tokens = {
{Opt_port, "port=%u"},
{Opt_rsize, "rsize=%u"},
{Opt_wsize, "wsize=%u"},
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9abcd2b329f7..e9b20173fef3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw,
}
}
+ if (errors > 0) {
+ dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
+ errors, (errors == 1 ? "" : "s"));
+ if (!sloppy)
+ return 0;
+ }
return 1;
out_nomem:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db46..9dc036f18356 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
/* Look up the dentry */
err = path_lookup(nxp->ex_path, 0, &nd);
if (err)
- goto out_unlock;
+ goto out_put_clp;
err = -EINVAL;
exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
exp_put(exp);
if (fsid_key && !IS_ERR(fsid_key))
cache_put(&fsid_key->h, &svc_expkey_cache);
- if (clp)
- auth_domain_put(clp);
path_put(&nd.path);
+out_put_clp:
+ auth_domain_put(clp);
out_unlock:
exp_writeunlock();
out:
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index b6ed38380ab8..54b8b4140c8f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt)
* enough space for either:
*/
alloc = sizeof(struct posix_ace_state_array)
- + cnt*sizeof(struct posix_ace_state);
+ + cnt*sizeof(struct posix_user_ace_state);
state->users = kzalloc(alloc, GFP_KERNEL);
if (!state->users)
return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f5..e5b51ffafc6c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
static struct nfsd4_operation nfsd4_ops[];
-static inline char *nfsd4_op_name(unsigned opnum);
+static const char *nfsd4_op_name(unsigned opnum);
/*
* COMPOUND call.
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
int slack_bytes;
__be32 status;
- status = nfserr_resource;
- cstate = cstate_alloc();
- if (cstate == NULL)
- goto out;
-
resp->xbuf = &rqstp->rq_res;
resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
resp->tagp = resp->p;
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
goto out;
+ status = nfserr_resource;
+ cstate = cstate_alloc();
+ if (cstate == NULL)
+ goto out;
+
status = nfs_ok;
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
@@ -957,9 +957,9 @@ encode_op:
nfsd4_increment_op_stats(op->opnum);
}
+ cstate_free(cstate);
out:
nfsd4_release_compoundargs(args);
- cstate_free(cstate);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
},
};
-static inline char *
-nfsd4_op_name(unsigned opnum)
+static const char *nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
tbh = bhs[i];
if (!tbh)
continue;
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
/* The buffer dirty state is now irrelevant, just clean it. */
clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
for (i = 0; i < nr_bhs; i++) {
struct buffer_head *tbh = bhs[i];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
continue;
if (unlikely(buffer_uptodate(tbh))) {
unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index e1781c8b1650..9e8a95be7a1e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
// TODO: Consider moving this lot to a separate function! (AIA)
handle_name:
{
- struct dentry *real_dent, *new_dent;
MFT_RECORD *m;
ntfs_attr_search_ctx *ctx;
ntfs_inode *ni = NTFS_I(dent_inode);
@@ -255,93 +254,9 @@ handle_name:
}
nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
- /*
- * Note: No need for dent->d_lock lock as i_mutex is held on the
- * parent inode.
- */
-
- /* Does a dentry matching the nls_name exist already? */
- real_dent = d_lookup(dent->d_parent, &nls_name);
- /* If not, create it now. */
- if (!real_dent) {
- real_dent = d_alloc(dent->d_parent, &nls_name);
- kfree(nls_name.name);
- if (!real_dent) {
- err = -ENOMEM;
- goto err_out;
- }
- new_dent = d_splice_alias(dent_inode, real_dent);
- if (new_dent)
- dput(real_dent);
- else
- new_dent = real_dent;
- ntfs_debug("Done. (Created new dentry.)");
- return new_dent;
- }
+ dent = d_add_ci(dent, dent_inode, &nls_name);
kfree(nls_name.name);
- /* Matching dentry exists, check if it is negative. */
- if (real_dent->d_inode) {
- if (unlikely(real_dent->d_inode != dent_inode)) {
- /* This can happen because bad inodes are unhashed. */
- BUG_ON(!is_bad_inode(dent_inode));
- BUG_ON(!is_bad_inode(real_dent->d_inode));
- }
- /*
- * Already have the inode and the dentry attached, decrement
- * the reference count to balance the ntfs_iget() we did
- * earlier on. We found the dentry using d_lookup() so it
- * cannot be disconnected and thus we do not need to worry
- * about any NFS/disconnectedness issues here.
- */
- iput(dent_inode);
- ntfs_debug("Done. (Already had inode and dentry.)");
- return real_dent;
- }
- /*
- * Negative dentry: instantiate it unless the inode is a directory and
- * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
- * in which case d_move() that in place of the found dentry.
- */
- if (!S_ISDIR(dent_inode->i_mode)) {
- /* Not a directory; everything is easy. */
- d_instantiate(real_dent, dent_inode);
- ntfs_debug("Done. (Already had negative file dentry.)");
- return real_dent;
- }
- spin_lock(&dcache_lock);
- if (list_empty(&dent_inode->i_dentry)) {
- /*
- * Directory without a 'disconnected' dentry; we need to do
- * d_instantiate() by hand because it takes dcache_lock which
- * we already hold.
- */
- list_add(&real_dent->d_alias, &dent_inode->i_dentry);
- real_dent->d_inode = dent_inode;
- spin_unlock(&dcache_lock);
- security_d_instantiate(real_dent, dent_inode);
- ntfs_debug("Done. (Already had negative directory dentry.)");
- return real_dent;
- }
- /*
- * Directory with a 'disconnected' dentry; get a reference to the
- * 'disconnected' dentry.
- */
- new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
- d_alias);
- dget_locked(new_dent);
- spin_unlock(&dcache_lock);
- /* Do security vodoo. */
- security_d_instantiate(real_dent, dent_inode);
- /* Move new_dent in place of real_dent. */
- d_move(new_dent, real_dent);
- /* Balance the ntfs_iget() we did above. */
- iput(dent_inode);
- /* Throw away real_dent. */
- dput(real_dent);
- /* Use new_dent as the actual dentry. */
- ntfs_debug("Done. (Already had negative, disconnected directory "
- "dentry.)");
- return new_dent;
+ return dent;
eio_err_out:
ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 3a8af75351e8..4087fbdac327 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,7 +113,7 @@ typedef struct {
* Reason flags (32-bit). Cumulative flags describing the change(s) to the
* file since it was last opened. I think the names speak for themselves but
* if you disagree check out the descriptions in the Linux NTFS project NTFS
- * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ * documentation: http://www.linux-ntfs.org/
*/
enum {
USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS;
* Source info flags (32-bit). Information about the source of the change(s)
* to the file. For detailed descriptions of what these mean, see the Linux
* NTFS project NTFS documentation:
- * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ * http://www.linux-ntfs.org/
*/
enum {
USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
- if (ocfs2_should_order_data(inode))
- walk_page_buffers(wc->w_handle, page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-
- block_commit_write(tmppage, from, to);
+ if (page_has_buffers(tmppage)) {
+ if (ocfs2_should_order_data(inode))
+ walk_page_buffers(wc->w_handle,
+ page_buffers(tmppage),
+ from, to, NULL,
+ ocfs2_journal_dirty_data);
+
+ block_commit_write(tmppage, from, to);
+ }
}
}
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
to = PAGE_CACHE_SIZE;
}
- if (ocfs2_should_order_data(inode))
- walk_page_buffers(wc->w_handle, page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-
- block_commit_write(tmppage, from, to);
+ if (page_has_buffers(tmppage)) {
+ if (ocfs2_should_order_data(inode))
+ walk_page_buffers(wc->w_handle,
+ page_buffers(tmppage),
+ from, to, NULL,
+ ocfs2_journal_dirty_data);
+ block_commit_write(tmppage, from, to);
+ }
}
out_write_size:
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index d8bfa0eb41b2..52276c02f710 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v)
" message id: %d\n"
" message type: %u\n"
" message key: 0x%08x\n"
- " sock acquiry: %lu.%lu\n"
- " send start: %lu.%lu\n"
- " wait start: %lu.%lu\n",
+ " sock acquiry: %lu.%ld\n"
+ " send start: %lu.%ld\n"
+ " wait start: %lu.%ld\n",
nst, (unsigned long)nst->st_task->pid,
(unsigned long)nst->st_task->tgid,
nst->st_task->comm, nst->st_node,
nst->st_sc, nst->st_id, nst->st_msg_type,
nst->st_msg_key,
nst->st_sock_time.tv_sec,
- (unsigned long)nst->st_sock_time.tv_usec,
+ (long)nst->st_sock_time.tv_usec,
nst->st_send_time.tv_sec,
- (unsigned long)nst->st_send_time.tv_usec,
+ (long)nst->st_send_time.tv_usec,
nst->st_status_time.tv_sec,
- nst->st_status_time.tv_usec);
+ (long)nst->st_status_time.tv_usec);
}
spin_unlock(&o2net_debug_lock);
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return sc; /* unused, just needs to be null when done */
}
-#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec
+#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
static int sc_seq_show(struct seq_file *seq, void *v)
{
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
" remote node: %s\n"
" page off: %zu\n"
" handshake ok: %u\n"
- " timer: %lu.%lu\n"
- " data ready: %lu.%lu\n"
- " advance start: %lu.%lu\n"
- " advance stop: %lu.%lu\n"
- " func start: %lu.%lu\n"
- " func stop: %lu.%lu\n"
+ " timer: %lu.%ld\n"
+ " data ready: %lu.%ld\n"
+ " advance start: %lu.%ld\n"
+ " advance stop: %lu.%ld\n"
+ " func start: %lu.%ld\n"
+ " func stop: %lu.%ld\n"
" func key: %u\n"
" func type: %u\n",
sc,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a27d61581bd6..2bcf706d9dd3 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
#ifdef CONFIG_DEBUG_FS
-void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
- u32 msgkey, struct task_struct *task, u8 node)
+static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+ u32 msgkey, struct task_struct *task, u8 node)
{
INIT_LIST_HEAD(&nst->st_net_debug_item);
nst->st_task = task;
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
nst->st_node = node;
}
-void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
{
do_gettimeofday(&nst->st_sock_time);
}
-void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
{
do_gettimeofday(&nst->st_send_time);
}
-void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
{
do_gettimeofday(&nst->st_status_time);
}
-void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
struct o2net_sock_container *sc)
{
nst->st_sc = sc;
}
-void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
{
nst->st_id = msg_id;
}
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+ u32 msgkey, struct task_struct *task, u8 node)
+{
+}
+
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+{
+}
+
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+ struct o2net_sock_container *sc)
+{
+}
+
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+ u32 msg_id)
+{
+}
+
#endif /* CONFIG_DEBUG_FS */
static inline int o2net_reconnect_delay(void)
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 18307ff81b77..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,42 +224,10 @@ struct o2net_send_tracking {
struct timeval st_send_time;
struct timeval st_status_time;
};
-
-void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
- u32 msgkey, struct task_struct *task, u8 node);
-void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
-void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc);
-void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
-
#else
struct o2net_send_tracking {
u32 dummy;
};
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
- u32 msgkey, struct task_struct *task, u8 node)
-{
-}
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
-{
-}
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
- struct o2net_sock_container *sc)
-{
-}
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
- u32 msg_id)
-{
-}
#endif /* CONFIG_DEBUG_FS */
#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8a1875848080..9cce563fd627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
di->i_size = cpu_to_le64(sb->s_blocksize);
di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
- dir->i_blocks = ocfs2_inode_sector_count(dir);
/*
* This should never fail as our extent list is empty and all
@@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
NULL);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto out_commit;
}
+ /*
+ * Set i_blocks after the extent insert for the most up to
+ * date ip_clusters value.
+ */
+ dir->i_blocks = ocfs2_inode_sector_count(dir);
+
ret = ocfs2_journal_dirty(handle, di_bh);
if (ret) {
mlog_errno(ret);
@@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
len, 0, NULL);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto out_commit;
}
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1d..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
- mutex_unlock(&inode->i_mutex);
out:
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..c47bc2a809c2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
- int dirty);
+ int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
int slot_num);
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
return status;
}
+static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
+{
+ le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
+}
+
+static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
+{
+ return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
+}
+
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
- int dirty)
+ int dirty, int replayed)
{
int status;
unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
flags &= ~OCFS2_JOURNAL_DIRTY_FL;
fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+ if (replayed)
+ ocfs2_bump_recovery_generation(fe);
+
status = ocfs2_write_block(osb, bh, journal->j_inode);
if (status < 0)
mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
* Do not toggle if flush was unsuccessful otherwise
* will leave dirty metadata in a "clean" journal
*/
- status = ocfs2_journal_toggle_dirty(osb, 0);
+ status = ocfs2_journal_toggle_dirty(osb, 0, 0);
if (status < 0)
mlog_errno(status);
}
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
}
}
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
{
int status = 0;
struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
- status = ocfs2_journal_toggle_dirty(osb, 1);
+ status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
if (status < 0) {
mlog_errno(status);
goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
goto bail;
}
- status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);
+ status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
if (status < 0)
mlog_errno(status);
@@ -1034,6 +1047,12 @@ restart:
spin_unlock(&osb->osb_lock);
mlog(0, "All nodes recovered\n");
+ /* Refresh all journal recovery generations from disk */
+ status = ocfs2_check_journals_nolocks(osb);
+ status = (status == -EROFS) ? 0 : status;
+ if (status < 0)
+ mlog_errno(status);
+
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
mlog_exit_void();
}
+static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
+ int slot_num,
+ struct buffer_head **bh,
+ struct inode **ret_inode)
+{
+ int status = -EACCES;
+ struct inode *inode = NULL;
+
+ BUG_ON(slot_num >= osb->max_slots);
+
+ inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
+ slot_num);
+ if (!inode || is_bad_inode(inode)) {
+ mlog_errno(status);
+ goto bail;
+ }
+ SET_INODE_JOURNAL(inode);
+
+ status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ status = 0;
+
+bail:
+ if (inode) {
+ if (status || !ret_inode)
+ iput(inode);
+ else
+ *ret_inode = inode;
+ }
+ return status;
+}
+
/* Does the actual journal replay and marks the journal inode as
* clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
struct ocfs2_dinode *fe;
journal_t *journal = NULL;
struct buffer_head *bh = NULL;
+ u32 slot_reco_gen;
- inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
- slot_num);
- if (inode == NULL) {
- status = -EACCES;
+ status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
+ if (status) {
mlog_errno(status);
goto done;
}
- if (is_bad_inode(inode)) {
- status = -EACCES;
- iput(inode);
- inode = NULL;
- mlog_errno(status);
+
+ fe = (struct ocfs2_dinode *)bh->b_data;
+ slot_reco_gen = ocfs2_get_recovery_generation(fe);
+ brelse(bh);
+ bh = NULL;
+
+ /*
+ * As the fs recovery is asynchronous, there is a small chance that
+ * another node mounted (and recovered) the slot before the recovery
+ * thread could get the lock. To handle that, we dirty read the journal
+ * inode for that slot to get the recovery generation. If it is
+ * different than what we expected, the slot has been recovered.
+ * If not, it needs recovery.
+ */
+ if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
+ mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
+ osb->slot_recovery_generations[slot_num], slot_reco_gen);
+ osb->slot_recovery_generations[slot_num] = slot_reco_gen;
+ status = -EBUSY;
goto done;
}
- SET_INODE_JOURNAL(inode);
+
+ /* Continue with recovery as the journal has not yet been recovered */
status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
fe = (struct ocfs2_dinode *) bh->b_data;
flags = le32_to_cpu(fe->id1.journal1.ij_flags);
+ slot_reco_gen = ocfs2_get_recovery_generation(fe);
if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
mlog(0, "No recovery required for node %d\n", node_num);
+ /* Refresh recovery generation for the slot */
+ osb->slot_recovery_generations[slot_num] = slot_reco_gen;
goto done;
}
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
flags &= ~OCFS2_JOURNAL_DIRTY_FL;
fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+ /* Increment recovery generation to indicate successful recovery */
+ ocfs2_bump_recovery_generation(fe);
+ osb->slot_recovery_generations[slot_num] =
+ ocfs2_get_recovery_generation(fe);
+
status = ocfs2_write_block(osb, bh, inode);
if (status < 0)
mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
+ if (status == -EBUSY) {
+ mlog(0, "Skipping recovery for slot %u (node %u) "
+ "as another node has recovered it\n", slot_num,
+ node_num);
+ status = 0;
+ goto done;
+ }
mlog_errno(status);
goto done;
}
@@ -1334,21 +1418,46 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
{
unsigned int node_num;
int status, i;
+ u32 gen;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_dinode *di;
/* This is called with the super block cluster lock, so we
* know that the slot map can't change underneath us. */
- spin_lock(&osb->osb_lock);
for (i = 0; i < osb->max_slots; i++) {
- if (i == osb->slot_num)
+ /* Read journal inode to get the recovery generation */
+ status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ di = (struct ocfs2_dinode *)bh->b_data;
+ gen = ocfs2_get_recovery_generation(di);
+ brelse(bh);
+ bh = NULL;
+
+ spin_lock(&osb->osb_lock);
+ osb->slot_recovery_generations[i] = gen;
+
+ mlog(0, "Slot %u recovery generation is %u\n", i,
+ osb->slot_recovery_generations[i]);
+
+ if (i == osb->slot_num) {
+ spin_unlock(&osb->osb_lock);
continue;
+ }
status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
- if (status == -ENOENT)
+ if (status == -ENOENT) {
+ spin_unlock(&osb->osb_lock);
continue;
+ }
- if (__ocfs2_recovery_map_test(osb, node_num))
+ if (__ocfs2_recovery_map_test(osb, node_num)) {
+ spin_unlock(&osb->osb_lock);
continue;
+ }
spin_unlock(&osb->osb_lock);
/* Ok, we have a slot occupied by another node which
@@ -1364,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
mlog_errno(status);
goto bail;
}
-
- spin_lock(&osb->osb_lock);
}
- spin_unlock(&osb->osb_lock);
status = 0;
bail:
@@ -1603,49 +1709,41 @@ static int ocfs2_commit_thread(void *arg)
return 0;
}
-/* Look for a dirty journal without taking any cluster locks. Used for
- * hard readonly access to determine whether the file system journals
- * require recovery. */
+/* Reads all the journal inodes without taking any cluster locks. Used
+ * for hard readonly access to determine whether any journal requires
+ * recovery. Also used to refresh the recovery generation numbers after
+ * a journal has been recovered by another node.
+ */
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
{
int ret = 0;
unsigned int slot;
- struct buffer_head *di_bh;
+ struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- struct inode *journal = NULL;
+ int journal_dirty = 0;
for(slot = 0; slot < osb->max_slots; slot++) {
- journal = ocfs2_get_system_file_inode(osb,
- JOURNAL_SYSTEM_INODE,
- slot);
- if (!journal || is_bad_inode(journal)) {
- ret = -EACCES;
- mlog_errno(ret);
- goto out;
- }
-
- di_bh = NULL;
- ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
- 0, journal);
- if (ret < 0) {
+ ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
+ if (ret) {
mlog_errno(ret);
goto out;
}
di = (struct ocfs2_dinode *) di_bh->b_data;
+ osb->slot_recovery_generations[slot] =
+ ocfs2_get_recovery_generation(di);
+
if (le32_to_cpu(di->id1.journal1.ij_flags) &
OCFS2_JOURNAL_DIRTY_FL)
- ret = -EROFS;
+ journal_dirty = 1;
brelse(di_bh);
- if (ret)
- break;
+ di_bh = NULL;
}
out:
- if (journal)
- iput(journal);
-
+ if (journal_dirty)
+ ret = -EROFS;
return ret;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
void ocfs2_journal_shutdown(struct ocfs2_super *osb);
int ocfs2_journal_wipe(struct ocfs2_journal *journal,
int full);
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local);
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
+ int replayed);
int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
void ocfs2_recovery_thread(struct ocfs2_super *osb,
int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
struct ocfs2_slot_info *slot_info;
+ u32 *slot_recovery_generations;
+
spinlock_t node_map_lock;
u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
struct { /* Info for journal system
inodes */
__le32 ij_flags; /* Mounted, version, etc. */
- __le32 ij_pad;
+ __le32 ij_recovery_generation; /* Incremented when the
+ journal is recovered
+ after an unclean
+ shutdown */
} journal1;
} id1; /* Inode type dependant 1 */
/*C0*/ union {
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 10e149ae5e3a..07f348b8d721 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name,
goto out;
}
- /* Ok, the stack is pinned */
- p->sp_count++;
active_stack = p;
-
rc = 0;
out:
+ /* If we found it, pin it */
+ if (!rc)
+ active_stack->sp_count++;
+
spin_unlock(&ocfs2_stack_lock);
return rc;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889aa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
mlog(0, "max_slots for this device: %u\n", osb->max_slots);
+ osb->slot_recovery_generations =
+ kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
+ GFP_KERNEL);
+ if (!osb->slot_recovery_generations) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
init_waitqueue_head(&osb->osb_wipe_event);
osb->osb_orphan_wipes = kcalloc(osb->max_slots,
sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
local = ocfs2_mount_local(osb);
/* will play back anything left in the journal. */
- status = ocfs2_journal_load(osb->journal, local);
+ status = ocfs2_journal_load(osb->journal, local, dirty);
if (status < 0) {
mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
ocfs2_free_slot_info(osb);
kfree(osb->osb_orphan_wipes);
+ kfree(osb->slot_recovery_generations);
/* FIXME
* This belongs in journal shutdown, but because we have to
* allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index dc75f22be3f2..e1c0ec0ae989 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map,
}
if (set) {
set_bit(bit, sbi->s_imap[map]);
- set_bit(bit, (long *) bh->b_data);
+ set_bit(bit, (unsigned long *)bh->b_data);
} else {
clear_bit(bit, sbi->s_imap[map]);
- clear_bit(bit, (long *) bh->b_data);
+ clear_bit(bit, (unsigned long *)bh->b_data);
}
}
mark_buffer_dirty(bh);
@@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
struct buffer_head *bh;
struct omfs_sb_info *sbi = OMFS_SB(sb);
int bits_per_entry = 8 * sb->s_blocksize;
- int map, bit;
+ unsigned int map, bit;
int ret = 0;
u64 tmp;
@@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
if (!bh)
goto out;
- set_bit(bit, (long *) bh->b_data);
+ set_bit(bit, (unsigned long *)bh->b_data);
mark_buffer_dirty(bh);
brelse(bh);
}
@@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count)
struct omfs_sb_info *sbi = OMFS_SB(sb);
int bits_per_entry = 8 * sb->s_blocksize;
u64 tmp;
- int map, bit, ret;
+ unsigned int map, bit;
+ int ret;
tmp = block;
bit = do_div(tmp, bits_per_entry);
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 05a5bc31e4bd..c0757e998876 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
oi = (struct omfs_inode *) bh->b_data;
oi->i_head.h_self = cpu_to_be64(inode->i_ino);
- oi->i_sibling = ~0ULL;
+ oi->i_sibling = ~cpu_to_be64(0ULL);
mark_buffer_dirty(bh);
brelse(bh);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 66e01fae4384..834b2331f6b3 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -26,15 +26,22 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry,
return err ? -EIO : 0;
}
+static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
+{
+ return (sbi->s_sys_blocksize - offset -
+ sizeof(struct omfs_extent)) /
+ sizeof(struct omfs_extent_entry) + 1;
+}
+
void omfs_make_empty_table(struct buffer_head *bh, int offset)
{
struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
- oe->e_next = ~0ULL;
+ oe->e_next = ~cpu_to_be64(0ULL);
oe->e_extent_count = cpu_to_be32(1),
oe->e_fill = cpu_to_be32(0x22),
- oe->e_entry.e_cluster = ~0ULL;
- oe->e_entry.e_blocks = ~0ULL;
+ oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
+ oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
}
int omfs_shrink_inode(struct inode *inode)
@@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode)
struct buffer_head *bh;
u64 next, last;
u32 extent_count;
+ u32 max_extents;
int ret;
/* traverse extent table, freeing each entry that is greater
@@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode)
goto out;
oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+ max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
for (;;) {
- if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
- brelse(bh);
- goto out;
- }
+ if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
+ goto out_brelse;
extent_count = be32_to_cpu(oe->e_extent_count);
+
+ if (extent_count > max_extents)
+ goto out_brelse;
+
last = next;
next = be64_to_cpu(oe->e_next);
entry = &oe->e_entry;
@@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode)
if (!bh)
goto out;
oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+ max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
}
ret = 0;
out:
return ret;
+out_brelse:
+ brelse(bh);
+ return ret;
}
static void omfs_truncate(struct inode *inode)
@@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
goto out;
}
}
- max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
- sizeof(struct omfs_extent)) /
- sizeof(struct omfs_extent_entry) + 1;
+ max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);
/* TODO: add a continuation block here */
if (be32_to_cpu(oe->e_extent_count) > max_count-1)
@@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
sector_t next, offset;
int ret;
u64 new_block;
+ u32 max_extents;
int extent_count;
struct omfs_extent *oe;
struct omfs_extent_entry *entry;
@@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
goto out;
oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+ max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
next = inode->i_ino;
for (;;) {
@@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block,
next = be64_to_cpu(oe->e_next);
entry = &oe->e_entry;
+ if (extent_count > max_extents)
+ goto out_brelse;
+
offset = find_block(inode, entry, block, extent_count, &remain);
if (offset > 0) {
ret = 0;
@@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
if (!bh)
goto out;
oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+ max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
}
if (create) {
ret = omfs_grow_extent(inode, oe, &new_block);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d865f5535436..d29047b1b9b0 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
inode->i_op = &omfs_dir_inops;
inode->i_fop = &omfs_dir_operations;
- inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
- sizeof(struct omfs_header);
+ inode->i_size = sbi->s_sys_blocksize;
inc_nlink(inode);
break;
case OMFS_FILE:
@@ -492,7 +491,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
printk(KERN_ERR "omfs: block count discrepancy between "
"super and root blocks (%llx, %llx)\n",
- sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks));
+ (unsigned long long)sbi->s_num_blocks,
+ (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
goto out_brelse_bh2;
}
diff --git a/fs/open.c b/fs/open.c
index 52647be277a2..07da9359481c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
}
EXPORT_SYMBOL(dentry_open);
-/*
- * Find an empty file descriptor entry, and mark it busy.
- */
-int get_unused_fd_flags(int flags)
-{
- struct files_struct * files = current->files;
- int fd, error;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
-
-repeat:
- fdt = files_fdtable(files);
- fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
- files->next_fd);
-
- /* Do we need to expand the fd array or fd set? */
- error = expand_files(files, fd);
- if (error < 0)
- goto out;
-
- if (error) {
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- goto repeat;
- }
-
- FD_SET(fd, fdt->open_fds);
- if (flags & O_CLOEXEC)
- FD_SET(fd, fdt->close_on_exec);
- else
- FD_CLR(fd, fdt->close_on_exec);
- files->next_fd = fd + 1;
-#if 1
- /* Sanity check */
- if (fdt->fd[fd] != NULL) {
- printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
- fdt->fd[fd] = NULL;
- }
-#endif
- error = fd;
-
-out:
- spin_unlock(&files->file_lock);
- return error;
-}
-
-int get_unused_fd(void)
-{
- return get_unused_fd_flags(0);
-}
-
-EXPORT_SYMBOL(get_unused_fd);
-
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-static cputime_t task_utime(struct task_struct *p)
-{
- return p->utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
- return p->stime;
-}
-#else
-static cputime_t task_utime(struct task_struct *p)
-{
- clock_t utime = cputime_to_clock_t(p->utime),
- total = utime + cputime_to_clock_t(p->stime);
- u64 temp;
-
- /*
- * Use CFS's precise accounting:
- */
- temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-
- if (total) {
- temp *= utime;
- do_div(temp, total);
- }
- utime = (clock_t)temp;
-
- p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
- return p->prev_utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
- clock_t stime;
-
- /*
- * Use CFS's precise accounting. (we subtract utime from
- * the total, to make sure the total observed by userspace
- * grows monotonically - apps rely on that):
- */
- stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
- cputime_to_clock_t(task_utime(p));
-
- if (stime >= 0)
- p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
-
- return p->prev_stime;
-}
-#endif
-
-static cputime_t task_gtime(struct task_struct *p)
-{
- return p->gtime;
-}
-
static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task, int whole)
{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01ed610f9b87..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
- acct.rchar, acct.wchar,
- acct.syscr, acct.syscw,
- acct.read_bytes, acct.write_bytes,
- acct.cancelled_write_bytes);
+ (unsigned long long)acct.rchar,
+ (unsigned long long)acct.wchar,
+ (unsigned long long)acct.syscr,
+ (unsigned long long)acct.syscw,
+ (unsigned long long)acct.read_bytes,
+ (unsigned long long)acct.write_bytes,
+ (unsigned long long)acct.cancelled_write_bytes);
}
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb7..bca0f81eb687 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
return rtn;
}
-static DEFINE_IDR(proc_inum_idr);
+static DEFINE_IDA(proc_inum_ida);
static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
-#define PROC_DYNAMIC_FIRST 0xF0000000UL
+#define PROC_DYNAMIC_FIRST 0xF0000000U
/*
* Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,34 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
*/
static unsigned int get_inode_number(void)
{
- int i, inum = 0;
+ unsigned int i;
int error;
retry:
- if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
+ if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
return 0;
spin_lock(&proc_inum_lock);
- error = idr_get_new(&proc_inum_idr, NULL, &i);
+ error = ida_get_new(&proc_inum_ida, &i);
spin_unlock(&proc_inum_lock);
if (error == -EAGAIN)
goto retry;
else if (error)
return 0;
- inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
-
- /* inum will never be more than 0xf0ffffff, so no check
- * for overflow.
- */
-
- return inum;
+ if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
+ spin_lock(&proc_inum_lock);
+ ida_remove(&proc_inum_ida, i);
+ spin_unlock(&proc_inum_lock);
+ return 0;
+ }
+ return PROC_DYNAMIC_FIRST + i;
}
static void release_inode_number(unsigned int inum)
{
- int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
-
spin_lock(&proc_inum_lock);
- idr_remove(&proc_inum_idr, id);
+ ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
spin_unlock(&proc_inum_lock);
}
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
}
seq_printf(m,
- "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+ "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
- vma->vm_pgoff << PAGE_SHIFT,
+ ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..00f10a2dcf12 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
#include <linux/tty.h>
#include <linux/string.h>
#include <linux/mman.h>
+#include <linux/quicklist.h>
#include <linux/proc_fs.h>
#include <linux/ioport.h>
#include <linux/mm.h>
@@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
+ "VmallocChunk: %8lu kB\n"
+ "Quicklists: %8lu kB\n",
K(i.totalram),
K(i.freeram),
K(i.bufferram),
@@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10,
- vmi.largest_chunk >> 10
+ vmi.largest_chunk >> 10,
+ K(quicklist_total_size())
);
len += hugetlb_report_meminfo(page + len);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
ino = inode->i_ino;
}
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p',
- vma->vm_pgoff << PAGE_SHIFT,
+ ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
/*
diff --git a/fs/readdir.c b/fs/readdir.c
index 4e026e5407fb..93a7559bbfd8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset
if (buf->result)
return -EINVAL;
d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+ if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+ buf->result = -EOVERFLOW;
return -EOVERFLOW;
+ }
buf->result++;
dirent = buf->dirent;
if (!access_ok(VERIFY_WRITE, dirent,
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
if (reclen > buf->count)
return -EINVAL;
d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+ if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+ buf->error = -EOVERFLOW;
return -EOVERFLOW;
+ }
dirent = buf->previous;
if (dirent) {
if (__put_user(offset, &dirent->d_off))
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
} else {
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b5..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
static void release_buffer_page(struct buffer_head *bh)
{
struct page *page = bh->b_page;
- if (!page->mapping && !TestSetPageLocked(page)) {
+ if (!page->mapping && trylock_page(page)) {
page_cache_get(page);
put_bh(bh);
if (!page->mapping)
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
jh = JH_ENTRY(list->next);
bh = jh->bh;
get_bh(bh);
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
if (!buffer_dirty(bh)) {
list_move(&jh->list, &tmp);
goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
{
PROC_INFO_INC(p_s_sb, journal.prepare);
- if (test_set_buffer_locked(bh)) {
+ if (!trylock_buffer(bh)) {
if (!wait)
return 0;
lock_buffer(bh);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2d..d318c7e663fa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -27,7 +27,6 @@
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/quotaops.h>
struct file_system_type reiserfs_fs_type;
@@ -2076,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
return err;
/* Quotafile not on the same filesystem? */
if (nd.path.mnt->mnt_sb != sb) {
- path_put(&nd.path);
- return -EXDEV;
+ err = -EXDEV;
+ goto out;
}
inode = nd.path.dentry->d_inode;
/* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2086,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
reiserfs_warning(sb,
"reiserfs: Unpacking tail of quota file failed"
" (%d). Cannot turn on quotas.", err);
- path_put(&nd.path);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
mark_inode_dirty(inode);
}
@@ -2109,13 +2108,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
/* Just start temporary transaction and finish it */
err = journal_begin(&th, sb, 1);
if (err)
- return err;
+ goto out;
err = journal_end_sync(&th, sb, 1);
if (err)
- return err;
+ goto out;
}
+ err = vfs_quota_on_path(sb, type, format_id, &nd.path);
+out:
path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, 0);
+ return err;
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 8e51a2aaa977..60d2f822e87b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
romfs_readpage(struct file *file, struct page * page)
{
struct inode *inode = page->mapping->host;
- loff_t offset, avail, readlen;
+ loff_t offset, size;
+ unsigned long filled;
void *buf;
int result = -EIO;
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
/* 32 bit warning -- but not for us :) */
offset = page_offset(page);
- if (offset < i_size_read(inode)) {
- avail = inode->i_size-offset;
- readlen = min_t(unsigned long, avail, PAGE_SIZE);
- if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
- if (readlen < PAGE_SIZE) {
- memset(buf + readlen,0,PAGE_SIZE-readlen);
- }
- SetPageUptodate(page);
- result = 0;
+ size = i_size_read(inode);
+ filled = 0;
+ result = 0;
+ if (offset < size) {
+ unsigned long readlen;
+
+ size -= offset;
+ readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
+
+ filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
+
+ if (filled != readlen) {
+ SetPageError(page);
+ filled = 0;
+ result = -EIO;
}
}
- if (result) {
- memset(buf, 0, PAGE_SIZE);
- SetPageError(page);
- }
+
+ if (filled < PAGE_SIZE)
+ memset(buf + filled, 0, PAGE_SIZE-filled);
+
+ if (!result)
+ SetPageUptodate(page);
flush_dcache_page(page);
unlock_page(page);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49b..bd20f7f5a933 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
goto Done;
}
/* we need at least one record in buffer */
+ pos = m->index;
+ p = m->op->start(m, &pos);
while (1) {
- pos = m->index;
- p = m->op->start(m, &pos);
err = PTR_ERR(p);
if (!p || IS_ERR(p))
break;
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
break;
if (unlikely(err))
m->count = 0;
+ if (unlikely(!m->count)) {
+ p = m->op->next(m, p, &pos);
+ m->index = pos;
+ continue;
+ }
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
goto Enomem;
m->count = 0;
m->version = 0;
+ pos = m->index;
+ p = m->op->start(m, &pos);
}
m->op->stop(m, p);
m->count = 0;
@@ -443,6 +450,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
return -1;
}
+int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
+{
+ size_t len = bitmap_scnprintf_len(nr_bits);
+
+ if (m->count + len < m->size) {
+ bitmap_scnprintf(m->buf + m->count, m->size - m->count,
+ bits, nr_bits);
+ m->count += len;
+ return 0;
+ }
+ m->count = m->size;
+ return -1;
+}
+
static void *single_start(struct seq_file *p, loff_t *pos)
{
return NULL + (*pos == 0);
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af6..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
* for an in-flight io page
*/
if (flags & SPLICE_F_NONBLOCK) {
- if (TestSetPageLocked(page)) {
+ if (!trylock_page(page)) {
error = -EAGAIN;
break;
}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d81fb9ed2b8e..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
- /* And make sure we have twice the index size of space reserved */
- idx_size <<= 1;
+ /* And make sure we have thrice the index size of space reserved */
+ idx_size = idx_size + (idx_size << 1);
/*
* We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
int subtract_lebs;
long long available;
- /*
- * Force the amount available to the total size reported if the used
- * space is zero.
- */
- if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
- c->budg_data_growth + c->budg_dd_growth == 0) {
- /* Do the same calculation as for c->block_cnt */
- available = c->main_lebs - 2;
- available *= c->leb_size - c->dark_wm;
- return available;
- }
-
available = c->main_bytes - c->lst.total_used;
/*
@@ -388,11 +376,11 @@ static int can_use_rp(struct ubifs_info *c)
* This function makes sure UBIFS has enough free eraseblocks for index growth
* and data.
*
- * When budgeting index space, UBIFS reserves twice as more LEBs as the index
+ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
* would take if it was consolidated and written to the flash. This guarantees
* that the "in-the-gaps" commit method always succeeds and UBIFS will always
* be able to commit dirty index. So this function basically adds amount of
- * budgeted index space to the size of the current index, multiplies this by 2,
+ * budgeted index space to the size of the current index, multiplies this by 3,
* and makes sure this does not exceed the amount of free eraseblocks.
*
* Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
@@ -543,8 +531,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
int err, idx_growth, data_growth, dd_growth;
struct retries_info ri;
+ ubifs_assert(req->new_page <= 1);
+ ubifs_assert(req->dirtied_page <= 1);
+ ubifs_assert(req->new_dent <= 1);
+ ubifs_assert(req->mod_dent <= 1);
+ ubifs_assert(req->new_ino <= 1);
+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
ubifs_assert(req->dirtied_ino <= 4);
ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(!(req->new_ino_d & 7));
+ ubifs_assert(!(req->dirtied_ino_d & 7));
data_growth = calc_data_growth(c, req);
dd_growth = calc_dd_growth(c, req);
@@ -618,8 +614,16 @@ again:
*/
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
+ ubifs_assert(req->new_page <= 1);
+ ubifs_assert(req->dirtied_page <= 1);
+ ubifs_assert(req->new_dent <= 1);
+ ubifs_assert(req->mod_dent <= 1);
+ ubifs_assert(req->new_ino <= 1);
+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
ubifs_assert(req->dirtied_ino <= 4);
ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(!(req->new_ino_d & 7));
+ ubifs_assert(!(req->dirtied_ino_d & 7));
if (!req->recalculate) {
ubifs_assert(req->idx_growth >= 0);
ubifs_assert(req->data_growth >= 0);
@@ -647,7 +651,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
ubifs_assert(c->budg_idx_growth >= 0);
ubifs_assert(c->budg_data_growth >= 0);
+ ubifs_assert(c->budg_dd_growth >= 0);
ubifs_assert(c->min_idx_lebs < c->main_lebs);
+ ubifs_assert(!(c->budg_idx_growth & 7));
+ ubifs_assert(!(c->budg_data_growth & 7));
+ ubifs_assert(!(c->budg_dd_growth & 7));
spin_unlock(&c->space_lock);
}
@@ -686,41 +694,114 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
struct ubifs_inode *ui)
{
- struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
- .dirtied_ino_d = ui->data_len};
+ struct ubifs_budget_req req;
+ memset(&req, 0, sizeof(struct ubifs_budget_req));
+ req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
ubifs_release_budget(c, &req);
}
/**
- * ubifs_budg_get_free_space - return amount of free space.
+ * ubifs_reported_space - calculate reported free space.
+ * @c: the UBIFS file-system description object
+ * @free: amount of free space
+ *
+ * This function calculates amount of free space which will be reported to
+ * user-space. User-space application tend to expect that if the file-system
+ * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
+ * are able to write a file of size N. UBIFS attaches node headers to each data
+ * node and it has to write indexind nodes as well. This introduces additional
+ * overhead, and UBIFS it has to report sligtly less free space to meet the
+ * above expectetion.
+ *
+ * This function assumes free space is made up of uncompressed data nodes and
+ * full index nodes (one per data node, tripled because we always allow enough
+ * space to write the index thrice).
+ *
+ * Note, the calculation is pessimistic, which means that most of the time
+ * UBIFS reports less space than it actually has.
+ */
+long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
+{
+ int divisor, factor, f;
+
+ /*
+ * Reported space size is @free * X, where X is UBIFS block size
+ * divided by UBIFS block size + all overhead one data block
+ * introduces. The overhead is the node header + indexing overhead.
+ *
+ * Indexing overhead calculations are based on the following formula:
+ * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
+ * of data nodes, f - fanout. Because effective UBIFS fanout is twice
+ * as less than maximum fanout, we assume that each data node
+ * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
+ * Note, the multiplier 3 is because UBIFS reseves thrice as more space
+ * for the index.
+ */
+ f = c->fanout > 3 ? c->fanout >> 1 : 2;
+ factor = UBIFS_BLOCK_SIZE;
+ divisor = UBIFS_MAX_DATA_NODE_SZ;
+ divisor += (c->max_idx_node_sz * 3) / (f - 1);
+ free *= factor;
+ do_div(free, divisor);
+ return free;
+}
+
+/**
+ * ubifs_get_free_space - return amount of free space.
* @c: UBIFS file-system description object
*
- * This function returns amount of free space on the file-system.
+ * This function calculates amount of free space to report to user-space.
+ *
+ * Because UBIFS may introduce substantial overhead (the index, node headers,
+ * alighment, wastage at the end of eraseblocks, etc), it cannot report real
+ * amount of free flash space it has (well, because not all dirty space is
+ * reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
+ * it would bread user expectetion about what free space is. Users seem to
+ * accustomed to assume that if the file-system reports N bytes of free space,
+ * they would be able to fit a file of N bytes to the FS. This almost works for
+ * traditional file-systems, because they have way less overhead than UBIFS.
+ * So, to keep users happy, UBIFS tries to take the overhead into account.
*/
-long long ubifs_budg_get_free_space(struct ubifs_info *c)
+long long ubifs_get_free_space(struct ubifs_info *c)
{
- int min_idx_lebs, rsvd_idx_lebs;
+ int min_idx_lebs, rsvd_idx_lebs, lebs;
long long available, outstanding, free;
- /* Do exactly the same calculations as in 'do_budget_space()' */
spin_lock(&c->space_lock);
min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ outstanding = c->budg_data_growth + c->budg_dd_growth;
- if (min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
- else
- rsvd_idx_lebs = 0;
-
- if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
- - c->lst.taken_empty_lebs) {
+ /*
+ * Force the amount available to the total size reported if the used
+ * space is zero.
+ */
+ if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
spin_unlock(&c->space_lock);
- return 0;
+ return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
}
available = ubifs_calc_available(c, min_idx_lebs);
- outstanding = c->budg_data_growth + c->budg_dd_growth;
- c->min_idx_lebs = min_idx_lebs;
+
+ /*
+ * When reporting free space to user-space, UBIFS guarantees that it is
+ * possible to write a file of free space size. This means that for
+ * empty LEBs we may use more precise calculations than
+ * 'ubifs_calc_available()' is using. Namely, we know that in empty
+ * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
+ * Thus, amend the available space.
+ *
+ * Note, the calculations below are similar to what we have in
+ * 'do_budget_space()', so refer there for comments.
+ */
+ if (min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
+ else
+ rsvd_idx_lebs = 0;
+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+ c->lst.taken_empty_lebs;
+ lebs -= rsvd_idx_lebs;
+ available += lebs * (c->dark_wm - c->leb_overhead);
spin_unlock(&c->space_lock);
if (available > outstanding)
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 3b516316c9b3..0a6aa2cc78f0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c)
goto out_up;
}
+ c->cmt_no += 1;
err = ubifs_gc_start_commit(c);
if (err)
goto out_up;
@@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c)
goto out;
mutex_lock(&c->mst_mutex);
- c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no);
+ c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
c->mst_node->root_offs = cpu_to_le32(zroot.offs);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4e3aaeba4eca..b9cb77473758 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
{
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n",
- lst->empty_lebs, lst->idx_lebs);
+ printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, "
+ "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
"total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
lst->total_dirty);
@@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c)
struct ubifs_gced_idx_leb *idx_gc;
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
- "budg_dd_growth %lld, budg_idx_growth %lld\n",
+ printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
+ "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
"freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
@@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
struct ubifs_lprops lp;
struct ubifs_lp_stats lst;
- printk(KERN_DEBUG "Dumping LEB properties\n");
+ printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid);
ubifs_get_lp_stats(c, &lst);
dbg_dump_lstats(&lst);
@@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
if (dbg_failure_mode)
return;
- printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
+ printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
if (IS_ERR(sleb)) {
@@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
{
int i;
- printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
- cat, heap->cnt);
+ printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n",
+ current->pid, cat, heap->cnt);
for (i = 0; i < heap->cnt; i++) {
struct ubifs_lprops *lprops = heap->arr[i];
@@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
{
int i;
- printk(KERN_DEBUG "Dumping pnode:\n");
+ printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid);
printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
(size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
int level;
printk(KERN_DEBUG "\n");
- printk(KERN_DEBUG "Dumping the TNC tree\n");
+ printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid);
znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
level = znode->level;
printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
int offset, int len, int dtype)
{
- int err;
+ int err, failing;
if (in_failure_mode(desc))
return -EIO;
- if (do_fail(desc, lnum, 1))
+ failing = do_fail(desc, lnum, 1);
+ if (failing)
cut_data(buf, len);
err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
if (err)
return err;
- if (in_failure_mode(desc))
+ if (failing)
return -EIO;
return 0;
}
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 3c4f1e93c9e0..50315fc57185 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,7 @@
#define UBIFS_DBG(op) op
-#define ubifs_assert(expr) do { \
+#define ubifs_assert(expr) do { \
if (unlikely(!(expr))) { \
printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
__func__, __LINE__, current->pid); \
@@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c,
const union ubifs_key *key);
/*
- * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
+ * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
* macros.
*/
#define DBGKEY(key) dbg_key_str0(c, (key))
#define DBGKEY1(key) dbg_key_str1(c, (key))
/* General messages */
-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
/* Additional journal messages */
-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
/* Additional TNC messages */
-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
/* Additional lprops messages */
-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
/* Additional LEB find messages */
-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
/* Additional mount messages */
-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
/* Additional I/O messages */
-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
/* Additional commit messages */
-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
/* Additional budgeting messages */
-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
/* Additional log messages */
-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
/* Additional gc messages */
-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
/* Additional scan messages */
-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
/* Additional recovery messages */
-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
/*
* Debugging message type flags (must match msg_type_names in debug.c).
@@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
struct ubifs_zbranch *zbr, void *priv);
typedef int (*dbg_znode_callback)(struct ubifs_info *c,
struct ubifs_znode *znode, void *priv);
-
int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
dbg_znode_callback znode_cb, void *priv);
/* Checking functions */
int dbg_check_lprops(struct ubifs_info *c);
-
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
-
int dbg_check_cats(struct ubifs_info *c);
-
int dbg_check_ltab(struct ubifs_info *c);
-
int dbg_check_synced_i_size(struct inode *inode);
-
int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
-
int dbg_check_tnc(struct ubifs_info *c, int extra);
-
int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
-
int dbg_check_filesystem(struct ubifs_info *c);
-
void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
int add_pos);
-
int dbg_check_lprops(struct ubifs_info *c);
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
int row, int col);
@@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
#else /* !CONFIG_UBIFS_FS_DEBUG */
#define UBIFS_DBG(op)
-#define ubifs_assert(expr) ({})
-#define ubifs_assert_cmt_locked(c)
+
+/* Use "if (0)" to make compiler check arguments even if debugging is off */
+#define ubifs_assert(expr) do { \
+ if (0 && (expr)) \
+ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
+ __func__, __LINE__, current->pid); \
+} while (0)
+
+#define dbg_err(fmt, ...) do { \
+ if (0) \
+ ubifs_err(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define dbg_msg(fmt, ...) do { \
+ if (0) \
+ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
+ current->pid, __func__, ##__VA_ARGS__); \
+} while (0)
+
#define dbg_dump_stack()
-#define dbg_err(fmt, ...) ({})
-#define dbg_msg(fmt, ...) ({})
-#define dbg_key(c, key, fmt, ...) ({})
-
-#define dbg_gen(fmt, ...) ({})
-#define dbg_jnl(fmt, ...) ({})
-#define dbg_tnc(fmt, ...) ({})
-#define dbg_lp(fmt, ...) ({})
-#define dbg_find(fmt, ...) ({})
-#define dbg_mnt(fmt, ...) ({})
-#define dbg_io(fmt, ...) ({})
-#define dbg_cmt(fmt, ...) ({})
-#define dbg_budg(fmt, ...) ({})
-#define dbg_log(fmt, ...) ({})
-#define dbg_gc(fmt, ...) ({})
-#define dbg_scan(fmt, ...) ({})
-#define dbg_rcvry(fmt, ...) ({})
-
-#define dbg_ntype(type) ""
-#define dbg_cstate(cmt_state) ""
-#define dbg_get_key_dump(c, key) ({})
-#define dbg_dump_inode(c, inode) ({})
-#define dbg_dump_node(c, node) ({})
-#define dbg_dump_budget_req(req) ({})
-#define dbg_dump_lstats(lst) ({})
-#define dbg_dump_budg(c) ({})
-#define dbg_dump_lprop(c, lp) ({})
-#define dbg_dump_lprops(c) ({})
-#define dbg_dump_leb(c, lnum) ({})
-#define dbg_dump_znode(c, znode) ({})
-#define dbg_dump_heap(c, heap, cat) ({})
-#define dbg_dump_pnode(c, pnode, parent, iip) ({})
-#define dbg_dump_tnc(c) ({})
-#define dbg_dump_index(c) ({})
+#define ubifs_assert_cmt_locked(c)
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+
+#define DBGKEY(key) ((char *)(key))
+#define DBGKEY1(key) ((char *)(key))
+
+#define dbg_ntype(type) ""
+#define dbg_cstate(cmt_state) ""
+#define dbg_get_key_dump(c, key) ({})
+#define dbg_dump_inode(c, inode) ({})
+#define dbg_dump_node(c, node) ({})
+#define dbg_dump_budget_req(req) ({})
+#define dbg_dump_lstats(lst) ({})
+#define dbg_dump_budg(c) ({})
+#define dbg_dump_lprop(c, lp) ({})
+#define dbg_dump_lprops(c) ({})
+#define dbg_dump_leb(c, lnum) ({})
+#define dbg_dump_znode(c, znode) ({})
+#define dbg_dump_heap(c, heap, cat) ({})
+#define dbg_dump_pnode(c, pnode, parent, iip) ({})
+#define dbg_dump_tnc(c) ({})
+#define dbg_dump_index(c) ({})
+#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
#define dbg_old_index_check_init(c, zroot) 0
#define dbg_check_old_index(c, zroot) 0
-
#define dbg_check_cats(c) 0
-
#define dbg_check_ltab(c) 0
-
#define dbg_check_synced_i_size(inode) 0
-
#define dbg_check_dir_size(c, dir) 0
-
#define dbg_check_tnc(c, x) 0
-
#define dbg_check_idx_size(c, idx_size) 0
-
#define dbg_check_filesystem(c) 0
-
#define dbg_check_heap(c, heap, cat, add_pos) ({})
-
#define dbg_check_lprops(c) 0
#define dbg_check_lpt_nodes(c, cnode, row, col) 0
-
#define dbg_force_in_the_gaps_enabled 0
#define dbg_force_in_the_gaps() 0
-
#define dbg_failure_mode 0
#define dbg_failure_mode_registration(c) ({})
#define dbg_failure_mode_deregistration(c) ({})
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e90374be7d3b..2b267c9a1806 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
}
inode->i_ino = ++c->highest_inum;
- inode->i_generation = ++c->vfs_gen;
/*
* The creation sequence number remains with this inode for its
* lifetime. All nodes for this inode have a greater sequence number,
@@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
if (err) {
- /*
- * Do not hash the direntry if parent 'i_nlink' is zero, because
- * this has side-effects - '->delete_inode()' call will not be
- * called for the parent orphan inode, because 'd_count' of its
- * direntry will stay 1 (it'll be negative direntry I guess)
- * and prevent 'iput_final()' until the dentry is destroyed due
- * to unmount or memory pressure.
- */
- if (err == -ENOENT && dir->i_nlink != 0) {
+ if (err == -ENOENT) {
dbg_gen("not found");
goto done;
}
@@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
struct ubifs_inode *dir_ui = ubifs_inode(dir);
int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
/*
* Budget request settings: new direntry, changing the target inode,
@@ -596,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
if (err) {
if (err != -ENOSPC)
return err;
- err = 0;
budgeted = 0;
}
@@ -727,8 +717,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct ubifs_inode *dir_ui = ubifs_inode(dir);
struct ubifs_info *c = dir->i_sb->s_fs_info;
int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .dirtied_ino_d = 1 };
+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
@@ -789,7 +778,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
int err, devlen = 0;
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = devlen, .dirtied_ino = 1 };
+ .new_ino_d = ALIGN(devlen, 8),
+ .dirtied_ino = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
@@ -863,7 +853,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
int err, len = strlen(symname);
int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = len, .dirtied_ino = 1 };
+ .new_ino_d = ALIGN(len, 8),
+ .dirtied_ino = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
@@ -1012,7 +1003,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
.dirtied_ino = 3 };
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
- .dirtied_ino_d = old_inode_ui->data_len };
+ .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
struct timespec time;
/*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 8565e586e533..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
int err;
struct ubifs_budget_req req;
loff_t old_size = inode->i_size, new_size = attr->ia_size;
- int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
+ int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
struct ubifs_inode *ui = ubifs_inode(inode);
dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
/* A funny way to budget for truncation node */
req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
err = ubifs_budget_space(c, &req);
- if (err)
- return err;
+ if (err) {
+ /*
+ * Treat truncations to zero as deletion and always allow them,
+ * just like we do for '->unlink()'.
+ */
+ if (new_size || err != -ENOSPC)
+ return err;
+ budgeted = 0;
+ }
err = vmtruncate(inode, new_size);
if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
mutex_unlock(&ui->ui_mutex);
out_budg:
- ubifs_release_budget(c, &req);
+ if (budgeted)
+ ubifs_release_budget(c, &req);
+ else {
+ c->nospace = c->nospace_rp = 0;
+ smp_wmb();
+ }
return err;
}
@@ -890,7 +902,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
loff_t new_size = attr->ia_size;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_budget_req req = { .dirtied_ino = 1,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
err = ubifs_budget_space(c, &req);
if (err)
@@ -941,7 +953,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = dentry->d_inode;
struct ubifs_info *c = inode->i_sb->s_fs_info;
- dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
+ dbg_gen("ino %lu, mode %#x, ia_valid %#x",
+ inode->i_ino, inode->i_mode, attr->ia_valid);
err = inode_change_ok(inode, attr);
if (err)
return err;
@@ -1051,7 +1064,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
if (mctime_update_needed(inode, &now)) {
int err, release;
struct ubifs_budget_req req = { .dirtied_ino = 1,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
err = ubifs_budget_space(c, &req);
if (err)
@@ -1270,6 +1283,7 @@ struct file_operations ubifs_file_operations = {
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
.splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
#ifdef CONFIG_COMPAT
.compat_ioctl = ubifs_compat_ioctl,
#endif
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 10394c548367..e045c8b55423 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
* dirty index heap, and it falls-back to LPT scanning if the heaps are empty
* or do not have an LEB which satisfies the @min_space criteria.
*
- * Note:
- * o LEBs which have less than dead watermark of dirty space are never picked
- * by this function;
- *
- * Returns zero and the LEB properties of
- * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
- * negative error code in case of other failures. The returned LEB is marked as
- * "taken".
+ * Note, LEBs which have less than dead watermark of free + dirty space are
+ * never picked by this function.
*
* The additional @pick_free argument controls if this function has to return a
* free or freeable LEB if one is present. For example, GC must to set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
*
* In addition @pick_free is set to %2 by the recovery process in order to
* recover gc_lnum in which case an index LEB must not be returned.
+ *
+ * This function returns zero and the LEB properties of found dirty LEB in case
+ * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
+ * case of other failures. The returned LEB is marked as "taken".
*/
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int lebs, rsvd_idx_lebs = 0;
spin_lock(&c->space_lock);
- lebs = c->lst.empty_lebs;
+ lebs = c->lst.empty_lebs + c->idx_gc_cnt;
lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
/*
@@ -290,9 +288,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
idx_lp = idx_heap->arr[0];
sum = idx_lp->free + idx_lp->dirty;
/*
- * Since we reserve twice as more space for the index than it
+ * Since we reserve thrice as much space for the index than it
* actually takes, it does not make sense to pick indexing LEBs
- * with less than half LEB of dirty space.
+ * with less than, say, half LEB of dirty space. May be half is
+ * not the optimal boundary - this should be tested and
+ * checked. This boundary should determine how much we use
+ * in-the-gaps to consolidate the index comparing to how much
+ * we use garbage collector to consolidate it. The "half"
+ * criteria just feels to be fine.
*/
if (sum < min_space || sum < c->half_leb_size)
idx_lp = NULL;
@@ -312,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
lp = idx_lp;
if (lp) {
- ubifs_assert(lp->dirty >= c->dead_wm);
+ ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
goto found;
}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..13f1019c859f 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
if (err)
goto out;
+ /* Allow for races with TNC */
+ c->gced_lnum = lnum;
+ smp_wmb();
+ c->gc_seq += 1;
+ smp_wmb();
+
if (c->gc_lnum == -1) {
c->gc_lnum = lnum;
err = LEB_RETAINED;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3374f91b6709..054363f2b207 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -54,6 +54,20 @@
#include "ubifs.h"
/**
+ * ubifs_ro_mode - switch UBIFS to read read-only mode.
+ * @c: UBIFS file-system description object
+ * @err: error code which is the reason of switching to R/O mode
+ */
+void ubifs_ro_mode(struct ubifs_info *c, int err)
+{
+ if (!c->ro_media) {
+ c->ro_media = 1;
+ ubifs_warn("switched to read-only mode, error %d", err);
+ dbg_dump_stack();
+ }
+}
+
+/**
* ubifs_check_node - check node.
* @c: UBIFS file-system description object
* @buf: node to check
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f5..22993f867d19 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -447,13 +447,11 @@ static int get_dent_type(int mode)
* @ino: buffer in which to pack inode node
* @inode: inode to pack
* @last: indicates the last node of the group
- * @last_reference: non-zero if this is a deletion inode
*/
static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
- const struct inode *inode, int last,
- int last_reference)
+ const struct inode *inode, int last)
{
- int data_len = 0;
+ int data_len = 0, last_reference = !inode->i_nlink;
struct ubifs_inode *ui = ubifs_inode(inode);
ino->ch.node_type = UBIFS_INO_NODE;
@@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
ubifs_prep_grp_node(c, dent, dlen, 0);
ino = (void *)dent + aligned_dlen;
- pack_inode(c, ino, inode, 0, last_reference);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + aligned_ilen;
- pack_inode(c, ino, dir, 1, 0);
+ pack_inode(c, ino, dir, 1);
if (last_reference) {
err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
release_head(c, BASEHD);
goto out_finish;
}
+ ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -750,30 +749,25 @@ out_free:
* ubifs_jnl_write_inode - flush inode to the journal.
* @c: UBIFS file-system description object
* @inode: inode to flush
- * @deletion: inode has been deleted
*
* This function writes inode @inode to the journal. If the inode is
* synchronous, it also synchronizes the write-buffer. Returns zero in case of
* success and a negative error code in case of failure.
*/
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
- int deletion)
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
- int err, len, lnum, offs, sync = 0;
+ int err, lnum, offs;
struct ubifs_ino_node *ino;
struct ubifs_inode *ui = ubifs_inode(inode);
+ int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
- dbg_jnl("ino %lu%s", inode->i_ino,
- deletion ? " (last reference)" : "");
- if (deletion)
- ubifs_assert(inode->i_nlink == 0);
+ dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
- len = UBIFS_INO_NODE_SZ;
/*
* If the inode is being deleted, do not write the attached data. No
* need to synchronize the write-buffer either.
*/
- if (!deletion) {
+ if (!last_reference) {
len += ui->data_len;
sync = IS_SYNC(inode);
}
@@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 1, deletion);
+ pack_inode(c, ino, inode, 1);
err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
if (err)
goto out_release;
@@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
inode->i_ino);
release_head(c, BASEHD);
- if (deletion) {
+ if (last_reference) {
err = ubifs_tnc_remove_ino(c, inode->i_ino);
if (err)
goto out_ro;
@@ -828,6 +822,65 @@ out_free:
}
/**
+ * ubifs_jnl_delete_inode - delete an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to delete
+ *
+ * This function deletes inode @inode which includes removing it from orphans,
+ * deleting it from TNC and, in some cases, writing a deletion inode to the
+ * journal.
+ *
+ * When regular file inodes are unlinked or a directory inode is removed, the
+ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
+ * direntry to the media, and adds the inode to orphans. After this, when the
+ * last reference to this inode has been dropped, this function is called. In
+ * general, it has to write one more deletion inode to the media, because if
+ * a commit happened between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
+ * anymore, and in fact it might not be on the flash anymore, because it might
+ * have been garbage-collected already. And for optimization reasons UBIFS does
+ * not read the orphan area if it has been unmounted cleanly, so it would have
+ * no indication in the journal that there is a deleted inode which has to be
+ * removed from TNC.
+ *
+ * However, if there was no commit between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
+ * inode to the media for the second time. And this is quite a typical case.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
+{
+ int err;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+
+ ubifs_assert(inode->i_nlink == 0);
+
+ if (ui->del_cmtno != c->cmt_no)
+ /* A commit happened for sure */
+ return ubifs_jnl_write_inode(c, inode);
+
+ down_read(&c->commit_sem);
+ /*
+ * Check commit number again, because the first test has been done
+ * without @c->commit_sem, so a commit might have happened.
+ */
+ if (ui->del_cmtno != c->cmt_no) {
+ up_read(&c->commit_sem);
+ return ubifs_jnl_write_inode(c, inode);
+ }
+
+ err = ubifs_tnc_remove_ino(c, inode->i_ino);
+ if (err)
+ ubifs_ro_mode(c, err);
+ else
+ ubifs_delete_orphan(c, inode->i_ino);
+ up_read(&c->commit_sem);
+ return err;
+}
+
+/**
* ubifs_jnl_rename - rename a directory entry.
* @c: UBIFS file-system description object
* @old_dir: parent inode of directory entry to rename
@@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
p = (void *)dent2 + aligned_dlen2;
if (new_inode) {
- pack_inode(c, p, new_inode, 0, last_reference);
+ pack_inode(c, p, new_inode, 0);
p += ALIGN(ilen, 8);
}
if (!move)
- pack_inode(c, p, old_dir, 1, 0);
+ pack_inode(c, p, old_dir, 1);
else {
- pack_inode(c, p, old_dir, 0, 0);
+ pack_inode(c, p, old_dir, 0);
p += ALIGN(plen, 8);
- pack_inode(c, p, new_dir, 1, 0);
+ pack_inode(c, p, new_dir, 1);
}
if (last_reference) {
@@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
release_head(c, BASEHD);
goto out_finish;
}
+ new_ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 0, 0);
+ pack_inode(c, ino, inode, 0);
ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
if (dlen)
ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
ubifs_prep_grp_node(c, xent, xlen, 0);
ino = (void *)xent + aligned_xlen;
- pack_inode(c, ino, inode, 0, 1);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + UBIFS_INO_NODE_SZ;
- pack_inode(c, ino, host, 1, 0);
+ pack_inode(c, ino, host, 1);
err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
if (!sync && !err)
@@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
const struct inode *host)
{
int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
- struct ubifs_inode *host_ui = ubifs_inode(inode);
+ struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_ino_node *ino;
union ubifs_key key;
int sync = IS_DIRSYNC(host);
@@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, host, 0, 0);
- pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
+ pack_inode(c, ino, host, 0);
+ pack_inode(c, (void *)ino + aligned_len1, inode, 1);
err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
if (!sync && !err) {
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36857b9ed59e..3e0aa7367556 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
return 0;
out_unlock:
+ if (err != -EAGAIN)
+ ubifs_ro_mode(c, err);
mutex_unlock(&c->log_mutex);
kfree(ref);
kfree(bud);
@@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
return -ENOMEM;
cs->ch.node_type = UBIFS_CS_NODE;
- cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
+ cs->cmt_no = cpu_to_le64(c->cmt_no);
ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
/*
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4beccfc256d2..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
}
/**
- * ubifs_ro_mode - switch UBIFS to read read-only mode.
- * @c: UBIFS file-system description object
- * @err: error code which is the reason of switching to R/O mode
- */
-static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
-{
- if (!c->ro_media) {
- c->ro_media = 1;
- ubifs_warn("switched to read-only mode, error %d", err);
- dbg_dump_stack();
- }
-}
-
-/**
* ubifs_compr_present - check if compressor was compiled in.
* @compr_type: compressor type to check
*
@@ -298,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
}
/**
- * ubifs_reported_space - calculate reported free space.
- * @c: the UBIFS file-system description object
- * @free: amount of free space
- *
- * This function calculates amount of free space which will be reported to
- * user-space. User-space application tend to expect that if the file-system
- * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
- * are able to write a file of size N. UBIFS attaches node headers to each data
- * node and it has to write indexind nodes as well. This introduces additional
- * overhead, and UBIFS it has to report sligtly less free space to meet the
- * above expectetion.
- *
- * This function assumes free space is made up of uncompressed data nodes and
- * full index nodes (one per data node, doubled because we always allow enough
- * space to write the index twice).
- *
- * Note, the calculation is pessimistic, which means that most of the time
- * UBIFS reports less space than it actually has.
- */
-static inline long long ubifs_reported_space(const struct ubifs_info *c,
- uint64_t free)
-{
- int divisor, factor;
-
- divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
- factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
- do_div(free, divisor);
-
- return free * factor;
-}
-
-/**
* ubifs_current_time - round current time to time granularity.
* @inode: inode
*/
@@ -339,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
}
+/**
+ * ubifs_tnc_lookup - look up a file-system node.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
+ *
+ * This function look up and reads node with key @key. The caller has to make
+ * sure the @node buffer is large enough to fit the node. Returns zero in case
+ * of success, %-ENOENT if the node was not found, and a negative error code in
+ * case of failure.
+ */
+static inline int ubifs_tnc_lookup(struct ubifs_info *c,
+ const union ubifs_key *key, void *node)
+{
+ return ubifs_tnc_locate(c, key, node, NULL, NULL);
+}
+
#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 3afeb9242c6a..02d3462f4d3e 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
c->cmt_orphans -= cnt;
spin_unlock(&c->orphan_lock);
if (c->cmt_orphans)
- orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
+ orph->cmt_no = cpu_to_le64(c->cmt_no);
else
/* Mark the last node of the commit */
- orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
+ orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
ubifs_assert(c->ohead_offs + len <= c->leb_size);
ubifs_assert(c->ohead_lnum >= c->orph_first);
ubifs_assert(c->ohead_lnum <= c->orph_last);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ca1e2d4e03cc..7562464ac83f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -30,7 +30,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/ctype.h>
-#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
if (err)
goto out_invalid;
- /* Disable readahead */
+ /* Disable read-ahead */
inode->i_mapping->backing_dev_info = &c->bdi;
switch (inode->i_mode & S_IFMT) {
@@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode)
*/
static int ubifs_write_inode(struct inode *inode, int wait)
{
- int err;
+ int err = 0;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
@@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait)
return 0;
}
- dbg_gen("inode %lu", inode->i_ino);
- err = ubifs_jnl_write_inode(c, inode, 0);
- if (err)
- ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+ /*
+ * As an optimization, do not write orphan inodes to the media just
+ * because this is not needed.
+ */
+ dbg_gen("inode %lu, mode %#x, nlink %u",
+ inode->i_ino, (int)inode->i_mode, inode->i_nlink);
+ if (inode->i_nlink) {
+ err = ubifs_jnl_write_inode(c, inode);
+ if (err)
+ ubifs_err("can't write inode %lu, error %d",
+ inode->i_ino, err);
+ }
ui->dirty = 0;
mutex_unlock(&ui->ui_mutex);
@@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode)
{
int err;
struct ubifs_info *c = inode->i_sb->s_fs_info;
+ struct ubifs_inode *ui = ubifs_inode(inode);
- if (ubifs_inode(inode)->xattr)
+ if (ui->xattr)
/*
* Extended attribute inode deletions are fully handled in
* 'ubifs_removexattr()'. These inodes are special and have
@@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode)
*/
goto out;
- dbg_gen("inode %lu", inode->i_ino);
+ dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
ubifs_assert(!atomic_read(&inode->i_count));
ubifs_assert(inode->i_nlink == 0);
@@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode)
if (is_bad_inode(inode))
goto out;
- ubifs_inode(inode)->ui_size = inode->i_size = 0;
- err = ubifs_jnl_write_inode(c, inode, 1);
+ ui->ui_size = inode->i_size = 0;
+ err = ubifs_jnl_delete_inode(c, inode);
if (err)
/*
* Worst case we have a lost orphan inode wasting space, so a
- * simple error message is ok here.
+ * simple error message is OK here.
*/
- ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+ ubifs_err("can't delete inode %lu, error %d",
+ inode->i_ino, err);
+
out:
+ if (ui->dirty)
+ ubifs_release_dirty_inode_budget(c, ui);
clear_inode(inode);
}
@@ -358,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct ubifs_info *c = dentry->d_sb->s_fs_info;
unsigned long long free;
+ __le32 *uuid = (__le32 *)c->uuid;
- free = ubifs_budg_get_free_space(c);
+ free = ubifs_get_free_space(c);
dbg_gen("free space %lld bytes (%lld blocks)",
free, free >> UBIFS_BLOCK_SHIFT);
@@ -374,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_namelen = UBIFS_MAX_NLEN;
-
+ buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
+ buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
return 0;
}
@@ -518,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
+ /*
+ * Calculate how many bytes would be wasted at the end of LEB if it was
+ * fully filled with data nodes of maximum size. This is used in
+ * calculations when reporting free space.
+ */
+ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
return 0;
}
@@ -635,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
* internally because it does not make much sense for UBIFS, but it is
* necessary to report something for the 'statfs()' call.
*
- * Subtract the LEB reserved for GC and the LEB which is reserved for
- * deletions.
- *
- * Review 'ubifs_calc_available()' if changing this calculation.
+ * Subtract the LEB reserved for GC, the LEB which is reserved for
+ * deletions, and assume only one journal head is available.
*/
- tmp64 = c->main_lebs - 2;
- tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
+ tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
+ tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
tmp64 = ubifs_reported_space(c, tmp64);
c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
@@ -1122,8 +1140,8 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_infos;
- ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
- c->vi.vol_id);
+ ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
+ c->vi.ubi_num, c->vi.vol_id, c->vi.name);
if (mounted_read_only)
ubifs_msg("mounted read-only");
x = (long long)c->main_lebs * c->leb_size;
@@ -1469,6 +1487,7 @@ static void ubifs_put_super(struct super_block *sb)
*/
ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
ubifs_assert(c->budg_idx_growth == 0);
+ ubifs_assert(c->budg_dd_growth == 0);
ubifs_assert(c->budg_data_growth == 0);
/*
@@ -1657,7 +1676,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&c->orph_new);
c->highest_inum = UBIFS_FIRST_INO;
- get_random_bytes(&c->vfs_gen, sizeof(int));
c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
ubi_get_volume_info(ubi, &c->vi);
@@ -1671,10 +1689,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
}
/*
- * UBIFS provids 'backing_dev_info' in order to disable readahead. For
+ * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
* UBIFS, I/O is not deferred, it is done immediately in readpage,
* which means the user would have to wait not just for their own I/O
- * but the readahead I/O as well i.e. completely pointless.
+ * but the read-ahead I/O as well i.e. completely pointless.
*
* Read-ahead will be disabled because @c->bdi.ra_pages is 0.
*/
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7da209ab9378 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
if (keys_cmp(c, key, &node_key) != 0)
ret = 0;
}
- if (ret == 0)
+ if (ret == 0 && c->replaying)
dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
}
/**
- * ubifs_tnc_lookup - look up a file-system node.
+ * maybe_leb_gced - determine if a LEB may have been garbage collected.
* @c: UBIFS file-system description object
- * @key: node key to lookup
- * @node: the node is returned here
+ * @lnum: LEB number
+ * @gc_seq1: garbage collection sequence number
*
- * This function look up and reads node with key @key. The caller has to make
- * sure the @node buffer is large enough to fit the node. Returns zero in case
- * of success, %-ENOENT if the node was not found, and a negative error code in
- * case of failure.
+ * This function determines if @lnum may have been garbage collected since
+ * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
+ * %0 is returned.
*/
-int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
- void *node)
+static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
{
- int found, n, err;
- struct ubifs_znode *znode;
- struct ubifs_zbranch zbr, *zt;
+ int gc_seq2, gced_lnum;
- mutex_lock(&c->tnc_mutex);
- found = ubifs_lookup_level0(c, key, &znode, &n);
- if (!found) {
- err = -ENOENT;
- goto out;
- } else if (found < 0) {
- err = found;
- goto out;
- }
- zt = &znode->zbranch[n];
- if (is_hash_key(c, key)) {
- /*
- * In this case the leaf node cache gets used, so we pass the
- * address of the zbranch and keep the mutex locked
- */
- err = tnc_read_node_nm(c, zt, node);
- goto out;
- }
- zbr = znode->zbranch[n];
- mutex_unlock(&c->tnc_mutex);
-
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
-
-out:
- mutex_unlock(&c->tnc_mutex);
- return err;
+ gced_lnum = c->gced_lnum;
+ smp_rmb();
+ gc_seq2 = c->gc_seq;
+ /* Same seq means no GC */
+ if (gc_seq1 == gc_seq2)
+ return 0;
+ /* Different by more than 1 means we don't know */
+ if (gc_seq1 + 1 != gc_seq2)
+ return 1;
+ /*
+ * We have seen the sequence number has increased by 1. Now we need to
+ * be sure we read the right LEB number, so read it again.
+ */
+ smp_rmb();
+ if (gced_lnum != c->gced_lnum)
+ return 1;
+ /* Finally we can check lnum */
+ if (gced_lnum == lnum)
+ return 1;
+ return 0;
}
/**
@@ -1436,16 +1425,19 @@ out:
* @lnum: LEB number is returned here
* @offs: offset is returned here
*
- * This function is the same as 'ubifs_tnc_lookup()' but it returns the node
- * location also. See 'ubifs_tnc_lookup()'.
+ * This function look up and reads node with key @key. The caller has to make
+ * sure the @node buffer is large enough to fit the node. Returns zero in case
+ * of success, %-ENOENT if the node was not found, and a negative error code in
+ * case of failure. The node location can be returned in @lnum and @offs.
*/
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
void *node, int *lnum, int *offs)
{
- int found, n, err;
+ int found, n, err, safely = 0, gc_seq1;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr, *zt;
+again:
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
goto out;
}
zt = &znode->zbranch[n];
+ if (lnum) {
+ *lnum = zt->lnum;
+ *offs = zt->offs;
+ }
if (is_hash_key(c, key)) {
/*
* In this case the leaf node cache gets used, so we pass the
* address of the zbranch and keep the mutex locked
*/
- *lnum = zt->lnum;
- *offs = zt->offs;
err = tnc_read_node_nm(c, zt, node);
goto out;
}
+ if (safely) {
+ err = ubifs_tnc_read_node(c, zt, node);
+ goto out;
+ }
+ /* Drop the TNC mutex prematurely and race with garbage collection */
zbr = znode->zbranch[n];
+ gc_seq1 = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
- *lnum = zbr.lnum;
- *offs = zbr.offs;
+ if (ubifs_get_wbuf(c, zbr.lnum)) {
+ /* We do not GC journal heads */
+ err = ubifs_tnc_read_node(c, &zbr, node);
+ return err;
+ }
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
+ err = fallible_read_node(c, key, &zbr, node);
+ if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
+ /*
+ * The node may have been GC'ed out from under us so try again
+ * while keeping the TNC mutex locked.
+ */
+ safely = 1;
+ goto again;
+ }
+ return 0;
out:
mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
{
int found, n, err;
struct ubifs_znode *znode;
- struct ubifs_zbranch zbr;
dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
goto out_unlock;
}
- zbr = znode->zbranch[n];
- mutex_unlock(&c->tnc_mutex);
-
- err = tnc_read_node_nm(c, &zbr, node);
- return err;
+ err = tnc_read_node_nm(c, &znode->zbranch[n], node);
out_unlock:
mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8117e65ba2e9..8ac76b1c2d55 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
written = layout_leb_in_gaps(c, p);
if (written < 0) {
err = written;
- if (err == -ENOSPC) {
- if (!dbg_force_in_the_gaps_enabled) {
- /*
- * Do not print scary warnings if the
- * debugging option which forces
- * in-the-gaps is enabled.
- */
- ubifs_err("out of space");
- spin_lock(&c->space_lock);
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
- dbg_dump_lprops(c);
- }
- /* Try to commit anyway */
- err = 0;
- break;
+ if (err != -ENOSPC) {
+ kfree(c->gap_lebs);
+ c->gap_lebs = NULL;
+ return err;
}
- kfree(c->gap_lebs);
- c->gap_lebs = NULL;
- return err;
+ if (!dbg_force_in_the_gaps_enabled) {
+ /*
+ * Do not print scary warnings if the debugging
+ * option which forces in-the-gaps is enabled.
+ */
+ ubifs_err("out of space");
+ spin_lock(&c->space_lock);
+ dbg_dump_budg(c);
+ spin_unlock(&c->space_lock);
+ dbg_dump_lprops(c);
+ }
+ /* Try to commit anyway */
+ err = 0;
+ break;
}
p++;
cnt -= written;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0cc7da9bed47..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
#define UBIFS_SK_LEN 8
/* Minimum index tree fanout */
-#define UBIFS_MIN_FANOUT 2
+#define UBIFS_MIN_FANOUT 3
/* Maximum number of levels in UBIFS indexing B-tree */
#define UBIFS_MAX_LEVELS 512
@@ -228,10 +228,10 @@ enum {
/* Minimum number of orphan area logical eraseblocks */
#define UBIFS_MIN_ORPH_LEBS 1
/*
- * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
+ * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
* for GC, 1 for deletions, and at least 1 for committed data).
*/
-#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
+#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
/* Minimum number of logical eraseblocks */
#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e4f89f271827..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -20,8 +20,6 @@
* Adrian Hunter
*/
-/* Implementation version 0.7 */
-
#ifndef __UBIFS_H__
#define __UBIFS_H__
@@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb {
* struct ubifs_inode - UBIFS in-memory inode description.
* @vfs_inode: VFS inode description object
* @creat_sqnum: sequence number at time of creation
+ * @del_cmtno: commit number corresponding to the time the inode was deleted,
+ * protected by @c->commit_sem;
* @xattr_size: summarized size of all extended attributes in bytes
* @xattr_cnt: count of extended attributes this inode has
* @xattr_names: sum of lengths of all extended attribute names belonging to
@@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb {
struct ubifs_inode {
struct inode vfs_inode;
unsigned long long creat_sqnum;
+ unsigned long long del_cmtno;
unsigned int xattr_size;
unsigned int xattr_cnt;
unsigned int xattr_names;
@@ -779,7 +780,7 @@ struct ubifs_compressor {
/**
* struct ubifs_budget_req - budget requirements of an operation.
*
- * @fast: non-zero if the budgeting should try to aquire budget quickly and
+ * @fast: non-zero if the budgeting should try to acquire budget quickly and
* should not try to call write-back
* @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
* have to be re-calculated
@@ -805,21 +806,31 @@ struct ubifs_compressor {
* An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
* is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
* dirty by the re-name operation.
+ *
+ * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to
+ * make sure the amount of inode data which contribute to @new_ino_d and
+ * @dirtied_ino_d fields are aligned.
*/
struct ubifs_budget_req {
unsigned int fast:1;
unsigned int recalculate:1;
+#ifndef UBIFS_DEBUG
unsigned int new_page:1;
unsigned int dirtied_page:1;
unsigned int new_dent:1;
unsigned int mod_dent:1;
unsigned int new_ino:1;
unsigned int new_ino_d:13;
-#ifndef UBIFS_DEBUG
unsigned int dirtied_ino:4;
unsigned int dirtied_ino_d:15;
#else
/* Not bit-fields to check for overflows */
+ unsigned int new_page;
+ unsigned int dirtied_page;
+ unsigned int new_dent;
+ unsigned int mod_dent;
+ unsigned int new_ino;
+ unsigned int new_ino_d;
unsigned int dirtied_ino;
unsigned int dirtied_ino_d;
#endif
@@ -860,13 +871,13 @@ struct ubifs_mount_opts {
* struct ubifs_info - UBIFS file-system description data structure
* (per-superblock).
* @vfs_sb: VFS @struct super_block object
- * @bdi: backing device info object to make VFS happy and disable readahead
+ * @bdi: backing device info object to make VFS happy and disable read-ahead
*
* @highest_inum: highest used inode number
- * @vfs_gen: VFS inode generation counter
* @max_sqnum: current global sequence number
- * @cmt_no: commit number (last successfully completed commit)
- * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
+ * @cmt_no: commit number of the last successfully completed commit, protected
+ * by @commit_sem
+ * @cnt_lock: protects @highest_inum and @max_sqnum counters
* @fmt_version: UBIFS on-flash format version
* @uuid: UUID from super block
*
@@ -984,6 +995,9 @@ struct ubifs_mount_opts {
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
* @max_inode_sz: maximum possible inode size in bytes
* @max_znode_sz: size of znode in bytes
+ *
+ * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
+ * data nodes of maximum size - used in free space reporting
* @dead_wm: LEB dead space watermark
* @dark_wm: LEB dark space watermark
* @block_cnt: count of 4KiB blocks on the FS
@@ -1017,6 +1031,8 @@ struct ubifs_mount_opts {
* @sbuf: a buffer of LEB size used by GC and replay for scanning
* @idx_gc: list of index LEBs that have been garbage collected
* @idx_gc_cnt: number of elements on the idx_gc list
+ * @gc_seq: incremented for every non-index LEB garbage collected
+ * @gced_lnum: last non-index LEB that was garbage collected
*
* @infos_list: links all 'ubifs_info' objects
* @umount_mutex: serializes shrinker and un-mount
@@ -1103,7 +1119,6 @@ struct ubifs_info {
struct backing_dev_info bdi;
ino_t highest_inum;
- unsigned int vfs_gen;
unsigned long long max_sqnum;
unsigned long long cmt_no;
spinlock_t cnt_lock;
@@ -1214,6 +1229,8 @@ struct ubifs_info {
int max_idx_node_sz;
long long max_inode_sz;
int max_znode_sz;
+
+ int leb_overhead;
int dead_wm;
int dark_wm;
int block_cnt;
@@ -1247,6 +1264,8 @@ struct ubifs_info {
void *sbuf;
struct list_head idx_gc;
int idx_gc_cnt;
+ volatile int gc_seq;
+ volatile int gced_lnum;
struct list_head infos_list;
struct mutex umount_mutex;
@@ -1346,6 +1365,7 @@ extern struct backing_dev_info ubifs_backing_dev_info;
extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
/* io.c */
+void ubifs_ro_mode(struct ubifs_info *c, int err);
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
int dtype);
@@ -1399,8 +1419,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
int deletion, int xent);
int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
const union ubifs_key *key, const void *buf, int len);
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
- int last_reference);
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
const struct dentry *old_dentry,
const struct inode *new_dir,
@@ -1423,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
-long long ubifs_budg_get_free_space(struct ubifs_info *c);
+long long ubifs_get_free_space(struct ubifs_info *c);
int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
void ubifs_convert_page_budget(struct ubifs_info *c);
+long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/* find.c */
@@ -1440,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
/* tnc.c */
int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode **zn, int *n);
-int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
- void *node);
int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
void *node, const struct qstr *nm);
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 1388a078e1a9..649bec78b645 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -61,7 +61,7 @@
/*
* Limit the number of extended attributes per inode so that the total size
- * (xattr_size) is guaranteeded to fit in an 'unsigned int'.
+ * (@xattr_size) is guaranteeded to fit in an 'unsigned int'.
*/
#define MAX_XATTRS_PER_INODE 65535
@@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
struct inode *inode;
struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = size, .dirtied_ino = 1,
- .dirtied_ino_d = host_ui->data_len};
+ .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
return -ENOSPC;
/*
* Linux limits the maximum size of the extended attribute names list
- * to %XATTR_LIST_MAX. This means we should not allow creating more*
+ * to %XATTR_LIST_MAX. This means we should not allow creating more
* extended attributes if the name list becomes larger. This limitation
* is artificial for UBIFS, though.
*/
@@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
goto out_budg;
}
- mutex_lock(&host_ui->ui_mutex);
/* Re-define all operations to be "nothing" */
inode->i_mapping->a_ops = &none_address_operations;
inode->i_op = &none_inode_operations;
@@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
ui->data = kmalloc(size, GFP_NOFS);
if (!ui->data) {
err = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
-
memcpy(ui->data, value, size);
+ inode->i_size = ui->ui_size = size;
+ ui->data_len = size;
+
+ mutex_lock(&host_ui->ui_mutex);
host->i_ctime = ubifs_current_time(host);
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
host_ui->xattr_size += CALC_XATTR_BYTES(size);
host_ui->xattr_names += nm->len;
- /*
- * We do not use i_size_write() because nobody can race with us as we
- * are holding host @host->i_mutex - every xattr operation for this
- * inode is serialized by it.
- */
- inode->i_size = ui->ui_size = size;
- ui->data_len = size;
err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
if (err)
goto out_cancel;
@@ -172,8 +167,8 @@ out_cancel:
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
-out_unlock:
mutex_unlock(&host_ui->ui_mutex);
+out_free:
make_bad_inode(inode);
iput(inode);
out_budg:
@@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_budget_req req = { .dirtied_ino = 2,
- .dirtied_ino_d = size + host_ui->data_len };
+ .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
ubifs_assert(ui->data_len == inode->i_size);
err = ubifs_budget_space(c, &req);
if (err)
return err;
- mutex_lock(&host_ui->ui_mutex);
- host->i_ctime = ubifs_current_time(host);
- host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
- host_ui->xattr_size += CALC_XATTR_BYTES(size);
-
kfree(ui->data);
ui->data = kmalloc(size, GFP_NOFS);
if (!ui->data) {
err = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
-
memcpy(ui->data, value, size);
inode->i_size = ui->ui_size = size;
ui->data_len = size;
+ mutex_lock(&host_ui->ui_mutex);
+ host->i_ctime = ubifs_current_time(host);
+ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
+ host_ui->xattr_size += CALC_XATTR_BYTES(size);
+
/*
* It is important to write the host inode after the xattr inode
* because if the host inode gets synchronized (via 'fsync()'), then
@@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
out_cancel:
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
- make_bad_inode(inode);
-out_unlock:
mutex_unlock(&host_ui->ui_mutex);
+ make_bad_inode(inode);
+out_free:
ubifs_release_budget(c, &req);
return err;
}
@@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+ ubifs_assert(mutex_is_locked(&host->i_mutex));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
if (!xent)
return -ENOMEM;
- mutex_lock(&host->i_mutex);
xent_key_init(c, &key, host->i_ino, &nm);
err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
if (err) {
@@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
out_iput:
iput(inode);
out_unlock:
- mutex_unlock(&host->i_mutex);
kfree(xent);
return err;
}
@@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
return -ERANGE;
lowest_xent_key(c, &key, host->i_ino);
-
- mutex_lock(&host->i_mutex);
while (1) {
int type;
@@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
pxent = xent;
key_read(c, &xent->key, &key);
}
- mutex_unlock(&host->i_mutex);
kfree(pxent);
if (err != -ENOENT) {
@@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
int err;
struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_inode *ui = ubifs_inode(inode);
- struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
- .dirtied_ino_d = host_ui->data_len };
+ struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
ubifs_assert(ui->data_len == inode->i_size);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24d..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
- const struct match_token *tp = tokens;
+ struct match_token *tp = tokens;
while (tp->token != Opt_onerror_panic && tp->token != mval)
++tp;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699a..737c9a425361 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
- xfs_vnode.o)
+ xfs_vnode.o \
+ xfs_xattr.o)
# Objects in support/
xfs-y += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a0501..1cd3b55ee3d2 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
}
void
-kmem_free(void *ptr, size_t size)
+kmem_free(const void *ptr)
{
if (!is_vmalloc_addr(ptr)) {
kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
}
void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
unsigned int __nocast flags)
{
void *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
if (new)
memcpy(new, ptr,
((oldsize < newsize) ? oldsize : newsize));
- kmem_free(ptr, oldsize);
+ kmem_free(ptr);
}
return new;
}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a20683cf74dd..af6843c7ee4b 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
extern void *kmem_alloc(size_t, unsigned int __nocast);
extern void *kmem_zalloc(size_t, unsigned int __nocast);
extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
-extern void kmem_free(void *, size_t);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void kmem_free(const void *);
/*
* Zone interfaces
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 3abe7e9ceb33..000000000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SEMA_H__
-#define __XFS_SUPPORT_SEMA_H__
-
-#include <linux/time.h>
-#include <linux/wait.h>
-#include <linux/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * sema_t structure just maps to struct semaphore in Linux kernel.
- */
-
-typedef struct semaphore sema_t;
-
-#define initnsema(sp, val, name) sema_init(sp, val)
-#define psema(sp, b) down(sp)
-#define vsema(sp) up(sp)
-#define freesema(sema) do { } while (0)
-
-static inline int issemalocked(sema_t *sp)
-{
- return down_trylock(sp) || (up(sp), 0);
-}
-
-/*
- * Map cpsema (try to get the sema) to down_trylock. We need to switch
- * the return values since cpsema returns 1 (acquired) 0 (failed) and
- * down_trylock returns the reverse 0 (acquired) 1 (failed).
- */
-static inline int cpsema(sema_t *sp)
-{
- return down_trylock(sp) ? 0 : 1;
-}
-
-#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d840..f42f80a3b1fa 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -73,7 +73,6 @@ xfs_page_trace(
unsigned long pgoff)
{
xfs_inode_t *ip;
- bhv_vnode_t *vp = vn_from_inode(inode);
loff_t isize = i_size_read(inode);
loff_t offset = page_offset(page);
int delalloc = -1, unmapped = -1, unwritten = -1;
@@ -81,7 +80,7 @@ xfs_page_trace(
if (page_has_buffers(page))
xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- ip = xfs_vtoi(vp);
+ ip = XFS_I(inode);
if (!ip->i_rwtrace)
return;
@@ -409,7 +408,6 @@ xfs_start_buffer_writeback(
STATIC void
xfs_start_page_writeback(
struct page *page,
- struct writeback_control *wbc,
int clear_dirty,
int buffers)
{
@@ -676,7 +674,7 @@ xfs_probe_cluster(
} else
pg_offset = PAGE_CACHE_SIZE;
- if (page->index == tindex && !TestSetPageLocked(page)) {
+ if (page->index == tindex && trylock_page(page)) {
pg_len = xfs_probe_page(page, pg_offset, mapped);
unlock_page(page);
}
@@ -760,7 +758,7 @@ xfs_convert_page(
if (page->index != tindex)
goto fail;
- if (TestSetPageLocked(page))
+ if (!trylock_page(page))
goto fail;
if (PageWriteback(page))
goto fail_unlock_page;
@@ -858,7 +856,7 @@ xfs_convert_page(
done = 1;
}
}
- xfs_start_page_writeback(page, wbc, !page_dirty, count);
+ xfs_start_page_writeback(page, !page_dirty, count);
}
return done;
@@ -1105,7 +1103,7 @@ xfs_page_state_convert(
* that we are writing into for the first time.
*/
type = IOMAP_NEW;
- if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+ if (trylock_buffer(bh)) {
ASSERT(buffer_mapped(bh));
if (iomap_valid)
all_bh = 1;
@@ -1130,7 +1128,7 @@ xfs_page_state_convert(
SetPageUptodate(page);
if (startio)
- xfs_start_page_writeback(page, wbc, 1, count);
+ xfs_start_page_writeback(page, 1, count);
if (ioend && iomap_valid) {
offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..986061ae1b9b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -58,7 +58,7 @@ xfs_buf_trace(
bp, id,
(void *)(unsigned long)bp->b_flags,
(void *)(unsigned long)bp->b_hold.counter,
- (void *)(unsigned long)bp->b_sema.count.counter,
+ (void *)(unsigned long)bp->b_sema.count,
(void *)current,
data, ra,
(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
@@ -253,7 +253,7 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
- init_MUTEX_LOCKED(&bp->b_iodonesema);
+ init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
INIT_LIST_HEAD(&bp->b_hash_list);
init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
xfs_buf_t *bp)
{
if (bp->b_pages != bp->b_page_array) {
- kmem_free(bp->b_pages,
- bp->b_page_count * sizeof(struct page *));
+ kmem_free(bp->b_pages);
}
}
@@ -839,6 +838,7 @@ xfs_buf_rele(
return;
}
+ ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
@@ -852,11 +852,6 @@ xfs_buf_rele(
spin_unlock(&hash->bh_lock);
xfs_buf_free(bp);
}
- } else {
- /*
- * Catch reference count leaks
- */
- ASSERT(atomic_read(&bp->b_hold) >= 0);
}
}
@@ -1038,7 +1033,7 @@ xfs_buf_ioend(
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
- up(&bp->b_iodonesema);
+ complete(&bp->b_iowait);
}
}
@@ -1276,7 +1271,7 @@ xfs_buf_iowait(
XB_TRACE(bp, "iowait", 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
- down(&bp->b_iodonesema);
+ wait_for_completion(&bp->b_iowait);
XB_TRACE(bp, "iowaited", (long)bp->b_error);
return bp->b_error;
}
@@ -1398,7 +1393,7 @@ STATIC void
xfs_free_bufhash(
xfs_buftarg_t *btp)
{
- kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ kmem_free(btp->bt_hash);
btp->bt_hash = NULL;
}
@@ -1428,13 +1423,10 @@ xfs_unregister_buftarg(
void
xfs_free_buftarg(
- xfs_buftarg_t *btp,
- int external)
+ xfs_buftarg_t *btp)
{
xfs_flush_buftarg(btp, 1);
xfs_blkdev_issue_flush(btp);
- if (external)
- xfs_blkdev_put(btp->bt_bdev);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
@@ -1444,7 +1436,7 @@ xfs_free_buftarg(
xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
}
STATIC int
@@ -1575,7 +1567,7 @@ xfs_alloc_buftarg(
return btp;
error:
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
return NULL;
}
@@ -1803,7 +1795,7 @@ int __init
xfs_buf_init(void)
{
#ifdef XFS_BUF_TRACE
- xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
+ xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
#endif
xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..fe0109956656 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -157,7 +157,7 @@ typedef struct xfs_buf {
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
xfs_buf_bdstrat_t b_strat; /* pre-write function */
- struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
+ struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
void *b_fspriv3;
@@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
+#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
#define XFS_BUF_TARGET(bp) ((bp)->b_target)
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
* Handling of buftargs.
*/
extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_free_buftarg(xfs_buftarg_t *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b14..24fd598af846 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -139,7 +139,7 @@ xfs_nfs_get_inode(
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return ip->i_vnode;
+ return VFS_I(ip);
}
STATIC struct dentry *
@@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
if (!inode)
return NULL;
if (IS_ERR(inode))
- return ERR_PTR(PTR_ERR(inode));
+ return ERR_CAST(inode);
result = d_alloc_anon(inode);
if (!result) {
iput(inode);
@@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
if (!inode)
return NULL;
if (IS_ERR(inode))
- return ERR_PTR(PTR_ERR(inode));
+ return ERR_CAST(inode);
result = d_alloc_anon(inode);
if (!result) {
iput(inode);
@@ -215,13 +215,13 @@ xfs_fs_get_parent(
struct xfs_inode *cip;
struct dentry *parent;
- error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
+ error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
if (unlikely(error))
return ERR_PTR(-error);
- parent = d_alloc_anon(cip->i_vnode);
+ parent = d_alloc_anon(VFS_I(cip));
if (unlikely(!parent)) {
- iput(cip->i_vnode);
+ iput(VFS_I(cip));
return ERR_PTR(-ENOMEM);
}
return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..5311c1acdd40 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = {
const struct file_operations xfs_dir_file_operations = {
.read = generic_read_dir,
.readdir = xfs_file_readdir,
+ .llseek = generic_file_llseek,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1eefe61f0e10..36caa6d957df 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -31,7 +31,7 @@ xfs_tosspages(
xfs_off_t last,
int fiopt)
{
- struct address_space *mapping = ip->i_vnode->i_mapping;
+ struct address_space *mapping = VFS_I(ip)->i_mapping;
if (mapping->nrpages)
truncate_inode_pages(mapping, first);
@@ -44,7 +44,7 @@ xfs_flushinval_pages(
xfs_off_t last,
int fiopt)
{
- struct address_space *mapping = ip->i_vnode->i_mapping;
+ struct address_space *mapping = VFS_I(ip)->i_mapping;
int ret = 0;
if (mapping->nrpages) {
@@ -64,7 +64,7 @@ xfs_flush_pages(
uint64_t flags,
int fiopt)
{
- struct address_space *mapping = ip->i_vnode->i_mapping;
+ struct address_space *mapping = VFS_I(ip)->i_mapping;
int ret = 0;
int ret2;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 01939ba2d8de..48799ba7e3e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
#include "xfs_dfrag.h"
#include "xfs_fsops.h"
#include "xfs_vnodeops.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -243,7 +245,7 @@ xfs_vget_fsop_handlereq(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- *inode = XFS_ITOV(ip);
+ *inode = VFS_I(ip);
return 0;
}
@@ -468,6 +470,12 @@ xfs_attrlist_by_handle(
if (al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
+ /*
+ * Reject flags, only allow namespaces.
+ */
+ if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+ return -XFS_ERROR(EINVAL);
+
error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
if (error)
goto out;
@@ -587,7 +595,7 @@ xfs_attrmulti_by_handle(
goto out;
error = E2BIG;
- size = am_hreq.opcount * sizeof(attr_multiop_t);
+ size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_vn_rele;
@@ -680,9 +688,9 @@ xfs_ioc_space(
return -XFS_ERROR(EFAULT);
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
+ attr_flags |= XFS_ATTR_NONBLOCK;
if (ioflags & IO_INVIS)
- attr_flags |= ATTR_DMI;
+ attr_flags |= XFS_ATTR_DMI;
error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
NULL, attr_flags);
@@ -873,6 +881,322 @@ xfs_ioc_fsgetxattr(
return 0;
}
+STATIC void
+xfs_set_diflags(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ unsigned int di_flags;
+
+ /* can't set PREALLOC this way, just preserve it */
+ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+ if (xflags & XFS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (xflags & XFS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (xflags & XFS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (xflags & XFS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (xflags & XFS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if (xflags & XFS_XFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ if (xflags & XFS_XFLAG_NODEFRAG)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (xflags & XFS_XFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+ if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (xflags & XFS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (xflags & XFS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (xflags & XFS_XFLAG_REALTIME)
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (xflags & XFS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ }
+
+ ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+ struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+ unsigned int xflags = xfs_ip2xflags(ip);
+
+ if (xflags & XFS_XFLAG_IMMUTABLE)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= ~S_IMMUTABLE;
+ if (xflags & XFS_XFLAG_APPEND)
+ inode->i_flags |= S_APPEND;
+ else
+ inode->i_flags &= ~S_APPEND;
+ if (xflags & XFS_XFLAG_SYNC)
+ inode->i_flags |= S_SYNC;
+ else
+ inode->i_flags &= ~S_SYNC;
+ if (xflags & XFS_XFLAG_NOATIME)
+ inode->i_flags |= S_NOATIME;
+ else
+ inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID 1
+#define FSX_EXTSIZE 2
+#define FSX_XFLAGS 4
+#define FSX_NONBLOCK 8
+
+STATIC int
+xfs_ioctl_setattr(
+ xfs_inode_t *ip,
+ struct fsxattr *fa,
+ int mask)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ unsigned int lock_flags = 0;
+ struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *olddquot = NULL;
+ int code;
+
+ xfs_itrace_entry(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ /*
+ * If disk quotas is on, we make sure that the dquots do exist on disk,
+ * before we start any other transactions. Trying to do this later
+ * is messy. We don't care to take a readlock to look at the ids
+ * in inode here, because we can't hold it across the trans_reserve.
+ * If the IDs do change before we take the ilock, we're covered
+ * because the i_*dquot fields will get updated anyway.
+ */
+ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+ code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+ ip->i_d.di_gid, fa->fsx_projid,
+ XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+ if (code)
+ return code;
+ }
+
+ /*
+ * For the other attributes, we acquire the inode lock and
+ * first do an error checking pass.
+ */
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (code)
+ goto error_return;
+
+ lock_flags = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+
+ /*
+ * CAP_FOWNER overrides the following restrictions:
+ *
+ * The user ID of the calling process must be equal
+ * to the file owner ID, except in cases where the
+ * CAP_FSETID capability is applicable.
+ */
+ if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+
+ /*
+ * Do a quota reservation only if projid is actually going to change.
+ */
+ if (mask & FSX_PROJID) {
+ if (XFS_IS_PQUOTA_ON(mp) &&
+ ip->i_d.di_projid != fa->fsx_projid) {
+ ASSERT(tp);
+ code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ capable(CAP_FOWNER) ?
+ XFS_QMOPT_FORCE_RES : 0);
+ if (code) /* out of quota */
+ goto error_return;
+ }
+ }
+
+ if (mask & FSX_EXTSIZE) {
+ /*
+ * Can't change extent size if any extents are allocated.
+ */
+ if (ip->i_d.di_nextents &&
+ ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+ fa->fsx_extsize)) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+
+ /*
+ * Extent size must be a multiple of the appropriate block
+ * size, if set at all.
+ */
+ if (fa->fsx_extsize != 0) {
+ xfs_extlen_t size;
+
+ if (XFS_IS_REALTIME_INODE(ip) ||
+ ((mask & FSX_XFLAGS) &&
+ (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+ size = mp->m_sb.sb_rextsize <<
+ mp->m_sb.sb_blocklog;
+ } else {
+ size = mp->m_sb.sb_blocksize;
+ }
+
+ if (fa->fsx_extsize % size) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+ }
+
+
+ if (mask & FSX_XFLAGS) {
+ /*
+ * Can't change realtime flag if any extents are allocated.
+ */
+ if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+ (XFS_IS_REALTIME_INODE(ip)) !=
+ (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+
+ /*
+ * If realtime flag is set then must have realtime data.
+ */
+ if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ if ((mp->m_sb.sb_rblocks == 0) ||
+ (mp->m_sb.sb_rextsize == 0) ||
+ (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+
+ /*
+ * Can't modify an immutable/append-only file unless
+ * we have appropriate permission.
+ */
+ if ((ip->i_d.di_flags &
+ (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+ (fa->fsx_xflags &
+ (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+ }
+
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ihold(tp, ip);
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ * If the system was configured with the "restricted_chown"
+ * option, the owner is not permitted to give away the file,
+ * and can change the group id only to a group of which he
+ * or she is a member.
+ */
+ if (mask & FSX_PROJID) {
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The set-user-ID and set-group-ID bits of a file will be
+ * cleared upon successful return from chown()
+ */
+ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ !capable(CAP_FSETID))
+ ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+ */
+ if (ip->i_d.di_projid != fa->fsx_projid) {
+ if (XFS_IS_PQUOTA_ON(mp)) {
+ olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+ ip->i_d.di_projid = fa->fsx_projid;
+
+ /*
+ * We may have to rev the inode as well as
+ * the superblock version number since projids didn't
+ * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+ */
+ if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+ xfs_bump_ino_vers2(tp, ip);
+ }
+
+ }
+
+ if (mask & FSX_EXTSIZE)
+ ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+ if (mask & FSX_XFLAGS) {
+ xfs_set_diflags(ip, fa->fsx_xflags);
+ xfs_diflags_to_linux(ip);
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ /*
+ * If this is a synchronous mount, make sure that the
+ * transaction goes to disk before returning to the user.
+ * This is slightly sub-optimal in that truncates require
+ * two sync transactions instead of one for wsync filesystems.
+ * One for the truncate and one for the timestamps since we
+ * don't want to change the timestamps unless we're sure the
+ * truncate worked. Truncates are less than 1% of the laddis
+ * mix so this probably isn't worth the trouble to optimize.
+ */
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+ code = xfs_trans_commit(tp, 0);
+ xfs_iunlock(ip, lock_flags);
+
+ /*
+ * Release any dquot(s) the inode had kept before chown.
+ */
+ XFS_QM_DQRELE(mp, olddquot);
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+
+ if (code)
+ return code;
+
+ if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
+ XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
+ NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
+ (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
+ }
+
+ return 0;
+
+ error_return:
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+ xfs_trans_cancel(tp, 0);
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+ return code;
+}
+
STATIC int
xfs_ioc_fssetxattr(
xfs_inode_t *ip,
@@ -880,31 +1204,16 @@ xfs_ioc_fssetxattr(
void __user *arg)
{
struct fsxattr fa;
- struct bhv_vattr *vattr;
- int error;
- int attr_flags;
+ unsigned int mask;
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
- vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
- if (unlikely(!vattr))
- return -ENOMEM;
-
- attr_flags = 0;
+ mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
- vattr->va_xflags = fa.fsx_xflags;
- vattr->va_extsize = fa.fsx_extsize;
- vattr->va_projid = fa.fsx_projid;
+ mask |= FSX_NONBLOCK;
- error = -xfs_setattr(ip, vattr, attr_flags, NULL);
- if (!error)
- vn_revalidate(XFS_ITOV(ip)); /* update flags */
- kfree(vattr);
- return 0;
+ return -xfs_ioctl_setattr(ip, &fa, mask);
}
STATIC int
@@ -926,10 +1235,9 @@ xfs_ioc_setxflags(
struct file *filp,
void __user *arg)
{
- struct bhv_vattr *vattr;
+ struct fsxattr fa;
unsigned int flags;
- int attr_flags;
- int error;
+ unsigned int mask;
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
@@ -939,22 +1247,12 @@ xfs_ioc_setxflags(
FS_SYNC_FL))
return -EOPNOTSUPP;
- vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
- if (unlikely(!vattr))
- return -ENOMEM;
-
- attr_flags = 0;
+ mask = FSX_XFLAGS;
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= ATTR_NONBLOCK;
-
- vattr->va_mask = XFS_AT_XFLAGS;
- vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+ mask |= FSX_NONBLOCK;
+ fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
- error = -xfs_setattr(ip, vattr, attr_flags, NULL);
- if (likely(!error))
- vn_revalidate(XFS_ITOV(ip)); /* update flags */
- kfree(vattr);
- return error;
+ return -xfs_ioctl_setattr(ip, &fa, mask);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 5fc61c824bb9..095d271f3434 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,7 +62,7 @@ void
xfs_synchronize_atime(
xfs_inode_t *ip)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = VFS_I(ip);
if (inode) {
ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
@@ -79,7 +79,7 @@ void
xfs_mark_inode_dirty_sync(
xfs_inode_t *ip)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = VFS_I(ip);
if (inode)
mark_inode_dirty_sync(inode);
@@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync(
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
* we do record the fact that there is dirty information in core.
- *
- * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
- * with XFS_ICHGTIME_ACC to be sure that access time
- * update will take. Calling first with XFS_ICHGTIME_ACC
- * and then XFS_ICHGTIME_MOD may fail to modify the access
- * timestamp if the filesystem is mounted noacctm.
*/
void
xfs_ichgtime(
xfs_inode_t *ip,
int flags)
{
- struct inode *inode = vn_to_inode(XFS_ITOV(ip));
+ struct inode *inode = VFS_I(ip);
timespec_t tv;
+ int sync_it = 0;
+
+ tv = current_fs_time(inode->i_sb);
- nanotime(&tv);
- if (flags & XFS_ICHGTIME_MOD) {
+ if ((flags & XFS_ICHGTIME_MOD) &&
+ !timespec_equal(&inode->i_mtime, &tv)) {
inode->i_mtime = tv;
ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+ sync_it = 1;
}
- if (flags & XFS_ICHGTIME_ACC) {
- inode->i_atime = tv;
- ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
- }
- if (flags & XFS_ICHGTIME_CHG) {
+ if ((flags & XFS_ICHGTIME_CHG) &&
+ !timespec_equal(&inode->i_ctime, &tv)) {
inode->i_ctime = tv;
ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
+ sync_it = 1;
}
/*
@@ -130,72 +125,11 @@ xfs_ichgtime(
* ensure that the compiler does not reorder the update
* of i_update_core above the timestamp updates above.
*/
- SYNCHRONIZE();
- ip->i_update_core = 1;
- if (!(inode->i_state & I_NEW))
+ if (sync_it) {
+ SYNCHRONIZE();
+ ip->i_update_core = 1;
mark_inode_dirty_sync(inode);
-}
-
-/*
- * Variant on the above which avoids querying the system clock
- * in situations where we know the Linux inode timestamps have
- * just been updated (and so we can update our inode cheaply).
- */
-void
-xfs_ichgtime_fast(
- xfs_inode_t *ip,
- struct inode *inode,
- int flags)
-{
- timespec_t *tvp;
-
- /*
- * Atime updates for read() & friends are handled lazily now, and
- * explicit updates must go through xfs_ichgtime()
- */
- ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-
- if (flags & XFS_ICHGTIME_MOD) {
- tvp = &inode->i_mtime;
- ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
- }
- if (flags & XFS_ICHGTIME_CHG) {
- tvp = &inode->i_ctime;
- ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec;
}
-
- /*
- * We update the i_update_core field _after_ changing
- * the timestamps in order to coordinate properly with
- * xfs_iflush() so that we don't lose timestamp updates.
- * This keeps us from having to hold the inode lock
- * while doing this. We use the SYNCHRONIZE macro to
- * ensure that the compiler does not reorder the update
- * of i_update_core above the timestamp updates above.
- */
- SYNCHRONIZE();
- ip->i_update_core = 1;
- if (!(inode->i_state & I_NEW))
- mark_inode_dirty_sync(inode);
-}
-
-
-/*
- * Pull the link count and size up from the xfs inode to the linux inode
- */
-STATIC void
-xfs_validate_fields(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- loff_t size;
-
- /* we're under i_sem so i_size can't change under us */
- size = XFS_ISIZE(ip);
- if (i_size_read(inode) != size)
- i_size_write(inode, size);
}
/*
@@ -245,8 +179,7 @@ STATIC void
xfs_cleanup_inode(
struct inode *dir,
struct inode *inode,
- struct dentry *dentry,
- int mode)
+ struct dentry *dentry)
{
struct xfs_name teardown;
@@ -257,10 +190,7 @@ xfs_cleanup_inode(
*/
xfs_dentry_to_name(&teardown, dentry);
- if (S_ISDIR(mode))
- xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
- else
- xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+ xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
iput(inode);
}
@@ -275,7 +205,7 @@ xfs_vn_mknod(
struct xfs_inode *ip = NULL;
xfs_acl_t *default_acl = NULL;
struct xfs_name name;
- attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
+ int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
int error;
/*
@@ -320,7 +250,7 @@ xfs_vn_mknod(
if (unlikely(error))
goto out_free_acl;
- inode = ip->i_vnode;
+ inode = VFS_I(ip);
error = xfs_init_security(inode, dir);
if (unlikely(error))
@@ -335,14 +265,11 @@ xfs_vn_mknod(
}
- if (S_ISDIR(mode))
- xfs_validate_fields(inode);
d_instantiate(dentry, inode);
- xfs_validate_fields(dir);
return -error;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry, mode);
+ xfs_cleanup_inode(dir, inode, dentry);
out_free_acl:
if (default_acl)
_ACL_FREE(default_acl);
@@ -382,7 +309,7 @@ xfs_vn_lookup(
return ERR_PTR(-ENAMETOOLONG);
xfs_dentry_to_name(&name, dentry);
- error = xfs_lookup(XFS_I(dir), &name, &cip);
+ error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
if (unlikely(error)) {
if (unlikely(error != ENOENT))
return ERR_PTR(-error);
@@ -390,7 +317,47 @@ xfs_vn_lookup(
return NULL;
}
- return d_splice_alias(cip->i_vnode, dentry);
+ return d_splice_alias(VFS_I(cip), dentry);
+}
+
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+ struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct xfs_inode *ip;
+ struct xfs_name xname;
+ struct xfs_name ci_name;
+ struct qstr dname;
+ int error;
+
+ if (dentry->d_name.len >= MAXNAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ xfs_dentry_to_name(&xname, dentry);
+ error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+ if (unlikely(error)) {
+ if (unlikely(error != ENOENT))
+ return ERR_PTR(-error);
+ /*
+ * call d_add(dentry, NULL) here when d_drop_negative_children
+ * is called in xfs_vn_mknod (ie. allow negative dentries
+ * with CI filesystems).
+ */
+ return NULL;
+ }
+
+ /* if exact match, just splice and exit */
+ if (!ci_name.name)
+ return d_splice_alias(VFS_I(ip), dentry);
+
+ /* else case-insensitive match... */
+ dname.name = ci_name.name;
+ dname.len = ci_name.len;
+ dentry = d_add_ci(dentry, VFS_I(ip), &dname);
+ kmem_free(ci_name.name);
+ return dentry;
}
STATIC int
@@ -414,7 +381,6 @@ xfs_vn_link(
}
xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
- xfs_validate_fields(inode);
d_instantiate(dentry, inode);
return 0;
}
@@ -424,19 +390,23 @@ xfs_vn_unlink(
struct inode *dir,
struct dentry *dentry)
{
- struct inode *inode;
struct xfs_name name;
int error;
- inode = dentry->d_inode;
xfs_dentry_to_name(&name, dentry);
- error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
- if (likely(!error)) {
- xfs_validate_fields(dir); /* size needs update */
- xfs_validate_fields(inode);
- }
- return -error;
+ error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+ if (error)
+ return error;
+
+ /*
+ * With unlink, the VFS makes the dentry "negative": no inode,
+ * but still hashed. This is incompatible with case-insensitive
+ * mode, so invalidate (unhash) the dentry in CI-mode.
+ */
+ if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+ d_invalidate(dentry);
+ return 0;
}
STATIC int
@@ -459,43 +429,22 @@ xfs_vn_symlink(
if (unlikely(error))
goto out;
- inode = cip->i_vnode;
+ inode = VFS_I(cip);
error = xfs_init_security(inode, dir);
if (unlikely(error))
goto out_cleanup_inode;
d_instantiate(dentry, inode);
- xfs_validate_fields(dir);
- xfs_validate_fields(inode);
return 0;
out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry, 0);
+ xfs_cleanup_inode(dir, inode, dentry);
out:
return -error;
}
STATIC int
-xfs_vn_rmdir(
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- struct xfs_name name;
- int error;
-
- xfs_dentry_to_name(&name, dentry);
-
- error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
- if (likely(!error)) {
- xfs_validate_fields(inode);
- xfs_validate_fields(dir);
- }
- return -error;
-}
-
-STATIC int
xfs_vn_rename(
struct inode *odir,
struct dentry *odentry,
@@ -505,22 +454,13 @@ xfs_vn_rename(
struct inode *new_inode = ndentry->d_inode;
struct xfs_name oname;
struct xfs_name nname;
- int error;
xfs_dentry_to_name(&oname, odentry);
xfs_dentry_to_name(&nname, ndentry);
- error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+ return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
XFS_I(ndir), &nname, new_inode ?
XFS_I(new_inode) : NULL);
- if (likely(!error)) {
- if (new_inode)
- xfs_validate_fields(new_inode);
- xfs_validate_fields(odir);
- if (ndir != odir)
- xfs_validate_fields(ndir);
- }
- return -error;
}
/*
@@ -659,57 +599,9 @@ xfs_vn_getattr(
STATIC int
xfs_vn_setattr(
struct dentry *dentry,
- struct iattr *attr)
+ struct iattr *iattr)
{
- struct inode *inode = dentry->d_inode;
- unsigned int ia_valid = attr->ia_valid;
- bhv_vattr_t vattr = { 0 };
- int flags = 0;
- int error;
-
- if (ia_valid & ATTR_UID) {
- vattr.va_mask |= XFS_AT_UID;
- vattr.va_uid = attr->ia_uid;
- }
- if (ia_valid & ATTR_GID) {
- vattr.va_mask |= XFS_AT_GID;
- vattr.va_gid = attr->ia_gid;
- }
- if (ia_valid & ATTR_SIZE) {
- vattr.va_mask |= XFS_AT_SIZE;
- vattr.va_size = attr->ia_size;
- }
- if (ia_valid & ATTR_ATIME) {
- vattr.va_mask |= XFS_AT_ATIME;
- vattr.va_atime = attr->ia_atime;
- inode->i_atime = attr->ia_atime;
- }
- if (ia_valid & ATTR_MTIME) {
- vattr.va_mask |= XFS_AT_MTIME;
- vattr.va_mtime = attr->ia_mtime;
- }
- if (ia_valid & ATTR_CTIME) {
- vattr.va_mask |= XFS_AT_CTIME;
- vattr.va_ctime = attr->ia_ctime;
- }
- if (ia_valid & ATTR_MODE) {
- vattr.va_mask |= XFS_AT_MODE;
- vattr.va_mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- inode->i_mode &= ~S_ISGID;
- }
-
- if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
- flags |= ATTR_UTIME;
-#ifdef ATTR_NO_BLOCK
- if ((ia_valid & ATTR_NO_BLOCK))
- flags |= ATTR_NONBLOCK;
-#endif
-
- error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
- if (likely(!error))
- vn_revalidate(vn_from_inode(inode));
- return -error;
+ return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
}
/*
@@ -727,109 +619,6 @@ xfs_vn_truncate(
WARN_ON(error);
}
-STATIC int
-xfs_vn_setxattr(
- struct dentry *dentry,
- const char *name,
- const void *data,
- size_t size,
- int flags)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (flags & XATTR_CREATE)
- xflags |= ATTR_CREATE;
- if (flags & XATTR_REPLACE)
- xflags |= ATTR_REPLACE;
- xflags |= namesp->attr_flag;
- return namesp->attr_set(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_getxattr(
- struct dentry *dentry,
- const char *name,
- void *data,
- size_t size)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- ssize_t error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (!size) {
- xflags |= ATTR_KERNOVAL;
- data = NULL;
- }
- xflags |= namesp->attr_flag;
- return namesp->attr_get(vp, attr, (void *)data, size, xflags);
-}
-
-STATIC ssize_t
-xfs_vn_listxattr(
- struct dentry *dentry,
- char *data,
- size_t size)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- int error, xflags = ATTR_KERNAMELS;
- ssize_t result;
-
- if (!size)
- xflags |= ATTR_KERNOVAL;
- xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
-
- error = attr_generic_list(vp, data, size, xflags, &result);
- if (error < 0)
- return error;
- return result;
-}
-
-STATIC int
-xfs_vn_removexattr(
- struct dentry *dentry,
- const char *name)
-{
- bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
- char *attr = (char *)name;
- attrnames_t *namesp;
- int xflags = 0;
- int error;
-
- namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- attr += namesp->attr_namelen;
- error = namesp->attr_capable(vp, NULL);
- if (error)
- return error;
- xflags |= namesp->attr_flag;
- return namesp->attr_remove(vp, attr, xflags);
-}
-
STATIC long
xfs_vn_fallocate(
struct inode *inode,
@@ -853,18 +642,18 @@ xfs_vn_fallocate(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
- 0, NULL, ATTR_NOLOCK);
+ 0, NULL, XFS_ATTR_NOLOCK);
if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode))
new_size = offset + len;
/* Change file size if needed */
if (new_size) {
- bhv_vattr_t va;
+ struct iattr iattr;
- va.va_mask = XFS_AT_SIZE;
- va.va_size = new_size;
- error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL);
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = new_size;
+ error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -872,46 +661,172 @@ out_error:
return error;
}
-const struct inode_operations xfs_inode_operations = {
+static const struct inode_operations xfs_inode_operations = {
.permission = xfs_vn_permission,
.truncate = xfs_vn_truncate,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
.fallocate = xfs_vn_fallocate,
};
-const struct inode_operations xfs_dir_inode_operations = {
+static const struct inode_operations xfs_dir_inode_operations = {
.create = xfs_vn_create,
.lookup = xfs_vn_lookup,
.link = xfs_vn_link,
.unlink = xfs_vn_unlink,
.symlink = xfs_vn_symlink,
.mkdir = xfs_vn_mkdir,
- .rmdir = xfs_vn_rmdir,
+ /*
+ * Yes, XFS uses the same method for rmdir and unlink.
+ *
+ * There are some subtile differences deeper in the code,
+ * but we use S_ISDIR to check for those.
+ */
+ .rmdir = xfs_vn_unlink,
+ .mknod = xfs_vn_mknod,
+ .rename = xfs_vn_rename,
+ .permission = xfs_vn_permission,
+ .getattr = xfs_vn_getattr,
+ .setattr = xfs_vn_setattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_dir_ci_inode_operations = {
+ .create = xfs_vn_create,
+ .lookup = xfs_vn_ci_lookup,
+ .link = xfs_vn_link,
+ .unlink = xfs_vn_unlink,
+ .symlink = xfs_vn_symlink,
+ .mkdir = xfs_vn_mkdir,
+ /*
+ * Yes, XFS uses the same method for rmdir and unlink.
+ *
+ * There are some subtile differences deeper in the code,
+ * but we use S_ISDIR to check for those.
+ */
+ .rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
.permission = xfs_vn_permission,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
};
-const struct inode_operations xfs_symlink_inode_operations = {
+static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
.put_link = xfs_vn_put_link,
.permission = xfs_vn_permission,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
- .setxattr = xfs_vn_setxattr,
- .getxattr = xfs_vn_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .removexattr = xfs_vn_removexattr,
};
+
+STATIC void
+xfs_diflags_to_iflags(
+ struct inode *inode,
+ struct xfs_inode *ip)
+{
+ if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= ~S_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+ inode->i_flags |= S_APPEND;
+ else
+ inode->i_flags &= ~S_APPEND;
+ if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+ inode->i_flags |= S_SYNC;
+ else
+ inode->i_flags &= ~S_SYNC;
+ if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+ inode->i_flags |= S_NOATIME;
+ else
+ inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ */
+void
+xfs_setup_inode(
+ struct xfs_inode *ip)
+{
+ struct inode *inode = ip->i_vnode;
+
+ inode->i_mode = ip->i_d.di_mode;
+ inode->i_nlink = ip->i_d.di_nlink;
+ inode->i_uid = ip->i_d.di_uid;
+ inode->i_gid = ip->i_d.di_gid;
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFBLK:
+ case S_IFCHR:
+ inode->i_rdev =
+ MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+ sysv_minor(ip->i_df.if_u2.if_rdev));
+ break;
+ default:
+ inode->i_rdev = 0;
+ break;
+ }
+
+ inode->i_generation = ip->i_d.di_gen;
+ i_size_write(inode, ip->i_d.di_size);
+ inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
+ inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+ inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+ inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+ inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+ inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+ xfs_diflags_to_iflags(inode, ip);
+ xfs_iflags_clear(ip, XFS_IMODIFIED);
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ inode->i_op = &xfs_inode_operations;
+ inode->i_fop = &xfs_file_operations;
+ inode->i_mapping->a_ops = &xfs_address_space_operations;
+ break;
+ case S_IFDIR:
+ if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+ inode->i_op = &xfs_dir_ci_inode_operations;
+ else
+ inode->i_op = &xfs_dir_inode_operations;
+ inode->i_fop = &xfs_dir_file_operations;
+ break;
+ case S_IFLNK:
+ inode->i_op = &xfs_symlink_inode_operations;
+ if (!(ip->i_df.if_flags & XFS_IFINLINE))
+ inode->i_mapping->a_ops = &xfs_address_space_operations;
+ break;
+ default:
+ inode->i_op = &xfs_inode_operations;
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ break;
+ }
+
+ xfs_iflags_clear(ip, XFS_INEW);
+ barrier();
+
+ unlock_new_inode(inode);
+}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7afff..8b1a1e31dc21 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,23 +18,14 @@
#ifndef __XFS_IOPS_H__
#define __XFS_IOPS_H__
-extern const struct inode_operations xfs_inode_operations;
-extern const struct inode_operations xfs_dir_inode_operations;
-extern const struct inode_operations xfs_symlink_inode_operations;
+struct xfs_inode;
extern const struct file_operations xfs_file_operations;
extern const struct file_operations xfs_dir_file_operations;
extern const struct file_operations xfs_invis_file_operations;
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-struct xfs_inode;
-extern void xfs_ichgtime(struct xfs_inode *, int);
-extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int);
-
-#define xfs_vtoi(vp) \
- ((struct xfs_inode *)vn_to_inode(vp)->i_private)
-
-#define XFS_I(inode) \
- ((struct xfs_inode *)(inode)->i_private)
+extern void xfs_setup_inode(struct xfs_inode *);
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b57..cc0f7b3a9795 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -45,13 +45,13 @@
#include <mrlock.h>
#include <sv.h>
#include <mutex.h>
-#include <sema.h>
#include <time.h>
#include <support/ktrace.h>
#include <support/debug.h>
#include <support/uuid.h>
+#include <linux/semaphore.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
@@ -76,6 +76,7 @@
#include <linux/log2.h>
#include <linux/spinlock.h>
#include <linux/random.h>
+#include <linux/ctype.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -125,8 +126,6 @@
#define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid)
-#define current_fsuid(cred) (current->fsuid)
-#define current_fsgid(cred) (current->fsgid)
#define current_test_flags(f) (current->flags & (f))
#define current_set_flags_nested(sp, f) \
(*(sp) = current->flags, current->flags |= (f))
@@ -179,7 +178,7 @@
#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
#define xfs_stack_trace() dump_stack()
#define xfs_itruncate_data(ip, off) \
- (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off)))
+ (-vmtruncate(VFS_I(ip), (off)))
/* Move the kernel do_div definition off to one side */
@@ -299,4 +298,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
return x;
}
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 82333b3e118e..1957e5357d04 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -137,7 +137,7 @@ xfs_iozero(
struct address_space *mapping;
int status;
- mapping = ip->i_vnode->i_mapping;
+ mapping = VFS_I(ip)->i_mapping;
do {
unsigned offset, bytes;
void *fsdata;
@@ -674,9 +674,7 @@ start:
*/
if (likely(!(ioflags & IO_INVIS) &&
!mnt_want_write(file->f_path.mnt))) {
- file_update_time(file);
- xfs_ichgtime_fast(xip, inode,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
mnt_drop_write(file->f_path.mnt);
}
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b6102051..3d5b67c075c7 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
return len;
}
-void
+int
xfs_init_procfs(void)
{
if (!proc_mkdir("fs/xfs", NULL))
- return;
- create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
+ goto out;
+
+ if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
+ xfs_read_xfsstats, NULL))
+ goto out_remove_entry;
+ return 0;
+
+ out_remove_entry:
+ remove_proc_entry("fs/xfs", NULL);
+ out:
+ return -ENOMEM;
}
void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb2..e83820febc9f 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
-extern void xfs_init_procfs(void);
+extern int xfs_init_procfs(void);
extern void xfs_cleanup_procfs(void);
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
# define XFS_STATS_DEC(count)
# define XFS_STATS_ADD(count, inc)
-static inline void xfs_init_procfs(void) { };
-static inline void xfs_cleanup_procfs(void) { };
+static inline int xfs_init_procfs(void)
+{
+ return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
#endif /* !CONFIG_PROC_FS */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 943381284e2e..73c65f19e549 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
#include "xfs_version.h"
#include "xfs_log_priv.h"
#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -60,6 +66,7 @@
#include <linux/writeback.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/parser.h>
static struct quotactl_ops xfs_quotactl_operations;
static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
{
struct xfs_mount_args *args;
- args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+ args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
+ if (!args)
+ return NULL;
+
args->logbufs = args->logbufsize = -1;
strncpy(args->fsname, sb->s_id, MAXNAMELEN);
@@ -138,6 +148,23 @@ xfs_args_allocate(
#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+ Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static match_table_t tokens = {
+ {Opt_barrier, "barrier"},
+ {Opt_nobarrier, "nobarrier"},
+ {Opt_err, NULL}
+};
+
+
STATIC unsigned long
suffix_strtoul(char *s, char **endp, unsigned int base)
{
@@ -314,6 +341,7 @@ xfs_parseargs(
args->flags |= XFSMNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
args->flags &= ~XFSMNT_ATTR2;
+ args->flags |= XFSMNT_NOATTR2;
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
args->flags2 |= XFSMNT2_FILESTREAMS;
} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -553,115 +581,6 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
-STATIC_INLINE void
-xfs_set_inodeops(
- struct inode *inode)
-{
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- inode->i_op = &xfs_inode_operations;
- inode->i_fop = &xfs_file_operations;
- inode->i_mapping->a_ops = &xfs_address_space_operations;
- break;
- case S_IFDIR:
- inode->i_op = &xfs_dir_inode_operations;
- inode->i_fop = &xfs_dir_file_operations;
- break;
- case S_IFLNK:
- inode->i_op = &xfs_symlink_inode_operations;
- if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
- inode->i_mapping->a_ops = &xfs_address_space_operations;
- break;
- default:
- inode->i_op = &xfs_inode_operations;
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
- break;
- }
-}
-
-STATIC_INLINE void
-xfs_revalidate_inode(
- xfs_mount_t *mp,
- bhv_vnode_t *vp,
- xfs_inode_t *ip)
-{
- struct inode *inode = vn_to_inode(vp);
-
- inode->i_mode = ip->i_d.di_mode;
- inode->i_nlink = ip->i_d.di_nlink;
- inode->i_uid = ip->i_d.di_uid;
- inode->i_gid = ip->i_d.di_gid;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- inode->i_rdev =
- MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
- sysv_minor(ip->i_df.if_u2.if_rdev));
- break;
- default:
- inode->i_rdev = 0;
- break;
- }
-
- inode->i_generation = ip->i_d.di_gen;
- i_size_write(inode, ip->i_d.di_size);
- inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
- inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
- if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
- xfs_iflags_clear(ip, XFS_IMODIFIED);
-}
-
-void
-xfs_initialize_vnode(
- struct xfs_mount *mp,
- bhv_vnode_t *vp,
- struct xfs_inode *ip)
-{
- struct inode *inode = vn_to_inode(vp);
-
- if (!ip->i_vnode) {
- ip->i_vnode = vp;
- inode->i_private = ip;
- }
-
- /*
- * We need to set the ops vectors, and unlock the inode, but if
- * we have been called during the new inode create process, it is
- * too early to fill in the Linux inode. We will get called a
- * second time once the inode is properly set up, and then we can
- * finish our work.
- */
- if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) {
- xfs_revalidate_inode(mp, vp, ip);
- xfs_set_inodeops(inode);
-
- xfs_iflags_clear(ip, XFS_INEW);
- barrier();
-
- unlock_new_inode(inode);
- }
-}
-
int
xfs_blkdev_get(
xfs_mount_t *mp,
@@ -733,14 +652,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
return;
}
- if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
- QUEUE_ORDERED_NONE) {
- xfs_fs_cmn_err(CE_NOTE, mp,
- "Disabling barriers, not supported by the underlying device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, underlying device is readonly");
@@ -764,6 +675,139 @@ xfs_blkdev_issue_flush(
blkdev_issue_flush(buftarg->bt_bdev, NULL);
}
+STATIC void
+xfs_close_devices(
+ struct xfs_mount *mp)
+{
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+ struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+ xfs_free_buftarg(mp->m_logdev_targp);
+ xfs_blkdev_put(logdev);
+ }
+ if (mp->m_rtdev_targp) {
+ struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_blkdev_put(rtdev);
+ }
+ xfs_free_buftarg(mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ * (1) device (partition) with data and internal log
+ * (2) logical volume with data and log subvolumes.
+ * (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present. The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+ struct xfs_mount *mp,
+ struct xfs_mount_args *args)
+{
+ struct block_device *ddev = mp->m_super->s_bdev;
+ struct block_device *logdev = NULL, *rtdev = NULL;
+ int error;
+
+ /*
+ * Open real time and log devices - order is important.
+ */
+ if (args->logname[0]) {
+ error = xfs_blkdev_get(mp, args->logname, &logdev);
+ if (error)
+ goto out;
+ }
+
+ if (args->rtname[0]) {
+ error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+ if (error)
+ goto out_close_logdev;
+
+ if (rtdev == ddev || rtdev == logdev) {
+ cmn_err(CE_WARN,
+ "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+ error = EINVAL;
+ goto out_close_rtdev;
+ }
+ }
+
+ /*
+ * Setup xfs_mount buffer target pointers
+ */
+ error = ENOMEM;
+ mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+ if (!mp->m_ddev_targp)
+ goto out_close_rtdev;
+
+ if (rtdev) {
+ mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+ if (!mp->m_rtdev_targp)
+ goto out_free_ddev_targ;
+ }
+
+ if (logdev && logdev != ddev) {
+ mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+ if (!mp->m_logdev_targp)
+ goto out_free_rtdev_targ;
+ } else {
+ mp->m_logdev_targp = mp->m_ddev_targp;
+ }
+
+ return 0;
+
+ out_free_rtdev_targ:
+ if (mp->m_rtdev_targp)
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ out_free_ddev_targ:
+ xfs_free_buftarg(mp->m_ddev_targp);
+ out_close_rtdev:
+ if (rtdev)
+ xfs_blkdev_put(rtdev);
+ out_close_logdev:
+ if (logdev && logdev != ddev)
+ xfs_blkdev_put(logdev);
+ out:
+ return error;
+}
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+ mp->m_sb.sb_sectsize);
+ if (error)
+ return error;
+
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+ unsigned int log_sector_size = BBSIZE;
+
+ if (xfs_sb_version_hassector(&mp->m_sb))
+ log_sector_size = mp->m_sb.sb_logsectsize;
+ error = xfs_setsize_buftarg(mp->m_logdev_targp,
+ mp->m_sb.sb_blocksize,
+ log_sector_size);
+ if (error)
+ return error;
+ }
+ if (mp->m_rtdev_targp) {
+ error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+ mp->m_sb.sb_blocksize,
+ mp->m_sb.sb_sectsize);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
/*
* XFS AIL push thread support
*/
@@ -826,62 +870,21 @@ STATIC struct inode *
xfs_fs_alloc_inode(
struct super_block *sb)
{
- bhv_vnode_t *vp;
-
- vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
- if (unlikely(!vp))
- return NULL;
- return vn_to_inode(vp);
+ return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
}
STATIC void
xfs_fs_destroy_inode(
struct inode *inode)
{
- kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode));
+ kmem_zone_free(xfs_vnode_zone, inode);
}
STATIC void
xfs_fs_inode_init_once(
void *vnode)
{
- inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
-}
-
-STATIC int __init
-xfs_init_zones(void)
-{
- xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
- if (!xfs_vnode_zone)
- goto out;
-
- xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
- if (!xfs_ioend_zone)
- goto out_destroy_vnode_zone;
-
- xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
- xfs_ioend_zone);
- if (!xfs_ioend_pool)
- goto out_free_ioend_zone;
- return 0;
-
- out_free_ioend_zone:
- kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
- kmem_zone_destroy(xfs_vnode_zone);
- out:
- return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
- mempool_destroy(xfs_ioend_pool);
- kmem_zone_destroy(xfs_vnode_zone);
- kmem_zone_destroy(xfs_ioend_zone);
+ inode_init_once((struct inode *)vnode);
}
/*
@@ -986,7 +989,7 @@ void
xfs_flush_inode(
xfs_inode_t *ip)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = VFS_I(ip);
igrab(inode);
xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
@@ -1011,7 +1014,7 @@ void
xfs_flush_device(
xfs_inode_t *ip)
{
- struct inode *inode = vn_to_inode(XFS_ITOV(ip));
+ struct inode *inode = VFS_I(ip);
igrab(inode);
xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
@@ -1073,7 +1076,7 @@ xfssyncd(
list_del(&work->w_list);
if (work == &mp->m_sync_work)
continue;
- kmem_free(work, sizeof(struct bhv_vfs_sync_work));
+ kmem_free(work);
}
}
@@ -1081,18 +1084,76 @@ xfssyncd(
}
STATIC void
+xfs_free_fsname(
+ struct xfs_mount *mp)
+{
+ kfree(mp->m_fsname);
+ kfree(mp->m_rtname);
+ kfree(mp->m_logname);
+}
+
+STATIC void
xfs_fs_put_super(
struct super_block *sb)
{
struct xfs_mount *mp = XFS_M(sb);
+ struct xfs_inode *rip = mp->m_rootip;
+ int unmount_event_flags = 0;
int error;
kthread_stop(mp->m_sync_task);
xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
- error = xfs_unmount(mp, 0, NULL);
- if (error)
- printk("XFS: unmount got error=%d\n", error);
+
+#ifdef HAVE_DMAPI
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ unmount_event_flags =
+ (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
+ 0 : DM_FLAGS_UNWANTED;
+ /*
+ * Ignore error from dmapi here, first unmount is not allowed
+ * to fail anyway, and second we wouldn't want to fail a
+ * unmount because of dmapi.
+ */
+ XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
+ NULL, NULL, 0, 0, unmount_event_flags);
+ }
+#endif
+
+ /*
+ * Blow away any referenced inode in the filestreams cache.
+ * This can and will cause log traffic as inodes go inactive
+ * here.
+ */
+ xfs_filestream_unmount(mp);
+
+ XFS_bflush(mp->m_ddev_targp);
+ error = xfs_unmount_flush(mp, 0);
+ WARN_ON(error);
+
+ /*
+ * If we're forcing a shutdown, typically because of a media error,
+ * we want to make sure we invalidate dirty pages that belong to
+ * referenced vnodes as well.
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
+ ASSERT(error != EFSCORRUPTED);
+ }
+
+ if (mp->m_flags & XFS_MOUNT_DMAPI) {
+ XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
+ unmount_event_flags);
+ }
+
+ xfs_unmountfs(mp);
+ xfs_freesb(mp);
+ xfs_icsb_destroy_counters(mp);
+ xfs_close_devices(mp);
+ xfs_qmops_put(mp);
+ xfs_dmops_put(mp);
+ xfs_free_fsname(mp);
+ kfree(mp);
}
STATIC void
@@ -1215,14 +1276,54 @@ xfs_fs_remount(
char *options)
{
struct xfs_mount *mp = XFS_M(sb);
- struct xfs_mount_args *args = xfs_args_allocate(sb, 0);
- int error;
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
- error = xfs_parseargs(mp, options, args, 1);
- if (!error)
- error = xfs_mntupdate(mp, flags, args);
- kmem_free(args, sizeof(*args));
- return -error;
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_barrier:
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+
+ /*
+ * Test if barriers are actually working if we can,
+ * else delay this check until the filesystem is
+ * marked writeable.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+ xfs_mountfs_check_barriers(mp);
+ break;
+ case Opt_nobarrier:
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ break;
+ default:
+ printk(KERN_INFO
+ "XFS: mount option \"%s\" not supported for remount\n", p);
+ return -EINVAL;
+ }
+ }
+
+ /* rw/ro -> rw */
+ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+ mp->m_flags &= ~XFS_MOUNT_RDONLY;
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_mountfs_check_barriers(mp);
+ }
+
+ /* rw -> ro */
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+ xfs_filestream_flush(mp);
+ xfs_sync(mp, SYNC_DATA_QUIESCE);
+ xfs_attr_quiesce(mp);
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+ }
+
+ return 0;
}
/*
@@ -1299,6 +1400,245 @@ xfs_fs_setxquota(
Q_XSETPQLIM), id, (caddr_t)fdq);
}
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ */
+STATIC int
+xfs_start_flags(
+ struct xfs_mount_args *ap,
+ struct xfs_mount *mp)
+{
+ int error;
+
+ /* Values are in BBs */
+ if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+ /*
+ * At this point the superblock has not been read
+ * in, therefore we do not know the block size.
+ * Before the mount call ends we will convert
+ * these to FSBs.
+ */
+ mp->m_dalign = ap->sunit;
+ mp->m_swidth = ap->swidth;
+ }
+
+ if (ap->logbufs != -1 &&
+ ap->logbufs != 0 &&
+ (ap->logbufs < XLOG_MIN_ICLOGS ||
+ ap->logbufs > XLOG_MAX_ICLOGS)) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufs value: %d [not %d-%d]",
+ ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return XFS_ERROR(EINVAL);
+ }
+ mp->m_logbufs = ap->logbufs;
+ if (ap->logbufsize != -1 &&
+ ap->logbufsize != 0 &&
+ (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
+ ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(ap->logbufsize))) {
+ cmn_err(CE_WARN,
+ "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ ap->logbufsize);
+ return XFS_ERROR(EINVAL);
+ }
+
+ error = ENOMEM;
+
+ mp->m_logbsize = ap->logbufsize;
+ mp->m_fsname_len = strlen(ap->fsname) + 1;
+
+ mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
+ if (!mp->m_fsname)
+ goto out;
+
+ if (ap->rtname[0]) {
+ mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
+ if (!mp->m_rtname)
+ goto out_free_fsname;
+
+ }
+
+ if (ap->logname[0]) {
+ mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
+ if (!mp->m_logname)
+ goto out_free_rtname;
+ }
+
+ if (ap->flags & XFSMNT_WSYNC)
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+#if XFS_BIG_INUMS
+ if (ap->flags & XFSMNT_INO64) {
+ mp->m_flags |= XFS_MOUNT_INO64;
+ mp->m_inoadd = XFS_INO64_OFFSET;
+ }
+#endif
+ if (ap->flags & XFSMNT_RETERR)
+ mp->m_flags |= XFS_MOUNT_RETERR;
+ if (ap->flags & XFSMNT_NOALIGN)
+ mp->m_flags |= XFS_MOUNT_NOALIGN;
+ if (ap->flags & XFSMNT_SWALLOC)
+ mp->m_flags |= XFS_MOUNT_SWALLOC;
+ if (ap->flags & XFSMNT_OSYNCISOSYNC)
+ mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
+ if (ap->flags & XFSMNT_32BITINODES)
+ mp->m_flags |= XFS_MOUNT_32BITINODES;
+
+ if (ap->flags & XFSMNT_IOSIZE) {
+ if (ap->iosizelog > XFS_MAX_IO_LOG ||
+ ap->iosizelog < XFS_MIN_IO_LOG) {
+ cmn_err(CE_WARN,
+ "XFS: invalid log iosize: %d [not %d-%d]",
+ ap->iosizelog, XFS_MIN_IO_LOG,
+ XFS_MAX_IO_LOG);
+ return XFS_ERROR(EINVAL);
+ }
+
+ mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+ mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
+ }
+
+ if (ap->flags & XFSMNT_IKEEP)
+ mp->m_flags |= XFS_MOUNT_IKEEP;
+ if (ap->flags & XFSMNT_DIRSYNC)
+ mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (ap->flags & XFSMNT_ATTR2)
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ if (ap->flags & XFSMNT_NOATTR2)
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
+
+ if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
+ mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+
+ /*
+ * no recovery flag requires a read-only mount
+ */
+ if (ap->flags & XFSMNT_NORECOVERY) {
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ cmn_err(CE_WARN,
+ "XFS: tried to mount a FS read-write without recovery!");
+ return XFS_ERROR(EINVAL);
+ }
+ mp->m_flags |= XFS_MOUNT_NORECOVERY;
+ }
+
+ if (ap->flags & XFSMNT_NOUUID)
+ mp->m_flags |= XFS_MOUNT_NOUUID;
+ if (ap->flags & XFSMNT_BARRIER)
+ mp->m_flags |= XFS_MOUNT_BARRIER;
+ else
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+
+ if (ap->flags2 & XFSMNT2_FILESTREAMS)
+ mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+
+ if (ap->flags & XFSMNT_DMAPI)
+ mp->m_flags |= XFS_MOUNT_DMAPI;
+ return 0;
+
+
+ out_free_rtname:
+ kfree(mp->m_rtname);
+ out_free_fsname:
+ kfree(mp->m_fsname);
+ out:
+ return error;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+ struct xfs_mount_args *ap,
+ struct xfs_mount *mp)
+{
+ int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+ /* Fail a mount where the logbuf is smaller then the log stripe */
+ if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+ if ((ap->logbufsize <= 0) &&
+ (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+ mp->m_logbsize = mp->m_sb.sb_logsunit;
+ } else if (ap->logbufsize > 0 &&
+ ap->logbufsize < mp->m_sb.sb_logsunit) {
+ cmn_err(CE_WARN,
+ "XFS: logbuf size must be greater than or equal to log stripe size");
+ return XFS_ERROR(EINVAL);
+ }
+ } else {
+ /* Fail a mount if the logbuf is larger than 32K */
+ if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+ cmn_err(CE_WARN,
+ "XFS: logbuf size for version 1 logs must be 16K or 32K");
+ return XFS_ERROR(EINVAL);
+ }
+ }
+
+ /*
+ * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+ * told by noattr2 to turn it off
+ */
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ !(ap->flags & XFSMNT_NOATTR2))
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+
+ /*
+ * prohibit r/w mounts of read-only filesystems
+ */
+ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+ cmn_err(CE_WARN,
+ "XFS: cannot mount a read-only filesystem as read-write");
+ return XFS_ERROR(EROFS);
+ }
+
+ /*
+ * check for shared mount.
+ */
+ if (ap->flags & XFSMNT_SHARED) {
+ if (!xfs_sb_version_hasshared(&mp->m_sb))
+ return XFS_ERROR(EINVAL);
+
+ /*
+ * For IRIX 6.5, shared mounts must have the shared
+ * version bit set, have the persistent readonly
+ * field set, must be version 0 and can only be mounted
+ * read-only.
+ */
+ if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
+ (mp->m_sb.sb_shared_vn != 0))
+ return XFS_ERROR(EINVAL);
+
+ mp->m_flags |= XFS_MOUNT_SHARED;
+
+ /*
+ * Shared XFS V0 can't deal with DMI. Return EINVAL.
+ */
+ if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+ return XFS_ERROR(EINVAL);
+ }
+
+ if (ap->flags & XFSMNT_UQUOTA) {
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_UQUOTAENF)
+ mp->m_qflags |= XFS_UQUOTA_ENFD;
+ }
+
+ if (ap->flags & XFSMNT_GQUOTA) {
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_GQUOTAENF)
+ mp->m_qflags |= XFS_OQUOTA_ENFD;
+ } else if (ap->flags & XFSMNT_PQUOTA) {
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ if (ap->flags & XFSMNT_PQUOTAENF)
+ mp->m_qflags |= XFS_OQUOTA_ENFD;
+ }
+
+ return 0;
+}
+
STATIC int
xfs_fs_fill_super(
struct super_block *sb,
@@ -1307,11 +1647,21 @@ xfs_fs_fill_super(
{
struct inode *root;
struct xfs_mount *mp = NULL;
- struct xfs_mount_args *args = xfs_args_allocate(sb, silent);
- int error;
+ struct xfs_mount_args *args;
+ int flags = 0, error = ENOMEM;
- mp = xfs_mount_init();
+ args = xfs_args_allocate(sb, silent);
+ if (!args)
+ return -ENOMEM;
+ mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+ if (!mp)
+ goto out_free_args;
+
+ spin_lock_init(&mp->m_sb_lock);
+ mutex_init(&mp->m_ilock);
+ mutex_init(&mp->m_growlock);
+ atomic_set(&mp->m_active_trans, 0);
INIT_LIST_HEAD(&mp->m_sync_list);
spin_lock_init(&mp->m_sync_lock);
init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1324,16 +1674,60 @@ xfs_fs_fill_super(
error = xfs_parseargs(mp, (char *)data, args, 0);
if (error)
- goto fail_vfsop;
+ goto out_free_mp;
sb_min_blocksize(sb, BBSIZE);
+ sb->s_xattr = xfs_xattr_handlers;
sb->s_export_op = &xfs_export_operations;
sb->s_qcop = &xfs_quotactl_operations;
sb->s_op = &xfs_super_operations;
- error = xfs_mount(mp, args, NULL);
+ error = xfs_dmops_get(mp, args);
+ if (error)
+ goto out_free_mp;
+ error = xfs_qmops_get(mp, args);
+ if (error)
+ goto out_put_dmops;
+
+ if (args->flags & XFSMNT_QUIET)
+ flags |= XFS_MFSI_QUIET;
+
+ error = xfs_open_devices(mp, args);
+ if (error)
+ goto out_put_qmops;
+
+ if (xfs_icsb_init_counters(mp))
+ mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+
+ /*
+ * Setup flags based on mount(2) options and then the superblock
+ */
+ error = xfs_start_flags(args, mp);
+ if (error)
+ goto out_free_fsname;
+ error = xfs_readsb(mp, flags);
+ if (error)
+ goto out_free_fsname;
+ error = xfs_finish_flags(args, mp);
if (error)
- goto fail_vfsop;
+ goto out_free_sb;
+
+ error = xfs_setup_devices(mp);
+ if (error)
+ goto out_free_sb;
+
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_mountfs_check_barriers(mp);
+
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_sb;
+
+ error = xfs_mountfs(mp);
+ if (error)
+ goto out_filestream_unmount;
+
+ XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
sb->s_dirt = 1;
sb->s_magic = XFS_SB_MAGIC;
@@ -1343,7 +1737,7 @@ xfs_fs_fill_super(
sb->s_time_gran = 1;
set_posix_acl_flag(sb);
- root = igrab(mp->m_rootip->i_vnode);
+ root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = ENOENT;
goto fail_unmount;
@@ -1368,10 +1762,28 @@ xfs_fs_fill_super(
xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
- kmem_free(args, sizeof(*args));
+ kfree(args);
return 0;
-fail_vnrele:
+ out_filestream_unmount:
+ xfs_filestream_unmount(mp);
+ out_free_sb:
+ xfs_freesb(mp);
+ out_free_fsname:
+ xfs_free_fsname(mp);
+ xfs_icsb_destroy_counters(mp);
+ xfs_close_devices(mp);
+ out_put_qmops:
+ xfs_qmops_put(mp);
+ out_put_dmops:
+ xfs_dmops_put(mp);
+ out_free_mp:
+ kfree(mp);
+ out_free_args:
+ kfree(args);
+ return -error;
+
+ fail_vnrele:
if (sb->s_root) {
dput(sb->s_root);
sb->s_root = NULL;
@@ -1379,12 +1791,20 @@ fail_vnrele:
iput(root);
}
-fail_unmount:
- xfs_unmount(mp, 0, NULL);
+ fail_unmount:
+ /*
+ * Blow away any referenced inode in the filestreams cache.
+ * This can and will cause log traffic as inodes go inactive
+ * here.
+ */
+ xfs_filestream_unmount(mp);
+
+ XFS_bflush(mp->m_ddev_targp);
+ error = xfs_unmount_flush(mp, 0);
+ WARN_ON(error);
-fail_vfsop:
- kmem_free(args, sizeof(*args));
- return -error;
+ xfs_unmountfs(mp);
+ goto out_free_sb;
}
STATIC int
@@ -1429,9 +1849,235 @@ static struct file_system_type xfs_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
+STATIC int __init
+xfs_alloc_trace_bufs(void)
+{
+#ifdef XFS_ALLOC_TRACE
+ xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_alloc_trace_buf)
+ goto out;
+#endif
+#ifdef XFS_BMAP_TRACE
+ xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_bmap_trace_buf)
+ goto out_free_alloc_trace;
+#endif
+#ifdef XFS_BMBT_TRACE
+ xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_bmbt_trace_buf)
+ goto out_free_bmap_trace;
+#endif
+#ifdef XFS_ATTR_TRACE
+ xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_attr_trace_buf)
+ goto out_free_bmbt_trace;
+#endif
+#ifdef XFS_DIR2_TRACE
+ xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
+ if (!xfs_dir2_trace_buf)
+ goto out_free_attr_trace;
+#endif
+
+ return 0;
+
+#ifdef XFS_DIR2_TRACE
+ out_free_attr_trace:
+#endif
+#ifdef XFS_ATTR_TRACE
+ ktrace_free(xfs_attr_trace_buf);
+ out_free_bmbt_trace:
+#endif
+#ifdef XFS_BMBT_TRACE
+ ktrace_free(xfs_bmbt_trace_buf);
+ out_free_bmap_trace:
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(xfs_bmap_trace_buf);
+ out_free_alloc_trace:
+#endif
+#ifdef XFS_ALLOC_TRACE
+ ktrace_free(xfs_alloc_trace_buf);
+ out:
+#endif
+ return -ENOMEM;
+}
+
+STATIC void
+xfs_free_trace_bufs(void)
+{
+#ifdef XFS_DIR2_TRACE
+ ktrace_free(xfs_dir2_trace_buf);
+#endif
+#ifdef XFS_ATTR_TRACE
+ ktrace_free(xfs_attr_trace_buf);
+#endif
+#ifdef XFS_BMBT_TRACE
+ ktrace_free(xfs_bmbt_trace_buf);
+#endif
+#ifdef XFS_BMAP_TRACE
+ ktrace_free(xfs_bmap_trace_buf);
+#endif
+#ifdef XFS_ALLOC_TRACE
+ ktrace_free(xfs_alloc_trace_buf);
+#endif
+}
+
+STATIC int __init
+xfs_init_zones(void)
+{
+ xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+ KM_ZONE_SPREAD,
+ xfs_fs_inode_init_once);
+ if (!xfs_vnode_zone)
+ goto out;
+
+ xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+ if (!xfs_ioend_zone)
+ goto out_destroy_vnode_zone;
+
+ xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+ xfs_ioend_zone);
+ if (!xfs_ioend_pool)
+ goto out_destroy_ioend_zone;
+
+ xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+ "xfs_log_ticket");
+ if (!xfs_log_ticket_zone)
+ goto out_destroy_ioend_pool;
+
+ xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+ "xfs_bmap_free_item");
+ if (!xfs_bmap_free_item_zone)
+ goto out_destroy_log_ticket_zone;
+ xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+ "xfs_btree_cur");
+ if (!xfs_btree_cur_zone)
+ goto out_destroy_bmap_free_item_zone;
+
+ xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+ "xfs_da_state");
+ if (!xfs_da_state_zone)
+ goto out_destroy_btree_cur_zone;
+
+ xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+ if (!xfs_dabuf_zone)
+ goto out_destroy_da_state_zone;
+
+ xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+ if (!xfs_ifork_zone)
+ goto out_destroy_dabuf_zone;
+
+ xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+ if (!xfs_trans_zone)
+ goto out_destroy_ifork_zone;
+
+ /*
+ * The size of the zone allocated buf log item is the maximum
+ * size possible under XFS. This wastes a little bit of memory,
+ * but it is much faster.
+ */
+ xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+ (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+ NBWORD) * sizeof(int))), "xfs_buf_item");
+ if (!xfs_buf_item_zone)
+ goto out_destroy_trans_zone;
+
+ xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+ ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+ sizeof(xfs_extent_t))), "xfs_efd_item");
+ if (!xfs_efd_zone)
+ goto out_destroy_buf_item_zone;
+
+ xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+ ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+ sizeof(xfs_extent_t))), "xfs_efi_item");
+ if (!xfs_efi_zone)
+ goto out_destroy_efd_zone;
+
+ xfs_inode_zone =
+ kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+ KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+ KM_ZONE_SPREAD, NULL);
+ if (!xfs_inode_zone)
+ goto out_destroy_efi_zone;
+
+ xfs_ili_zone =
+ kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+ KM_ZONE_SPREAD, NULL);
+ if (!xfs_ili_zone)
+ goto out_destroy_inode_zone;
+
+#ifdef CONFIG_XFS_POSIX_ACL
+ xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
+ if (!xfs_acl_zone)
+ goto out_destroy_ili_zone;
+#endif
+
+ return 0;
+
+#ifdef CONFIG_XFS_POSIX_ACL
+ out_destroy_ili_zone:
+#endif
+ kmem_zone_destroy(xfs_ili_zone);
+ out_destroy_inode_zone:
+ kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+ kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+ kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+ kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_trans_zone:
+ kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+ kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+ kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+ kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+ kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+ kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+ kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+ mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+ kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+ kmem_zone_destroy(xfs_vnode_zone);
+ out:
+ return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+#ifdef CONFIG_XFS_POSIX_ACL
+ kmem_zone_destroy(xfs_acl_zone);
+#endif
+ kmem_zone_destroy(xfs_ili_zone);
+ kmem_zone_destroy(xfs_inode_zone);
+ kmem_zone_destroy(xfs_efi_zone);
+ kmem_zone_destroy(xfs_efd_zone);
+ kmem_zone_destroy(xfs_buf_item_zone);
+ kmem_zone_destroy(xfs_trans_zone);
+ kmem_zone_destroy(xfs_ifork_zone);
+ kmem_zone_destroy(xfs_dabuf_zone);
+ kmem_zone_destroy(xfs_da_state_zone);
+ kmem_zone_destroy(xfs_btree_cur_zone);
+ kmem_zone_destroy(xfs_bmap_free_item_zone);
+ kmem_zone_destroy(xfs_log_ticket_zone);
+ mempool_destroy(xfs_ioend_pool);
+ kmem_zone_destroy(xfs_ioend_zone);
+ kmem_zone_destroy(xfs_vnode_zone);
+
+}
STATIC int __init
-init_xfs_fs( void )
+init_xfs_fs(void)
{
int error;
static char message[] __initdata = KERN_INFO \
@@ -1440,42 +2086,73 @@ init_xfs_fs( void )
printk(message);
ktrace_init(64);
+ vn_init();
+ xfs_dir_startup();
error = xfs_init_zones();
- if (error < 0)
- goto undo_zones;
+ if (error)
+ goto out;
+
+ error = xfs_alloc_trace_bufs();
+ if (error)
+ goto out_destroy_zones;
+
+ error = xfs_mru_cache_init();
+ if (error)
+ goto out_free_trace_buffers;
+
+ error = xfs_filestream_init();
+ if (error)
+ goto out_mru_cache_uninit;
error = xfs_buf_init();
- if (error < 0)
- goto undo_buffers;
+ if (error)
+ goto out_filestream_uninit;
+
+ error = xfs_init_procfs();
+ if (error)
+ goto out_buf_terminate;
+
+ error = xfs_sysctl_register();
+ if (error)
+ goto out_cleanup_procfs;
- vn_init();
- xfs_init();
- uuid_init();
vfs_initquota();
error = register_filesystem(&xfs_fs_type);
if (error)
- goto undo_register;
+ goto out_sysctl_unregister;
return 0;
-undo_register:
+ out_sysctl_unregister:
+ xfs_sysctl_unregister();
+ out_cleanup_procfs:
+ xfs_cleanup_procfs();
+ out_buf_terminate:
xfs_buf_terminate();
-
-undo_buffers:
+ out_filestream_uninit:
+ xfs_filestream_uninit();
+ out_mru_cache_uninit:
+ xfs_mru_cache_uninit();
+ out_free_trace_buffers:
+ xfs_free_trace_bufs();
+ out_destroy_zones:
xfs_destroy_zones();
-
-undo_zones:
+ out:
return error;
}
STATIC void __exit
-exit_xfs_fs( void )
+exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_cleanup();
+ xfs_sysctl_unregister();
+ xfs_cleanup_procfs();
xfs_buf_terminate();
+ xfs_filestream_uninit();
+ xfs_mru_cache_uninit();
+ xfs_free_trace_bufs();
xfs_destroy_zones();
ktrace_uninit();
}
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d3303..fe2ef4e6a0f9 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -101,18 +101,13 @@ struct block_device;
extern __uint64_t xfs_max_file_offset(unsigned int);
-extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
- struct xfs_inode *ip);
-
extern void xfs_flush_inode(struct xfs_inode *);
extern void xfs_flush_device(struct xfs_inode *);
-extern int xfs_blkdev_get(struct xfs_mount *, const char *,
- struct block_device **);
-extern void xfs_blkdev_put(struct block_device *);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern const struct export_operations xfs_export_operations;
+extern struct xattr_handler *xfs_xattr_handlers[];
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05c..7dacb5bbde3f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
{}
};
-void
+int
xfs_sysctl_register(void)
{
xfs_table_header = register_sysctl_table(xfs_root_table);
+ if (!xfs_table_header)
+ return -ENOMEM;
+ return 0;
}
void
xfs_sysctl_unregister(void)
{
- if (xfs_table_header)
- unregister_sysctl_table(xfs_table_header);
+ unregister_sysctl_table(xfs_table_header);
}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6f..4aadb8056c37 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
extern xfs_param_t xfs_params;
#ifdef CONFIG_SYSCTL
-extern void xfs_sysctl_register(void);
+extern int xfs_sysctl_register(void);
extern void xfs_sysctl_unregister(void);
#else
-# define xfs_sysctl_register() do { } while (0)
+# define xfs_sysctl_register() (0)
# define xfs_sysctl_unregister() do { } while (0)
#endif /* CONFIG_SYSCTL */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe007338..b52528bbbfff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
/*
- * Dedicated vnode inactive/reclaim sync semaphores.
+ * Dedicated vnode inactive/reclaim sync wait queues.
* Prime number of hash buckets since address is used as the key.
*/
#define NVSYNC 37
@@ -82,74 +82,6 @@ vn_ioerror(
xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
}
-/*
- * Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
- */
-int
-vn_revalidate(
- bhv_vnode_t *vp)
-{
- struct inode *inode = vn_to_inode(vp);
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- unsigned long xflags;
-
- xfs_itrace_entry(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- inode->i_mode = ip->i_d.di_mode;
- inode->i_uid = ip->i_d.di_uid;
- inode->i_gid = ip->i_d.di_gid;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-
- xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- xfs_iflags_clear(ip, XFS_IMODIFIED);
- return 0;
-}
-
-/*
- * Add a reference to a referenced vnode.
- */
-bhv_vnode_t *
-vn_hold(
- bhv_vnode_t *vp)
-{
- struct inode *inode;
-
- XFS_STATS_INC(vn_hold);
-
- inode = igrab(vn_to_inode(vp));
- ASSERT(inode);
-
- return vp;
-}
-
#ifdef XFS_INODE_TRACE
/*
@@ -158,7 +90,7 @@ vn_hold(
*/
static inline int xfs_icount(struct xfs_inode *ip)
{
- bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
+ struct inode *vp = VFS_I(ip);
if (vp)
return vn_count(vp);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..683ce16210ff 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,24 +19,9 @@
#define __XFS_VNODE_H__
struct file;
-struct bhv_vattr;
struct xfs_iomap;
struct attrlist_cursor_kern;
-typedef struct inode bhv_vnode_t;
-
-/*
- * Vnode to Linux inode mapping.
- */
-static inline bhv_vnode_t *vn_from_inode(struct inode *inode)
-{
- return inode;
-}
-static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
-{
- return vnode;
-}
-
/*
* Return values for xfs_inactive. A return value of
* VN_INACTIVE_NOCACHE implies that the file system behavior
@@ -66,87 +51,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
Prevent VM access to the pages until
the operation completes. */
-/*
- * Vnode attributes. va_mask indicates those attributes the caller
- * wants to set or extract.
- */
-typedef struct bhv_vattr {
- int va_mask; /* bit-mask of attributes present */
- mode_t va_mode; /* file access mode and type */
- xfs_nlink_t va_nlink; /* number of references to file */
- uid_t va_uid; /* owner user id */
- gid_t va_gid; /* owner group id */
- xfs_ino_t va_nodeid; /* file id */
- xfs_off_t va_size; /* file size in bytes */
- u_long va_blocksize; /* blocksize preferred for i/o */
- struct timespec va_atime; /* time of last access */
- struct timespec va_mtime; /* time of last modification */
- struct timespec va_ctime; /* time file changed */
- u_int va_gen; /* generation number of file */
- xfs_dev_t va_rdev; /* device the special file represents */
- __int64_t va_nblocks; /* number of blocks allocated */
- u_long va_xflags; /* random extended file flags */
- u_long va_extsize; /* file extent size */
- u_long va_nextents; /* number of extents in file */
- u_long va_anextents; /* number of attr extents in file */
- prid_t va_projid; /* project id */
-} bhv_vattr_t;
-
-/*
- * setattr or getattr attributes
- */
-#define XFS_AT_TYPE 0x00000001
-#define XFS_AT_MODE 0x00000002
-#define XFS_AT_UID 0x00000004
-#define XFS_AT_GID 0x00000008
-#define XFS_AT_FSID 0x00000010
-#define XFS_AT_NODEID 0x00000020
-#define XFS_AT_NLINK 0x00000040
-#define XFS_AT_SIZE 0x00000080
-#define XFS_AT_ATIME 0x00000100
-#define XFS_AT_MTIME 0x00000200
-#define XFS_AT_CTIME 0x00000400
-#define XFS_AT_RDEV 0x00000800
-#define XFS_AT_BLKSIZE 0x00001000
-#define XFS_AT_NBLOCKS 0x00002000
-#define XFS_AT_VCODE 0x00004000
-#define XFS_AT_MAC 0x00008000
-#define XFS_AT_UPDATIME 0x00010000
-#define XFS_AT_UPDMTIME 0x00020000
-#define XFS_AT_UPDCTIME 0x00040000
-#define XFS_AT_ACL 0x00080000
-#define XFS_AT_CAP 0x00100000
-#define XFS_AT_INF 0x00200000
-#define XFS_AT_XFLAGS 0x00400000
-#define XFS_AT_EXTSIZE 0x00800000
-#define XFS_AT_NEXTENTS 0x01000000
-#define XFS_AT_ANEXTENTS 0x02000000
-#define XFS_AT_PROJID 0x04000000
-#define XFS_AT_SIZE_NOPERM 0x08000000
-#define XFS_AT_GENCOUNT 0x10000000
-
-#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
- XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
-
-#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
- XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
- XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
- XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
-
-#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
-
-#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
-
-#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
- XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
- XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
extern void vn_init(void);
-extern int vn_revalidate(bhv_vnode_t *);
/*
* Yeah, these don't take vnode anymore at all, all this should be
@@ -156,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip);
extern void vn_iowake(struct xfs_inode *ip);
extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
-static inline int vn_count(bhv_vnode_t *vp)
+static inline int vn_count(struct inode *vp)
{
- return atomic_read(&vn_to_inode(vp)->i_count);
+ return atomic_read(&vp->i_count);
}
-/*
- * Vnode reference counting functions (and macros for compatibility).
- */
-extern bhv_vnode_t *vn_hold(bhv_vnode_t *);
+#define IHOLD(ip) \
+do { \
+ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
+ atomic_inc(&(VFS_I(ip)->i_count)); \
+ xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+} while (0)
-#if defined(XFS_INODE_TRACE)
-#define VN_HOLD(vp) \
- ((void)vn_hold(vp), \
- xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address))
-#define VN_RELE(vp) \
- (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
- iput(vn_to_inode(vp)))
-#else
-#define VN_HOLD(vp) ((void)vn_hold(vp))
-#define VN_RELE(vp) (iput(vn_to_inode(vp)))
-#endif
+#define IRELE(ip) \
+do { \
+ xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+ iput(VFS_I(ip)); \
+} while (0)
-static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
+static inline struct inode *vn_grab(struct inode *vp)
{
- struct inode *inode = igrab(vn_to_inode(vp));
- return inode ? vn_from_inode(inode) : NULL;
+ return igrab(vp);
}
/*
* Dealing with bad inodes
*/
-static inline int VN_BAD(bhv_vnode_t *vp)
+static inline int VN_BAD(struct inode *vp)
{
- return is_bad_inode(vn_to_inode(vp));
+ return is_bad_inode(vp);
}
/*
* Extracting atime values in various formats
*/
-static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime)
+static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime)
{
bs_atime->tv_sec = vp->i_atime.tv_sec;
bs_atime->tv_nsec = vp->i_atime.tv_nsec;
}
-static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts)
+static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts)
{
*ts = vp->i_atime;
}
-static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
+static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
{
*tt = vp->i_atime.tv_sec;
}
@@ -214,20 +115,11 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
/*
* Some useful predicates.
*/
-#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping)
-#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
+#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp) (vp->i_mapping->nrpages)
+#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
PAGECACHE_TAG_DIRTY)
-/*
- * Flags to vop_setattr/getattr.
- */
-#define ATTR_UTIME 0x01 /* non-default utime(2) request */
-#define ATTR_DMI 0x08 /* invocation from a DMI function */
-#define ATTR_LAZY 0x80 /* set/get attributes lazily */
-#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
-#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
-#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
/*
* Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 000000000000..964621fde6ed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+/*
+ * ACL handling. Should eventually be moved into xfs_acl.c
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+ if (strcmp(name, "posix_acl_access") == 0)
+ return _ACL_TYPE_ACCESS;
+ else if (strcmp(name, "posix_acl_default") == 0)
+ return _ACL_TYPE_DEFAULT;
+ return -EINVAL;
+}
+
+/*
+ * Get system extended attributes which at the moment only
+ * includes Posix ACLs.
+ */
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ int acl;
+
+ acl = xfs_decode_acl(name);
+ if (acl < 0)
+ return acl;
+
+ return xfs_acl_vget(inode, buffer, size, acl);
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ int acl;
+
+ acl = xfs_decode_acl(name);
+ if (acl < 0)
+ return acl;
+ if (flags & XATTR_CREATE)
+ return -EINVAL;
+
+ if (!value)
+ return xfs_acl_vremove(inode, acl);
+
+ return xfs_acl_vset(inode, (void *)value, size, acl);
+}
+
+static struct xattr_handler xfs_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = xfs_xattr_system_get,
+ .set = xfs_xattr_system_set,
+};
+
+
+/*
+ * Real xattr handling. The only difference between the namespaces is
+ * a flag passed to the low-level attr code.
+ */
+
+static int
+__xfs_xattr_get(struct inode *inode, const char *name,
+ void *value, size_t size, int xflags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ int error, asize = size;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (!size) {
+ xflags |= ATTR_KERNOVAL;
+ value = NULL;
+ }
+
+ error = -xfs_attr_get(ip, name, value, &asize, xflags);
+ if (error)
+ return error;
+ return asize;
+}
+
+static int
+__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags, int xflags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (flags & XATTR_CREATE)
+ xflags |= ATTR_CREATE;
+ if (flags & XATTR_REPLACE)
+ xflags |= ATTR_REPLACE;
+
+ if (!value)
+ return -xfs_attr_remove(ip, name, xflags);
+ return -xfs_attr_set(ip, name, (void *)value, size, xflags);
+}
+
+static int
+xfs_xattr_user_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, 0);
+}
+
+static int
+xfs_xattr_user_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, 0);
+}
+
+static struct xattr_handler xfs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = xfs_xattr_user_get,
+ .set = xfs_xattr_user_set,
+};
+
+
+static int
+xfs_xattr_trusted_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
+}
+
+static int
+xfs_xattr_trusted_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
+}
+
+static struct xattr_handler xfs_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = xfs_xattr_trusted_get,
+ .set = xfs_xattr_trusted_set,
+};
+
+
+static int
+xfs_xattr_secure_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
+}
+
+static int
+xfs_xattr_secure_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
+}
+
+static struct xattr_handler xfs_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = xfs_xattr_secure_get,
+ .set = xfs_xattr_secure_set,
+};
+
+
+struct xattr_handler *xfs_xattr_handlers[] = {
+ &xfs_xattr_user_handler,
+ &xfs_xattr_trusted_handler,
+ &xfs_xattr_security_handler,
+ &xfs_xattr_system_handler,
+ NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+ if (flags & XFS_ATTR_SECURE)
+ return sizeof("security");
+ else if (flags & XFS_ATTR_ROOT)
+ return sizeof("trusted");
+ else
+ return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+ if (flags & XFS_ATTR_SECURE)
+ return xfs_xattr_security_handler.prefix;
+ else if (flags & XFS_ATTR_ROOT)
+ return xfs_xattr_trusted_handler.prefix;
+ else
+ return xfs_xattr_user_handler.prefix;
+}
+
+static int
+xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
+ char *name, int namelen, int valuelen, char *value)
+{
+ unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+ char *offset;
+ int arraytop;
+
+ ASSERT(context->count >= 0);
+
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+ return 0;
+
+ arraytop = context->count + prefix_len + namelen + 1;
+ if (arraytop > context->firstu) {
+ context->count = -1; /* insufficient space */
+ return 1;
+ }
+ offset = (char *)context->alist + context->count;
+ strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+ offset += prefix_len;
+ strncpy(offset, name, namelen); /* real name */
+ offset += namelen;
+ *offset = '\0';
+ context->count += prefix_len + namelen + 1;
+ return 0;
+}
+
+static int
+xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
+ char *name, int namelen, int valuelen, char *value)
+{
+ context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+ return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+ size_t size, ssize_t *result)
+{
+ char *p = data + *result;
+
+ *result += len;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct xfs_attr_list_context context;
+ struct attrlist_cursor_kern cursor = { 0 };
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ /*
+ * First read the regular on-disk attributes.
+ */
+ memset(&context, 0, sizeof(context));
+ context.dp = XFS_I(inode);
+ context.cursor = &cursor;
+ context.resynch = 1;
+ context.alist = data;
+ context.bufsize = size;
+ context.firstu = context.bufsize;
+
+ if (size)
+ context.put_listent = xfs_xattr_put_listent;
+ else
+ context.put_listent = xfs_xattr_put_listent_sizes;
+
+ xfs_attr_list_int(&context);
+ if (context.count < 0)
+ return -ERANGE;
+
+ /*
+ * Then add the two synthetic ACL attributes.
+ */
+ if (xfs_acl_vhasacl_access(inode)) {
+ error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+ strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+ data, size, &context.count);
+ if (error)
+ return error;
+ }
+
+ if (xfs_acl_vhasacl_default(inode)) {
+ error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+ strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+ data, size, &context.count);
+ if (error)
+ return error;
+ }
+
+ return context.count;
+}
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd5..f2705f2fd43c 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,11 +101,18 @@ xfs_qm_dqinit(
if (brandnewdquot) {
dqp->dq_flnext = dqp->dq_flprev = dqp;
mutex_init(&dqp->q_qlock);
- initnsema(&dqp->q_flock, 1, "fdq");
sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&dqp->q_flush);
+ complete(&dqp->q_flush);
+
#ifdef XFS_DQUOT_TRACE
- dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
+ dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
xfs_dqtrace_entry(dqp, "DQINIT");
#endif
} else {
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy(
ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
mutex_destroy(&dqp->q_qlock);
- freesema(&dqp->q_flock);
sv_destroy(&dqp->q_pinwait);
#ifdef XFS_DQUOT_TRACE
@@ -431,7 +437,7 @@ xfs_qm_dqalloc(
* when it unlocks the inode. Since we want to keep the quota
* inode around, we bump the vnode ref count now.
*/
- VN_HOLD(XFS_ITOV(quotip));
+ IHOLD(quotip);
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
nmaps = 1;
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush(
int error;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+ ASSERT(!completion_done(&dqp->q_flush));
xfs_dqtrace_entry(dqp, "DQFLUSH");
/*
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done(
xfs_dqfunlock(dqp);
}
-
-int
-xfs_qm_dqflock_nowait(
- xfs_dquot_t *dqp)
-{
- int locked;
-
- locked = cpsema(&((dqp)->q_flock));
-
- /* XXX ifdef these out */
- if (locked)
- (dqp)->dq_flags |= XFS_DQ_FLOCKED;
- return (locked);
-}
-
-
int
xfs_qm_dqlock_nowait(
xfs_dquot_t *dqp)
{
- return (mutex_trylock(&((dqp)->q_qlock)));
+ return mutex_trylock(&dqp->q_qlock);
}
void
xfs_dqlock(
xfs_dquot_t *dqp)
{
- mutex_lock(&(dqp->q_qlock));
+ mutex_lock(&dqp->q_qlock);
}
void
@@ -1435,8 +1425,7 @@ xfs_dqlock2(
/* ARGSUSED */
int
xfs_qm_dqpurge(
- xfs_dquot_t *dqp,
- uint flags)
+ xfs_dquot_t *dqp)
{
xfs_dqhash_t *thishash;
xfs_mount_t *mp = dqp->q_mount;
@@ -1469,7 +1458,7 @@ xfs_qm_dqpurge(
* if we're turning off quotas. Basically, we need this flush
* lock, and are willing to block on it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* Block on the flush lock after nudging dquot buffer,
* if it is incore.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e2..8958d0faf8d3 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -82,7 +82,7 @@ typedef struct xfs_dquot {
xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
mutex_t q_qlock; /* quota lock */
- sema_t q_flock; /* flush lock */
+ struct completion q_flush; /* flush completion queue */
uint q_pincount; /* pin count for this dquot */
sv_t q_pinwait; /* sync var for pinning */
#ifdef XFS_DQUOT_TRACE
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
/*
- * The following three routines simply manage the q_flock
- * semaphore embedded in the dquot. This semaphore synchronizes
- * processes attempting to flush the in-core dquot back to disk.
+ * Manage the q_flush completion queue embedded in the dquot. This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
*/
-#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\
- (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \
- vsema(&((dqp)->q_flock)); \
- (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+ wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+ return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+ complete(&dqp->q_flush);
+}
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -164,10 +172,9 @@ extern void xfs_qm_dqprint(xfs_dquot_t *);
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern int xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
xfs_disk_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca78412..f028644caa5e 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push(
dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+ ASSERT(!completion_done(&dqp->q_flush));
/*
* Since we were able to lock the dquot's flush lock and
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf(
* inode flush completed and the inode was taken off the AIL.
* So, just get out.
*/
- if (!issemalocked(&(dqp->q_flock)) ||
+ if (completion_done(&dqp->q_flush) ||
((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf(
if (bp != NULL) {
if (XFS_BUF_ISDELAYWRITE(bp)) {
dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
- issemalocked(&(dqp->q_flock)));
+ !completion_done(&dqp->q_flush));
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock(
return (XFS_ITEM_LOCKED);
retval = XFS_ITEM_SUCCESS;
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* The dquot is already being flushed. It may have been
* flushed delayed write, however, and we don't want to
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
* xfs_trans_delete_ail() drops the AIL lock.
*/
xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
- kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
- kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
+ kmem_free(qfs);
+ kmem_free(qfe);
return (xfs_lsn_t)-1;
}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c5..df0ffef9775a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
}
- kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
- kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
+ kmem_free(xqm->qm_usr_dqhtable);
+ kmem_free(xqm->qm_grp_dqhtable);
xqm->qm_usr_dqhtable = NULL;
xqm->qm_grp_dqhtable = NULL;
xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
#ifdef DEBUG
mutex_destroy(&qcheck_lock);
#endif
- kmem_free(xqm, sizeof(xfs_qm_t));
+ kmem_free(xqm);
}
/*
@@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy(
*/
void
xfs_qm_mount_quotas(
- xfs_mount_t *mp,
- int mfsi_flags)
+ xfs_mount_t *mp)
{
int error = 0;
uint sbf;
@@ -346,8 +345,7 @@ xfs_qm_mount_quotas(
/*
* If any of the quotas are not consistent, do a quotacheck.
*/
- if (XFS_QM_NEED_QUOTACHECK(mp) &&
- !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
+ if (XFS_QM_NEED_QUOTACHECK(mp)) {
error = xfs_qm_quotacheck(mp);
if (error) {
/* Quotacheck failed and disabled quotas. */
@@ -445,11 +443,11 @@ xfs_qm_unmount_quotas(
}
}
if (uqp) {
- XFS_PURGE_INODE(uqp);
+ IRELE(uqp);
mp->m_quotainfo->qi_uquotaip = NULL;
}
if (gqp) {
- XFS_PURGE_INODE(gqp);
+ IRELE(gqp);
mp->m_quotainfo->qi_gquotaip = NULL;
}
out:
@@ -484,7 +482,7 @@ again:
xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* If we can't grab the flush lock then check
* to see if the dquot has been flushed delayed
@@ -631,7 +629,7 @@ xfs_qm_dqpurge_int(
* freelist in INACTIVE state.
*/
nextdqp = dqp->MPL_NEXT;
- nmisses += xfs_qm_dqpurge(dqp, flags);
+ nmisses += xfs_qm_dqpurge(dqp);
dqp = nextdqp;
}
xfs_qm_mplist_unlock(mp);
@@ -1062,7 +1060,7 @@ xfs_qm_sync(
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
if (nowait) {
xfs_dqunlock(dqp);
continue;
@@ -1134,7 +1132,7 @@ xfs_qm_init_quotainfo(
* and change the superblock accordingly.
*/
if ((error = xfs_qm_init_quotainos(mp))) {
- kmem_free(qinf, sizeof(xfs_quotainfo_t));
+ kmem_free(qinf);
mp->m_quotainfo = NULL;
return error;
}
@@ -1240,15 +1238,15 @@ xfs_qm_destroy_quotainfo(
xfs_qm_list_destroy(&qi->qi_dqlist);
if (qi->qi_uquotaip) {
- XFS_PURGE_INODE(qi->qi_uquotaip);
+ IRELE(qi->qi_uquotaip);
qi->qi_uquotaip = NULL; /* paranoia */
}
if (qi->qi_gquotaip) {
- XFS_PURGE_INODE(qi->qi_gquotaip);
+ IRELE(qi->qi_gquotaip);
qi->qi_gquotaip = NULL;
}
mutex_destroy(&qi->qi_quotaofflock);
- kmem_free(qi, sizeof(xfs_quotainfo_t));
+ kmem_free(qi);
mp->m_quotainfo = NULL;
}
@@ -1394,7 +1392,7 @@ xfs_qm_qino_alloc(
* locked exclusively and joined to the transaction already.
*/
ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
- VN_HOLD(XFS_ITOV((*ip)));
+ IHOLD(*ip);
/*
* Make the changes in the superblock, and log those too.
@@ -1623,7 +1621,7 @@ xfs_qm_dqiterate(
break;
} while (nmaps > 0);
- kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+ kmem_free(map);
return error;
}
@@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist(
* Try to grab the flush lock. If this dquot is in the process of
* getting flushed to disk, we don't want to reclaim it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
xfs_dqunlock(dqp);
dqp = dqp->dq_flnext;
continue;
@@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void)
* Try to grab the flush lock. If this dquot is in the process of
* getting flushed to disk, we don't want to reclaim it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
xfs_dqunlock(dqp);
continue;
}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index cd2300e374af..44f25349e478 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void xfs_qm_mount_quotas(xfs_mount_t *, int);
+extern void xfs_qm_mount_quotas(xfs_mount_t *);
extern int xfs_qm_quotacheck(xfs_mount_t *);
extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
extern int xfs_qm_unmount_quotas(xfs_mount_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f4f6c4c861d7..eea2e60b456b 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -162,7 +162,7 @@ xfs_qm_newmount(
* mounting, and get on with the boring life
* without disk quotas.
*/
- xfs_qm_mount_quotas(mp, 0);
+ xfs_qm_mount_quotas(mp);
} else {
/*
* Clear the quota flags, but remember them. This
@@ -184,13 +184,12 @@ STATIC int
xfs_qm_endmount(
xfs_mount_t *mp,
uint needquotamount,
- uint quotaflags,
- int mfsi_flags)
+ uint quotaflags)
{
if (needquotamount) {
ASSERT(mp->m_qflags == 0);
mp->m_qflags = quotaflags;
- xfs_qm_mount_quotas(mp, mfsi_flags);
+ xfs_qm_mount_quotas(mp);
}
#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b6..1a3b803dfa55 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
* if we don't need them anymore.
*/
if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+ IRELE(XFS_QI_UQIP(mp));
XFS_QI_UQIP(mp) = NULL;
}
if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
- XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+ IRELE(XFS_QI_GQIP(mp));
XFS_QI_GQIP(mp) = NULL;
}
out_error:
@@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes(
{
xfs_inode_t *ip, *topino;
uint ireclaims;
- bhv_vnode_t *vp;
+ struct inode *vp;
boolean_t vnode_refd;
ASSERT(mp->m_quotainfo);
@@ -1059,7 +1059,7 @@ again:
ip = ip->i_mnext;
continue;
}
- vp = XFS_ITOV_NULL(ip);
+ vp = VFS_I(ip);
if (!vp) {
ASSERT(ip->i_udquot == NULL);
ASSERT(ip->i_gdquot == NULL);
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
xfs_dqtest_cmp(d);
e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
+ kmem_free(d);
d = e;
}
h1 = &qmtest_gdqtab[i];
for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
xfs_dqtest_cmp(d);
e = (xfs_dqtest_t *) d->HL_NEXT;
- kmem_free(d, sizeof(xfs_dqtest_t));
+ kmem_free(d);
d = e;
}
}
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
} else {
cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
}
- kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
- kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+ kmem_free(qmtest_udqtab);
+ kmem_free(qmtest_gdqtab);
mutex_unlock(&qcheck_lock);
return (qmtest_nfails);
}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c565..c4fcea600bc2 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
#define XFS_IS_SUSER_DQUOT(dqp) \
(!((dqp)->q_core.d_id))
-#define XFS_PURGE_INODE(ip) \
- IRELE(ip);
-
#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
(((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
(((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508f..a34ef05489b1 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
if (sleep & KM_SLEEP)
panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
- kmem_free(ktp, sizeof(*ktp));
+ kmem_free(ktp);
return NULL;
}
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
} else {
entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
- kmem_free(ktp->kt_entries, entries_size);
+ kmem_free(ktp->kt_entries);
}
kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf8590..5830c040ea7e 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
*/
#include <xfs.h>
-static mutex_t uuid_monitor;
+static DEFINE_MUTEX(uuid_monitor);
static int uuid_table_size;
static uuid_t *uuid_table;
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
ASSERT(i < uuid_table_size);
mutex_unlock(&uuid_monitor);
}
-
-void __init
-uuid_init(void)
-{
- mutex_init(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199ba..cff5b607d445 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
unsigned char __u_bits[16];
} uuid_t;
-extern void uuid_init(void);
extern void uuid_create_nil(uuid_t *uuid);
extern int uuid_is_nil(uuid_t *uuid);
extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703a..b2f639a1416f 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,15 +37,15 @@
#include <linux/capability.h>
#include <linux/posix_acl_xattr.h>
-STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *);
+STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
STATIC void xfs_acl_get_endian(xfs_acl_t *);
STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
STATIC int xfs_acl_invalid(xfs_acl_t *);
STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *);
-STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *);
-STATIC int xfs_acl_allow_set(bhv_vnode_t *, int);
+STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
+STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
+STATIC int xfs_acl_allow_set(struct inode *, int);
kmem_zone_t *xfs_acl_zone;
@@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone;
*/
int
xfs_acl_vhasacl_access(
- bhv_vnode_t *vp)
+ struct inode *vp)
{
int error;
@@ -68,7 +68,7 @@ xfs_acl_vhasacl_access(
*/
int
xfs_acl_vhasacl_default(
- bhv_vnode_t *vp)
+ struct inode *vp)
{
int error;
@@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr(
int
xfs_acl_vget(
- bhv_vnode_t *vp,
+ struct inode *vp,
void *acl,
size_t size,
int kind)
@@ -217,7 +217,6 @@ xfs_acl_vget(
posix_acl_xattr_header *ext_acl = acl;
int flags = 0;
- VN_HOLD(vp);
if(size) {
if (!(_ACL_ALLOC(xfs_acl))) {
error = ENOMEM;
@@ -239,11 +238,10 @@ xfs_acl_vget(
goto out;
}
if (kind == _ACL_TYPE_ACCESS)
- xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
+ xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
}
out:
- VN_RELE(vp);
if(xfs_acl)
_ACL_FREE(xfs_acl);
return -error;
@@ -251,28 +249,26 @@ out:
int
xfs_acl_vremove(
- bhv_vnode_t *vp,
+ struct inode *vp,
int kind)
{
int error;
- VN_HOLD(vp);
error = xfs_acl_allow_set(vp, kind);
if (!error) {
- error = xfs_attr_remove(xfs_vtoi(vp),
+ error = xfs_attr_remove(XFS_I(vp),
kind == _ACL_TYPE_DEFAULT?
SGI_ACL_DEFAULT: SGI_ACL_FILE,
ATTR_ROOT);
if (error == ENOATTR)
error = 0; /* 'scool */
}
- VN_RELE(vp);
return -error;
}
int
xfs_acl_vset(
- bhv_vnode_t *vp,
+ struct inode *vp,
void *acl,
size_t size,
int kind)
@@ -298,7 +294,6 @@ xfs_acl_vset(
return 0;
}
- VN_HOLD(vp);
error = xfs_acl_allow_set(vp, kind);
/* Incoming ACL exists, set file mode based on its value */
@@ -321,7 +316,6 @@ xfs_acl_vset(
}
out:
- VN_RELE(vp);
_ACL_FREE(xfs_acl);
return -error;
}
@@ -341,8 +335,7 @@ xfs_acl_iaccess(
/* If the file has no ACL return -1. */
rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
- ATTR_ROOT | ATTR_KERNACCESS)) {
+ if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
_ACL_FREE(acl);
return -1;
}
@@ -364,7 +357,7 @@ xfs_acl_iaccess(
STATIC int
xfs_acl_allow_set(
- bhv_vnode_t *vp,
+ struct inode *vp,
int kind)
{
if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
@@ -373,7 +366,7 @@ xfs_acl_allow_set(
return ENOTDIR;
if (vp->i_sb->s_flags & MS_RDONLY)
return EROFS;
- if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
+ if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
return EPERM;
return 0;
}
@@ -567,7 +560,7 @@ xfs_acl_get_endian(
*/
STATIC void
xfs_acl_get_attr(
- bhv_vnode_t *vp,
+ struct inode *vp,
xfs_acl_t *aclp,
int kind,
int flags,
@@ -577,7 +570,7 @@ xfs_acl_get_attr(
ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
flags |= ATTR_ROOT;
- *error = xfs_attr_get(xfs_vtoi(vp),
+ *error = xfs_attr_get(XFS_I(vp),
kind == _ACL_TYPE_ACCESS ?
SGI_ACL_FILE : SGI_ACL_DEFAULT,
(char *)aclp, &len, flags);
@@ -591,7 +584,7 @@ xfs_acl_get_attr(
*/
STATIC void
xfs_acl_set_attr(
- bhv_vnode_t *vp,
+ struct inode *vp,
xfs_acl_t *aclp,
int kind,
int *error)
@@ -616,7 +609,7 @@ xfs_acl_set_attr(
INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
}
INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- *error = xfs_attr_set(xfs_vtoi(vp),
+ *error = xfs_attr_set(XFS_I(vp),
kind == _ACL_TYPE_ACCESS ?
SGI_ACL_FILE: SGI_ACL_DEFAULT,
(char *)newacl, len, ATTR_ROOT);
@@ -625,7 +618,7 @@ xfs_acl_set_attr(
int
xfs_acl_vtoacl(
- bhv_vnode_t *vp,
+ struct inode *vp,
xfs_acl_t *access_acl,
xfs_acl_t *default_acl)
{
@@ -640,7 +633,7 @@ xfs_acl_vtoacl(
if (error)
access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
+ xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
}
if (default_acl) {
@@ -657,7 +650,7 @@ xfs_acl_vtoacl(
*/
int
xfs_acl_inherit(
- bhv_vnode_t *vp,
+ struct inode *vp,
mode_t mode,
xfs_acl_t *pdaclp)
{
@@ -716,11 +709,11 @@ out_error:
*/
STATIC int
xfs_acl_setmode(
- bhv_vnode_t *vp,
+ struct inode *vp,
xfs_acl_t *acl,
int *basicperms)
{
- bhv_vattr_t va;
+ struct iattr iattr;
xfs_acl_entry_t *ap;
xfs_acl_entry_t *gap = NULL;
int i, nomask = 1;
@@ -734,25 +727,25 @@ xfs_acl_setmode(
* Copy the u::, g::, o::, and m:: bits from the ACL into the
* mode. The m:: bits take precedence over the g:: bits.
*/
- va.va_mask = XFS_AT_MODE;
- va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
- va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
+ iattr.ia_valid = ATTR_MODE;
+ iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
+ iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
ap = acl->acl_entry;
for (i = 0; i < acl->acl_cnt; ++i) {
switch (ap->ae_tag) {
case ACL_USER_OBJ:
- va.va_mode |= ap->ae_perm << 6;
+ iattr.ia_mode |= ap->ae_perm << 6;
break;
case ACL_GROUP_OBJ:
gap = ap;
break;
case ACL_MASK: /* more than just standard modes */
nomask = 0;
- va.va_mode |= ap->ae_perm << 3;
+ iattr.ia_mode |= ap->ae_perm << 3;
*basicperms = 0;
break;
case ACL_OTHER:
- va.va_mode |= ap->ae_perm;
+ iattr.ia_mode |= ap->ae_perm;
break;
default: /* more than just standard modes */
*basicperms = 0;
@@ -763,9 +756,9 @@ xfs_acl_setmode(
/* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
if (gap && nomask)
- va.va_mode |= gap->ae_perm << 3;
+ iattr.ia_mode |= gap->ae_perm << 3;
- return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred);
+ return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
}
/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c4..a4e293b93efa 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
+#define _ACL_TYPE_ACCESS 1
+#define _ACL_TYPE_DEFAULT 2
#ifdef CONFIG_XFS_POSIX_ACL
@@ -57,17 +59,15 @@ extern struct kmem_zone *xfs_acl_zone;
(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
-extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *);
+extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(bhv_vnode_t *);
-extern int xfs_acl_vhasacl_default(bhv_vnode_t *);
-extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
-extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
-extern int xfs_acl_vremove(bhv_vnode_t *, int);
+extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
+extern int xfs_acl_vhasacl_access(struct inode *);
+extern int xfs_acl_vhasacl_default(struct inode *);
+extern int xfs_acl_vset(struct inode *, void *, size_t, int);
+extern int xfs_acl_vget(struct inode *, void *, size_t, int);
+extern int xfs_acl_vremove(struct inode *, int);
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index f9472a2076d4..0b3b5efe848c 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -92,16 +92,6 @@
((__u8*)(pointer))[1] = (((value) ) & 0xff); \
}
-/* define generic INT_ macros */
-
-#define INT_GET(reference,arch) \
- (((arch) == ARCH_NOCONVERT) \
- ? \
- (reference) \
- : \
- INT_SWAP((reference),(reference)) \
- )
-
/* does not return a value */
#define INT_SET(reference,arch,valueref) \
(__builtin_constant_p(valueref) ? \
@@ -112,64 +102,6 @@
) \
)
-/* does not return a value */
-#define INT_MOD_EXPR(reference,arch,code) \
- (((arch) == ARCH_NOCONVERT) \
- ? \
- (void)((reference) code) \
- : \
- (void)( \
- (reference) = INT_GET((reference),arch) , \
- ((reference) code), \
- INT_SET(reference, arch, reference) \
- ) \
- )
-
-/* does not return a value */
-#define INT_MOD(reference,arch,delta) \
- (void)( \
- INT_MOD_EXPR(reference,arch,+=(delta)) \
- )
-
-/*
- * INT_COPY - copy a value between two locations with the
- * _same architecture_ but _potentially different sizes_
- *
- * if the types of the two parameters are equal or they are
- * in native architecture, a simple copy is done
- *
- * otherwise, architecture conversions are done
- *
- */
-
-/* does not return a value */
-#define INT_COPY(dst,src,arch) \
- ( \
- ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
- ? \
- (void)((dst) = (src)) \
- : \
- INT_SET(dst, arch, INT_GET(src, arch)) \
- )
-
-/*
- * INT_XLATE - copy a value in either direction between two locations
- * with different architectures
- *
- * dir < 0 - copy from memory to buffer (native to arch)
- * dir > 0 - copy from buffer to memory (arch to native)
- */
-
-/* does not return a value */
-#define INT_XLATE(buf,mem,dir,arch) {\
- ASSERT(dir); \
- if (dir>0) { \
- (mem)=INT_GET(buf, arch); \
- } else { \
- INT_SET(buf, arch, mem); \
- } \
-}
-
/*
* In directories inode numbers are stored as unaligned arrays of unsigned
* 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a859186..f7cdc28aff41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <linux/capability.h>
-
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
@@ -57,11 +55,6 @@
* Provide the external interfaces to manage attribute lists.
*/
-#define ATTR_SYSCOUNT 2
-static struct attrnames posix_acl_access;
-static struct attrnames posix_acl_default;
-static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
-
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
return 0;
}
+STATIC int
+xfs_inode_hasattr(
+ struct xfs_inode *ip)
+{
+ if (!XFS_IFORK_Q(ip) ||
+ (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ ip->i_d.di_anextents == 0))
+ return 0;
+ return 1;
+}
+
/*========================================================================
* Overall external interface routines.
*========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
xfs_da_args_t args;
int error;
- if ((XFS_IFORK_Q(ip) == 0) ||
- (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_anextents == 0))
- return(ENOATTR);
+ if (!xfs_inode_hasattr(ip))
+ return ENOATTR;
/*
* Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(ip) == 0 ||
- (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_anextents == 0)) {
- error = XFS_ERROR(ENOATTR);
- } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = xfs_attr_shortform_getvalue(&args);
} else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_get(&args);
@@ -196,6 +194,46 @@ xfs_attr_get(
return(error);
}
+/*
+ * Calculate how many blocks we need for the new attribute,
+ */
+int
+xfs_attr_calc_size(
+ struct xfs_inode *ip,
+ int namelen,
+ int valuelen,
+ int *local)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int size;
+ int nblks;
+
+ /*
+ * Determine space new attribute will use, and if it would be
+ * "local" or "remote" (note: local != inline).
+ */
+ size = xfs_attr_leaf_newentsize(namelen, valuelen,
+ mp->m_sb.sb_blocksize, local);
+
+ nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+ if (*local) {
+ if (size > (mp->m_sb.sb_blocksize >> 1)) {
+ /* Double split possible */
+ nblks *= 2;
+ }
+ } else {
+ /*
+ * Out of line attribute, cannot double split, but
+ * make room for the attribute value itself.
+ */
+ uint dblocks = XFS_B_TO_FSB(mp, valuelen);
+ nblks += dblocks;
+ nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
+ }
+
+ return nblks;
+}
+
STATIC int
xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
char *value, int valuelen, int flags)
@@ -204,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
int error, err2, committed;
- int local, size;
- uint nblks;
xfs_mount_t *mp = dp->i_mount;
int rsvd = (flags & ATTR_ROOT) != 0;
+ int local;
/*
* Attach the dquots to the inode.
@@ -241,33 +278,10 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
args.firstblock = &firstblock;
args.flist = &flist;
args.whichfork = XFS_ATTR_FORK;
- args.addname = 1;
- args.oknoent = 1;
-
- /*
- * Determine space new attribute will use, and if it would be
- * "local" or "remote" (note: local != inline).
- */
- size = xfs_attr_leaf_newentsize(name->len, valuelen,
- mp->m_sb.sb_blocksize, &local);
-
- nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
- if (local) {
- if (size > (mp->m_sb.sb_blocksize >> 1)) {
- /* Double split possible */
- nblks <<= 1;
- }
- } else {
- uint dblocks = XFS_B_TO_FSB(mp, valuelen);
- /* Out of line attribute, cannot double split, but make
- * room for the attribute value itself.
- */
- nblks += dblocks;
- nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
- }
+ args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
/* Size is now blocks for attribute data */
- args.total = nblks;
+ args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local);
/*
* Start our first transaction of the day.
@@ -289,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
if (rsvd)
args.trans->t_flags |= XFS_TRANS_RESERVE;
- if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
- XFS_ATTRSET_LOG_RES(mp, nblks),
- 0, XFS_TRANS_PERM_LOG_RES,
- XFS_ATTRSET_LOG_COUNT))) {
+ if ((error = xfs_trans_reserve(args.trans, args.total,
+ XFS_ATTRSET_LOG_RES(mp, args.total), 0,
+ XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) {
xfs_trans_cancel(args.trans, 0);
return(error);
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
- rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
- XFS_QMOPT_RES_REGBLKS);
+ error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+ rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
+ XFS_QMOPT_RES_REGBLKS);
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
@@ -387,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
* Commit the leaf transformation. We'll need another (linked)
* transaction to add the new attribute to the leaf.
*/
- if ((error = xfs_attr_rolltrans(&args.trans, dp)))
+
+ error = xfs_trans_roll(&args.trans, dp);
+ if (error)
goto out;
}
@@ -529,9 +544,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
error = XFS_ERROR(ENOATTR);
goto out;
}
@@ -601,29 +614,33 @@ xfs_attr_remove(
return error;
xfs_ilock(dp, XFS_ILOCK_SHARED);
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return(XFS_ERROR(ENOATTR));
+ return XFS_ERROR(ENOATTR);
}
xfs_iunlock(dp, XFS_ILOCK_SHARED);
return xfs_attr_remove_int(dp, &xname, flags);
}
-STATIC int
+int
xfs_attr_list_int(xfs_attr_list_context_t *context)
{
int error;
xfs_inode_t *dp = context->dp;
+ XFS_STATS_INC(xs_attr_list);
+
+ if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+ return EIO;
+
+ xfs_ilock(dp, XFS_ILOCK_SHARED);
+ xfs_attr_trace_l_c("syscall start", context);
+
/*
* Decide on what work routines to call based on the inode size.
*/
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp)) {
error = 0;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = xfs_attr_shortform_list(context);
@@ -632,6 +649,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
} else {
error = xfs_attr_node_list(context);
}
+
+ xfs_iunlock(dp, XFS_ILOCK_SHARED);
+ xfs_attr_trace_l_c("syscall end", context);
+
return error;
}
@@ -648,74 +669,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
*/
/*ARGSUSED*/
STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
+xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
char *name, int namelen,
int valuelen, char *value)
{
+ struct attrlist *alist = (struct attrlist *)context->alist;
attrlist_ent_t *aep;
int arraytop;
ASSERT(!(context->flags & ATTR_KERNOVAL));
ASSERT(context->count >= 0);
ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
- ASSERT(context->firstu >= sizeof(*context->alist));
+ ASSERT(context->firstu >= sizeof(*alist));
ASSERT(context->firstu <= context->bufsize);
- arraytop = sizeof(*context->alist) +
- context->count * sizeof(context->alist->al_offset[0]);
+ /*
+ * Only list entries in the right namespace.
+ */
+ if (((context->flags & ATTR_SECURE) == 0) !=
+ ((flags & XFS_ATTR_SECURE) == 0))
+ return 0;
+ if (((context->flags & ATTR_ROOT) == 0) !=
+ ((flags & XFS_ATTR_ROOT) == 0))
+ return 0;
+
+ arraytop = sizeof(*alist) +
+ context->count * sizeof(alist->al_offset[0]);
context->firstu -= ATTR_ENTSIZE(namelen);
if (context->firstu < arraytop) {
xfs_attr_trace_l_c("buffer full", context);
- context->alist->al_more = 1;
+ alist->al_more = 1;
context->seen_enough = 1;
return 1;
}
- aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
+ aep = (attrlist_ent_t *)&context->alist[context->firstu];
aep->a_valuelen = valuelen;
memcpy(aep->a_name, name, namelen);
- aep->a_name[ namelen ] = 0;
- context->alist->al_offset[ context->count++ ] = context->firstu;
- context->alist->al_count = context->count;
+ aep->a_name[namelen] = 0;
+ alist->al_offset[context->count++] = context->firstu;
+ alist->al_count = context->count;
xfs_attr_trace_l_c("add", context);
return 0;
}
-STATIC int
-xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
- char *name, int namelen,
- int valuelen, char *value)
-{
- char *offset;
- int arraytop;
-
- ASSERT(context->count >= 0);
-
- arraytop = context->count + namesp->attr_namelen + namelen + 1;
- if (arraytop > context->firstu) {
- context->count = -1; /* insufficient space */
- return 1;
- }
- offset = (char *)context->alist + context->count;
- strncpy(offset, namesp->attr_name, namesp->attr_namelen);
- offset += namesp->attr_namelen;
- strncpy(offset, name, namelen); /* real name */
- offset += namelen;
- *offset = '\0';
- context->count += namesp->attr_namelen + namelen + 1;
- return 0;
-}
-
-/*ARGSUSED*/
-STATIC int
-xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
- char *name, int namelen,
- int valuelen, char *value)
-{
- context->count += namesp->attr_namelen + namelen + 1;
- return 0;
-}
-
/*
* Generate a list of extended attribute names and optionally
* also value lengths. Positive return value follows the XFS
@@ -732,10 +729,9 @@ xfs_attr_list(
attrlist_cursor_kern_t *cursor)
{
xfs_attr_list_context_t context;
+ struct attrlist *alist;
int error;
- XFS_STATS_INC(xs_attr_list);
-
/*
* Validate the cursor.
*/
@@ -756,52 +752,23 @@ xfs_attr_list(
/*
* Initialize the output buffer.
*/
+ memset(&context, 0, sizeof(context));
context.dp = dp;
context.cursor = cursor;
- context.count = 0;
- context.dupcnt = 0;
context.resynch = 1;
context.flags = flags;
- context.seen_enough = 0;
- context.alist = (attrlist_t *)buffer;
- context.put_value = 0;
-
- if (flags & ATTR_KERNAMELS) {
- context.bufsize = bufsize;
- context.firstu = context.bufsize;
- if (flags & ATTR_KERNOVAL)
- context.put_listent = xfs_attr_kern_list_sizes;
- else
- context.put_listent = xfs_attr_kern_list;
- } else {
- context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
- context.firstu = context.bufsize;
- context.alist->al_count = 0;
- context.alist->al_more = 0;
- context.alist->al_offset[0] = context.bufsize;
- context.put_listent = xfs_attr_put_listent;
- }
-
- if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return EIO;
+ context.alist = buffer;
+ context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
+ context.firstu = context.bufsize;
+ context.put_listent = xfs_attr_put_listent;
- xfs_ilock(dp, XFS_ILOCK_SHARED);
- xfs_attr_trace_l_c("syscall start", &context);
+ alist = (struct attrlist *)context.alist;
+ alist->al_count = 0;
+ alist->al_more = 0;
+ alist->al_offset[0] = context.bufsize;
error = xfs_attr_list_int(&context);
-
- xfs_iunlock(dp, XFS_ILOCK_SHARED);
- xfs_attr_trace_l_c("syscall end", &context);
-
- if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
- /* must return negated buffer size or the error */
- if (context.count < 0)
- error = XFS_ERROR(ERANGE);
- else
- error = -context.count;
- } else
- ASSERT(error >= 0);
-
+ ASSERT(error >= 0);
return error;
}
@@ -816,12 +783,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
xfs_ilock(dp, XFS_ILOCK_SHARED);
- if ((XFS_IFORK_Q(dp) == 0) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp) ||
+ dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return(0);
+ return 0;
}
xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -854,10 +819,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
/*
* Decide on what work routines to call based on the inode size.
*/
- if ((XFS_IFORK_Q(dp) == 0) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
+ if (!xfs_inode_hasattr(dp) ||
+ dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
error = 0;
goto out;
}
@@ -974,7 +937,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_da_brelse(args->trans, bp);
return(retval);
}
- args->rename = 1; /* an atomic rename */
+ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
@@ -1019,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* Commit the current trans (including the inode) and start
* a new one.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
return (error);
/*
@@ -1033,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* Commit the transaction that added the attr name so that
* later routines can manage their own transactions.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
return (error);
/*
@@ -1054,7 +1019,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
@@ -1122,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
/*
* Commit the remove and start the next trans in series.
*/
- error = xfs_attr_rolltrans(&args->trans, dp);
+ error = xfs_trans_roll(&args->trans, dp);
} else if (args->rmtblkno > 0) {
/*
@@ -1307,7 +1272,7 @@ restart:
} else if (retval == EEXIST) {
if (args->flags & ATTR_CREATE)
goto out;
- args->rename = 1; /* atomic rename op */
+ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
args->blkno2 = args->blkno; /* set 2nd entry info*/
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
@@ -1353,7 +1318,8 @@ restart:
* Commit the node conversion and start the next
* trans in the chain.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
goto out;
goto restart;
@@ -1404,7 +1370,8 @@ restart:
* Commit the leaf addition or btree split and start the next
* trans in the chain.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
goto out;
/*
@@ -1425,7 +1392,7 @@ restart:
* so that one disappears and one appears atomically. Then we
* must remove the "old" attribute/value pair.
*/
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
/*
* In a separate transaction, set the incomplete flag on the
* "old" attr and clear the incomplete flag on the "new" attr.
@@ -1504,7 +1471,8 @@ restart:
/*
* Commit and start the next trans in the chain.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
goto out;
} else if (args->rmtblkno > 0) {
@@ -1636,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
/*
* Commit the Btree join operation and start a new trans.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
goto out;
}
@@ -2137,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
/*
* Start the next trans in the chain.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, dp)))
+ error = xfs_trans_roll(&args->trans, dp);
+ if (error)
return (error);
}
@@ -2287,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
/*
* Close out trans and start the next one in the chain.
*/
- if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
+ error = xfs_trans_roll(&args->trans, args->dp);
+ if (error)
return (error);
}
return(0);
@@ -2300,23 +2271,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
void
xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
(__psunsigned_t)NULL,
(__psunsigned_t)NULL,
(__psunsigned_t)NULL);
@@ -2329,23 +2284,7 @@ void
xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
struct xfs_da_intnode *node)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
(__psunsigned_t)be16_to_cpu(node->hdr.count),
(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
(__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2298,7 @@ void
xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
struct xfs_da_node_entry *btree)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
(__psunsigned_t)be32_to_cpu(btree->hashval),
(__psunsigned_t)be32_to_cpu(btree->before),
(__psunsigned_t)NULL);
@@ -2388,23 +2311,7 @@ void
xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
struct xfs_attr_leafblock *leaf)
{
- xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
- (__psunsigned_t)context->dp,
- (__psunsigned_t)context->cursor->hashval,
- (__psunsigned_t)context->cursor->blkno,
- (__psunsigned_t)context->cursor->offset,
- (__psunsigned_t)context->alist,
- (__psunsigned_t)context->bufsize,
- (__psunsigned_t)context->count,
- (__psunsigned_t)context->firstu,
- (__psunsigned_t)
- ((context->count > 0) &&
- !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
- ? (ATTR_ENTRY(context->alist,
- context->count-1)->a_valuelen)
- : 0,
- (__psunsigned_t)context->dupcnt,
- (__psunsigned_t)context->flags,
+ xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
(__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
(__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2324,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
*/
void
xfs_attr_trace_enter(int type, char *where,
- __psunsigned_t a2, __psunsigned_t a3,
- __psunsigned_t a4, __psunsigned_t a5,
- __psunsigned_t a6, __psunsigned_t a7,
- __psunsigned_t a8, __psunsigned_t a9,
- __psunsigned_t a10, __psunsigned_t a11,
- __psunsigned_t a12, __psunsigned_t a13,
- __psunsigned_t a14, __psunsigned_t a15)
+ struct xfs_attr_list_context *context,
+ __psunsigned_t a13, __psunsigned_t a14,
+ __psunsigned_t a15)
{
ASSERT(xfs_attr_trace_buf);
ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
- (void *)where,
- (void *)a2, (void *)a3, (void *)a4,
- (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10,
- (void *)a11, (void *)a12, (void *)a13,
- (void *)a14, (void *)a15);
+ (void *)((__psunsigned_t)where),
+ (void *)((__psunsigned_t)context->dp),
+ (void *)((__psunsigned_t)context->cursor->hashval),
+ (void *)((__psunsigned_t)context->cursor->blkno),
+ (void *)((__psunsigned_t)context->cursor->offset),
+ (void *)((__psunsigned_t)context->alist),
+ (void *)((__psunsigned_t)context->bufsize),
+ (void *)((__psunsigned_t)context->count),
+ (void *)((__psunsigned_t)context->firstu),
+ NULL,
+ (void *)((__psunsigned_t)context->dupcnt),
+ (void *)((__psunsigned_t)context->flags),
+ (void *)a13, (void *)a14, (void *)a15);
}
#endif /* XFS_ATTR_TRACE */
-
-
-/*========================================================================
- * System (pseudo) namespace attribute interface routines.
- *========================================================================*/
-
-STATIC int
-posix_acl_access_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
-}
-
-STATIC int
-posix_acl_access_exists(
- bhv_vnode_t *vp)
-{
- return xfs_acl_vhasacl_access(vp);
-}
-
-STATIC int
-posix_acl_default_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
-}
-
-STATIC int
-posix_acl_default_exists(
- bhv_vnode_t *vp)
-{
- return xfs_acl_vhasacl_default(vp);
-}
-
-static struct attrnames posix_acl_access = {
- .attr_name = "posix_acl_access",
- .attr_namelen = sizeof("posix_acl_access") - 1,
- .attr_get = posix_acl_access_get,
- .attr_set = posix_acl_access_set,
- .attr_remove = posix_acl_access_remove,
- .attr_exists = posix_acl_access_exists,
-};
-
-static struct attrnames posix_acl_default = {
- .attr_name = "posix_acl_default",
- .attr_namelen = sizeof("posix_acl_default") - 1,
- .attr_get = posix_acl_default_get,
- .attr_set = posix_acl_default_set,
- .attr_remove = posix_acl_default_remove,
- .attr_exists = posix_acl_default_exists,
-};
-
-static struct attrnames *attr_system_names[] =
- { &posix_acl_access, &posix_acl_default };
-
-
-/*========================================================================
- * Namespace-prefix-style attribute name interface routines.
- *========================================================================*/
-
-STATIC int
-attr_generic_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
-}
-
-STATIC int
-attr_generic_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- int error, asize = size;
-
- error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
- if (!error)
- return asize;
- return -error;
-}
-
-STATIC int
-attr_generic_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
-}
-
-STATIC int
-attr_generic_listadd(
- attrnames_t *prefix,
- attrnames_t *namesp,
- void *data,
- size_t size,
- ssize_t *result)
-{
- char *p = data + *result;
-
- *result += prefix->attr_namelen;
- *result += namesp->attr_namelen + 1;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
- strcpy(p, prefix->attr_name);
- p += prefix->attr_namelen;
- strcpy(p, namesp->attr_name);
- p += namesp->attr_namelen + 1;
- return 0;
-}
-
-STATIC int
-attr_system_list(
- bhv_vnode_t *vp,
- void *data,
- size_t size,
- ssize_t *result)
-{
- attrnames_t *namesp;
- int i, error = 0;
-
- for (i = 0; i < ATTR_SYSCOUNT; i++) {
- namesp = attr_system_names[i];
- if (!namesp->attr_exists || !namesp->attr_exists(vp))
- continue;
- error = attr_generic_listadd(&attr_system, namesp,
- data, size, result);
- if (error)
- break;
- }
- return error;
-}
-
-int
-attr_generic_list(
- bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
-{
- attrlist_cursor_kern_t cursor = { 0 };
- int error;
-
- error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
- if (error > 0)
- return -error;
- *result = -error;
- return attr_system_list(vp, data, size, result);
-}
-
-attrnames_t *
-attr_lookup_namespace(
- char *name,
- struct attrnames **names,
- int nnames)
-{
- int i;
-
- for (i = 0; i < nnames; i++)
- if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
- return names[i];
- return NULL;
-}
-
-/*
- * Some checks to prevent people abusing EAs to get over quota:
- * - Don't allow modifying user EAs on devices/symlinks;
- * - Don't allow modifying user EAs if sticky bit set;
- */
-STATIC int
-attr_user_capable(
- bhv_vnode_t *vp,
- cred_t *cred)
-{
- struct inode *inode = vn_to_inode(vp);
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
- return -EPERM;
- return 0;
-}
-
-STATIC int
-attr_trusted_capable(
- bhv_vnode_t *vp,
- cred_t *cred)
-{
- struct inode *inode = vn_to_inode(vp);
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- return 0;
-}
-
-STATIC int
-attr_system_set(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- attrnames_t *namesp;
- int error;
-
- if (xflags & ATTR_CREATE)
- return -EINVAL;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- error = namesp->attr_set(vp, name, data, size, xflags);
- if (!error)
- error = vn_revalidate(vp);
- return error;
-}
-
-STATIC int
-attr_system_get(
- bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
- attrnames_t *namesp;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- return namesp->attr_get(vp, name, data, size, xflags);
-}
-
-STATIC int
-attr_system_remove(
- bhv_vnode_t *vp, char *name, int xflags)
-{
- attrnames_t *namesp;
-
- namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
- if (!namesp)
- return -EOPNOTSUPP;
- return namesp->attr_remove(vp, name, xflags);
-}
-
-struct attrnames attr_system = {
- .attr_name = "system.",
- .attr_namelen = sizeof("system.") - 1,
- .attr_flag = ATTR_SYSTEM,
- .attr_get = attr_system_get,
- .attr_set = attr_system_set,
- .attr_remove = attr_system_remove,
- .attr_capable = (attrcapable_t)fs_noerr,
-};
-
-struct attrnames attr_trusted = {
- .attr_name = "trusted.",
- .attr_namelen = sizeof("trusted.") - 1,
- .attr_flag = ATTR_ROOT,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = attr_trusted_capable,
-};
-
-struct attrnames attr_secure = {
- .attr_name = "security.",
- .attr_namelen = sizeof("security.") - 1,
- .attr_flag = ATTR_SECURE,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = (attrcapable_t)fs_noerr,
-};
-
-struct attrnames attr_user = {
- .attr_name = "user.",
- .attr_namelen = sizeof("user.") - 1,
- .attr_get = attr_generic_get,
- .attr_set = attr_generic_set,
- .attr_remove = attr_generic_remove,
- .attr_capable = attr_user_capable,
-};
-
-struct attrnames *attr_namespaces[] =
- { &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe35..fb3b2a68b9b9 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
#ifndef __XFS_ATTR_H__
#define __XFS_ATTR_H__
+struct xfs_inode;
+struct xfs_da_args;
+struct xfs_attr_list_context;
+
/*
- * xfs_attr.h
- *
* Large attribute lists are structured around Btrees where all the data
* elements are in the leaf nodes. Attribute names are hashed into an int,
* then that int is used as the index into the Btree. Since the hashval
@@ -35,35 +37,6 @@
* External interfaces
*========================================================================*/
-struct cred;
-struct xfs_attr_list_context;
-
-typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
-typedef int (*attrexists_t)(bhv_vnode_t *);
-typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
-
-typedef struct attrnames {
- char * attr_name;
- unsigned int attr_namelen;
- unsigned int attr_flag;
- attrget_t attr_get;
- attrset_t attr_set;
- attrremove_t attr_remove;
- attrexists_t attr_exists;
- attrcapable_t attr_capable;
-} attrnames_t;
-
-#define ATTR_NAMECOUNT 4
-extern struct attrnames attr_user;
-extern struct attrnames attr_secure;
-extern struct attrnames attr_system;
-extern struct attrnames attr_trusted;
-extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
-
-extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
-extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
-#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */
-#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */
#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
-#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */
-
-#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */
-#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */
-#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS)
/*
* The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */
&((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
/*
- * Multi-attribute operation vector.
- */
-typedef struct attr_multiop {
- int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */
- int am_error; /* [out arg] result of this sub-op (an errno) */
- char *am_attrname; /* attribute name to work with */
- char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */
- int am_length; /* [in/out arg] length of value */
- int am_flags; /* bitwise OR of attr API flags defined above */
-} attr_multiop_t;
-
-#define ATTR_OP_GET 1 /* return the indicated attr's value */
-#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
-#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
-
-/*
* Kernel-internal version of the attrlist cursor.
*/
typedef struct attrlist_cursor_kern {
@@ -148,20 +98,41 @@ typedef struct attrlist_cursor_kern {
/*========================================================================
- * Function prototypes for the kernel.
+ * Structure used to pass context around among the routines.
*========================================================================*/
-struct xfs_inode;
-struct attrlist_cursor_kern;
-struct xfs_da_args;
+
+typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
+ char *, int, int, char *);
+
+typedef struct xfs_attr_list_context {
+ struct xfs_inode *dp; /* inode */
+ struct attrlist_cursor_kern *cursor; /* position in list */
+ char *alist; /* output buffer */
+ int seen_enough; /* T/F: seen enough of list? */
+ ssize_t count; /* num used entries */
+ int dupcnt; /* count dup hashvals seen */
+ int bufsize; /* total buffer size */
+ int firstu; /* first used byte in buffer */
+ int flags; /* from VOP call */
+ int resynch; /* T/F: resynch with cursor */
+ int put_value; /* T/F: need value for listent */
+ put_listent_func_t put_listent; /* list output fmt function */
+ int index; /* index into output buffer */
+} xfs_attr_list_context_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
/*
* Overall external interface routines.
*/
+int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
int xfs_attr_inactive(struct xfs_inode *dp);
-
-int xfs_attr_shortform_getvalue(struct xfs_da_args *);
int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217b..79da6b2ea99e 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
* Namespace helper routines
*========================================================================*/
-STATIC_INLINE attrnames_t *
-xfs_attr_flags_namesp(int flags)
-{
- return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
- ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
-}
-
/*
* If namespace bits don't match return 0.
* If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
}
-/*
- * If namespace bits don't match and we don't have an override for it
- * then return 0.
- * If all match or are overridable then return 1.
- */
-STATIC_INLINE int
-xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
-{
- if (((arg_flags & ATTR_SECURE) == 0) !=
- ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
- !(arg_flags & ATTR_KERNORMALS))
- return 0;
- if (((arg_flags & ATTR_ROOT) == 0) !=
- ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
- !(arg_flags & ATTR_KERNROOTLS))
- return 0;
- return 1;
-}
-
/*========================================================================
* External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
* Fix up the start offset of the attribute fork
*/
totsize -= size;
- if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
- (mp->m_flags & XFS_MOUNT_ATTR2) &&
- (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
+ if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
+ !(args->op_flags & XFS_DA_OP_ADDNAME) &&
+ (mp->m_flags & XFS_MOUNT_ATTR2) &&
+ (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
/*
* Last attribute now removed, revert to original
* inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
ASSERT(dp->i_d.di_forkoff);
- ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
- !(mp->m_flags & XFS_MOUNT_ATTR2) ||
- dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+ ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
+ (args->op_flags & XFS_DA_OP_ADDNAME) ||
+ !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+ dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
nargs.total = args->total;
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
- nargs.oknoent = 1;
+ nargs.op_flags = XFS_DA_OP_OKNOENT;
sfe = &sf->list[0];
for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
out:
if(bp)
xfs_da_buf_done(bp);
- kmem_free(tmpbuffer, size);
+ kmem_free(tmpbuffer);
return(error);
}
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(XFS_ISRESET_CURSOR(cursor) &&
(dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
- attrnames_t *namesp;
-
- if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
- namesp = xfs_attr_flags_namesp(sfe->flags);
error = context->put_listent(context,
- namesp,
+ sfe->flags,
(char *)sfe->nameval,
(int)sfe->namelen,
(int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount, sfe);
xfs_attr_trace_l_c("sf corrupted", context);
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
return XFS_ERROR(EFSCORRUPTED);
}
- if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
+
sbp->entno = i;
sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
}
}
if (i == nsbuf) {
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
xfs_attr_trace_l_c("blk end", context);
return(0);
}
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* Loop putting entries into the user buffer.
*/
for ( ; i < nsbuf; i++, sbp++) {
- attrnames_t *namesp;
-
- namesp = xfs_attr_flags_namesp(sbp->flags);
-
if (cursor->hashval != sbp->hash) {
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
error = context->put_listent(context,
- namesp,
+ sbp->flags,
sbp->name,
sbp->namelen,
sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
cursor->offset++;
}
- kmem_free(sbuf, sbsize);
+ kmem_free(sbuf);
xfs_attr_trace_l_c("sf E-O-F", context);
return(0);
}
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
nargs.total = args->total;
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
- nargs.oknoent = 1;
+ nargs.op_flags = XFS_DA_OP_OKNOENT;
entry = &leaf->entries[0];
for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
error = 0;
out:
- kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+ kmem_free(tmpbuffer);
return(error);
}
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
entry->hashval = cpu_to_be32(args->hashval);
entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
- if (args->rename) {
+ if (args->op_flags & XFS_DA_OP_RENAME) {
entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
(args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
be16_to_cpu(hdr_s->count), mp);
xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
- kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+ kmem_free(tmpbuffer);
}
/*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
be16_to_cpu(drop_hdr->count), mp);
}
memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
- kmem_free(tmpbuffer, state->blocksize);
+ kmem_free(tmpbuffer);
}
xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
*/
retval = 0;
for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
- attrnames_t *namesp;
-
if (be32_to_cpu(entry->hashval) != cursor->hashval) {
cursor->hashval = be32_to_cpu(entry->hashval);
cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* skip incomplete entries */
- if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
- continue;
-
- namesp = xfs_attr_flags_namesp(entry->flags);
if (entry->flags & XFS_ATTR_LOCAL) {
xfs_attr_leaf_name_local_t *name_loc =
XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_loc->nameval,
(int)name_loc->namelen,
be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
if (retval)
return retval;
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_rmt->name,
(int)name_rmt->namelen,
valuelen,
(char*)args.value);
- kmem_free(args.value, valuelen);
- }
- else {
+ kmem_free(args.value);
+ } else {
retval = context->put_listent(context,
- namesp,
+ entry->flags,
(char *)name_rmt->name,
(int)name_rmt->namelen,
valuelen,
@@ -2543,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
/*
* Commit the flag value change and start the next trans in series.
*/
- error = xfs_attr_rolltrans(&args->trans, args->dp);
-
- return(error);
+ return xfs_trans_roll(&args->trans, args->dp);
}
/*
@@ -2592,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
/*
* Commit the flag value change and start the next trans in series.
*/
- error = xfs_attr_rolltrans(&args->trans, args->dp);
-
- return(error);
+ return xfs_trans_roll(&args->trans, args->dp);
}
/*
@@ -2710,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
/*
* Commit the flag value change and start the next trans in series.
*/
- error = xfs_attr_rolltrans(&args->trans, args->dp);
+ error = xfs_trans_roll(&args->trans, args->dp);
return(error);
}
@@ -2768,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
/*
* Commit the invalidate and start the next transaction.
*/
- error = xfs_attr_rolltrans(trans, dp);
+ error = xfs_trans_roll(trans, dp);
return (error);
}
@@ -2870,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
/*
* Atomically commit the whole invalidate stuff.
*/
- if ((error = xfs_attr_rolltrans(trans, dp)))
+ error = xfs_trans_roll(trans, dp);
+ if (error)
return (error);
}
@@ -2954,7 +2906,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
error = tmp; /* save only the 1st errno */
}
- kmem_free((xfs_caddr_t)list, size);
+ kmem_free((xfs_caddr_t)list);
return(error);
}
@@ -3009,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
/*
* Roll to next transaction.
*/
- if ((error = xfs_attr_rolltrans(trans, dp)))
+ error = xfs_trans_roll(trans, dp);
+ if (error)
return (error);
}
@@ -3019,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
return(0);
}
-
-
-/*
- * Roll from one trans in the sequence of PERMANENT transactions to the next.
- */
-int
-xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp)
-{
- xfs_trans_t *trans;
- unsigned int logres, count;
- int error;
-
- /*
- * Ensure that the inode is always logged.
- */
- trans = *transp;
- xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
-
- /*
- * Copy the critical parameters from one trans to the next.
- */
- logres = trans->t_log_res;
- count = trans->t_log_count;
- *transp = xfs_trans_dup(trans);
-
- /*
- * Commit the current transaction.
- * If this commit failed, then it'd just unlock those items that
- * are not marked ihold. That also means that a filesystem shutdown
- * is in progress. The caller takes the responsibility to cancel
- * the duplicate transaction that gets returned.
- */
- if ((error = xfs_trans_commit(trans, 0)))
- return (error);
-
- trans = *transp;
-
- /*
- * Reserve space in the log for th next transaction.
- * This also pushes items in the "AIL", the list of logged items,
- * out to disk if they are taking up space at the tail of the log
- * that we want to use. This requires that either nothing be locked
- * across this call, or that anything that is locked be logged in
- * the prior and the next transactions.
- */
- error = xfs_trans_reserve(trans, 0, logres, 0,
- XFS_TRANS_PERM_LOG_RES, count);
- /*
- * Ensure that the inode is in the new transaction and locked.
- */
- if (!error) {
- xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(trans, dp);
- }
- return (error);
-
-}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e2..83e9af417ca2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
struct attrlist;
struct attrlist_cursor_kern;
-struct attrnames;
+struct xfs_attr_list_context;
struct xfs_dabuf;
struct xfs_da_args;
struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
return (((bsize) >> 1) + ((bsize) >> 2));
}
-
-/*========================================================================
- * Structure used to pass context around among the routines.
- *========================================================================*/
-
-
-struct xfs_attr_list_context;
-
-typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
- char *, int, int, char *);
-
-typedef struct xfs_attr_list_context {
- struct xfs_inode *dp; /* inode */
- struct attrlist_cursor_kern *cursor; /* position in list */
- struct attrlist *alist; /* output buffer */
- int seen_enough; /* T/F: seen enough of list? */
- int count; /* num used entries */
- int dupcnt; /* count dup hashvals seen */
- int bufsize; /* total buffer size */
- int firstu; /* first used byte in buffer */
- int flags; /* from VOP call */
- int resynch; /* T/F: resynch with cursor */
- int put_value; /* T/F: need value for listent */
- put_listent_func_t put_listent; /* list output fmt function */
- int index; /* index into output buffer */
-} xfs_attr_list_context_t;
-
/*
* Used to keep a list of "remote value" extents when unlinking an inode.
*/
@@ -301,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
struct xfs_dabuf *leaf2_bp);
int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
int *local);
-int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
-
#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b1..ea22839caed2 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
struct xfs_attr_leafblock *leaf);
void xfs_attr_trace_enter(int type, char *where,
- __psunsigned_t a2, __psunsigned_t a3,
- __psunsigned_t a4, __psunsigned_t a5,
- __psunsigned_t a6, __psunsigned_t a7,
- __psunsigned_t a8, __psunsigned_t a9,
- __psunsigned_t a10, __psunsigned_t a11,
- __psunsigned_t a12, __psunsigned_t a13,
- __psunsigned_t a14, __psunsigned_t a15);
+ struct xfs_attr_list_context *context,
+ __psunsigned_t a13, __psunsigned_t a14,
+ __psunsigned_t a15);
#else
#define xfs_attr_trace_l_c(w,c)
#define xfs_attr_trace_l_cn(w,c,n)
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index fab0b6d5a41b..48228848f5ae 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -25,109 +25,6 @@
* XFS bit manipulation routines, used in non-realtime code.
*/
-#ifndef HAVE_ARCH_HIGHBIT
-/*
- * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
- */
-static const char xfs_highbit[256] = {
- -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
- 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
- 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
- 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */
- 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */
- 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */
- 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */
- 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */
- 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */
- 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */
- 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */
- 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */
- 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */
- 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */
- 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */
- 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */
- 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */
- 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */
-};
-#endif
-
-/*
- * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
- */
-inline int
-xfs_highbit32(
- __uint32_t v)
-{
-#ifdef HAVE_ARCH_HIGHBIT
- return highbit32(v);
-#else
- int i;
-
- if (v & 0xffff0000)
- if (v & 0xff000000)
- i = 24;
- else
- i = 16;
- else if (v & 0x0000ffff)
- if (v & 0x0000ff00)
- i = 8;
- else
- i = 0;
- else
- return -1;
- return i + xfs_highbit[(v >> i) & 0xff];
-#endif
-}
-
-/*
- * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
- */
-int
-xfs_lowbit64(
- __uint64_t v)
-{
- __uint32_t w = (__uint32_t)v;
- int n = 0;
-
- if (w) { /* lower bits */
- n = ffs(w);
- } else { /* upper bits */
- w = (__uint32_t)(v >> 32);
- if (w && (n = ffs(w)))
- n += 32;
- }
- return n - 1;
-}
-
-/*
- * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
- */
-int
-xfs_highbit64(
- __uint64_t v)
-{
- __uint32_t h = (__uint32_t)(v >> 32);
-
- if (h)
- return xfs_highbit32(h) + 32;
- return xfs_highbit32((__uint32_t)v);
-}
-
-
/*
* Return whether bitmap is empty.
* Size is number of words in the bitmap, which is padded to word boundary
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 082641a9782c..8e0e463dae2d 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n)
}
/* Get high bit set out of 32-bit argument, -1 if none set */
-extern int xfs_highbit32(__uint32_t v);
+static inline int xfs_highbit32(__uint32_t v)
+{
+ return fls(v) - 1;
+}
+
+/* Get high bit set out of 64-bit argument, -1 if none set */
+static inline int xfs_highbit64(__uint64_t v)
+{
+ return fls64(v) - 1;
+}
+
+/* Get low bit set out of 32-bit argument, -1 if none set */
+static inline int xfs_lowbit32(__uint32_t v)
+{
+ unsigned long t = v;
+ return (v) ? find_first_bit(&t, 32) : -1;
+}
/* Get low bit set out of 64-bit argument, -1 if none set */
-extern int xfs_lowbit64(__uint64_t v);
+static inline int xfs_lowbit64(__uint64_t v)
+{
+ __uint32_t w = (__uint32_t)v;
+ int n = 0;
-/* Get high bit set out of 64-bit argument, -1 if none set */
-extern int xfs_highbit64(__uint64_t);
+ if (w) { /* lower bits */
+ n = ffs(w);
+ } else { /* upper bits */
+ w = (__uint32_t)(v >> 32);
+ if (w && (n = ffs(w)))
+ n += 32;
+ }
+ return n - 1;
+}
/* Return whether bitmap is empty (1 == empty) */
extern int xfs_bitmap_empty(uint *map, uint size);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5af..a1aab9275d5a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -384,14 +384,14 @@ xfs_bmap_count_tree(
int levelin,
int *count);
-STATIC int
+STATIC void
xfs_bmap_count_leaves(
xfs_ifork_t *ifp,
xfs_extnum_t idx,
int numrecs,
int *count);
-STATIC int
+STATIC void
xfs_bmap_disk_count_leaves(
xfs_extnum_t idx,
xfs_bmbt_block_t *block,
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
cur->bc_private.b.firstblock = *firstblock;
if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
goto error0;
- ASSERT(stat == 1); /* must be at least one entry */
+ /* must be at least one entry */
+ XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startblock, LEFT.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
new->br_startblock,
PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
*dnew = 0;
/* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startblock, LEFT.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_startblock,
RIGHT.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur,
PREV.br_startoff + new->br_blockcount,
PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur,
PREV.br_startoff + new->br_blockcount,
PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
cur->bc_rec.b = *new;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in two. */
temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock,
PREV.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
PREV.br_startblock,
PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
PREV.br_startblock,
PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
new->br_startblock, new->br_blockcount,
&i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = XFS_EXT_NORM;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in two. */
temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
PREV.br_startblock, PREV.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/* new right extent - oldext */
if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
r[1].br_startblock, r[1].br_blockcount,
r[1].br_state)))
goto done;
/* new left extent - oldext */
- PREV.br_blockcount =
- new->br_startoff - PREV.br_startoff;
cur->bc_rec.b = PREV;
+ cur->bc_rec.b.br_blockcount =
+ new->br_startoff - PREV.br_startoff;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
- if ((error = xfs_bmbt_increment(cur, 0, &i)))
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+ /*
+ * Reset the cursor to the position of the new extent
+ * we are about to insert as we can't trust it after
+ * the previous insert.
+ */
+ if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ new->br_startblock, new->br_blockcount,
+ &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
/* new middle extent - newext */
- cur->bc_rec.b = *new;
+ cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: One in-core extent is split in three. */
temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
right.br_startblock,
right.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_decrement(cur, 0, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
left.br_startblock,
left.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
right.br_startblock,
right.br_blockcount, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
if ((error = xfs_bmbt_update(cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
new->br_startblock,
new->br_blockcount, &i)))
goto done;
- ASSERT(i == 0);
+ XFS_WANT_CORRUPTED_GOTO(i == 0, done);
cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_bmbt_insert(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
/* DELTA: A new extent was added in a hole. */
temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
got.br_startblock, got.br_blockcount,
&i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
da_old = da_new = 0;
} else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
}
if ((error = xfs_bmbt_delete(cur, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
break;
case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
got.br_startblock,
temp, &i)))
goto done;
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
/*
* Update the btree record back
* to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
error = XFS_ERROR(ENOSPC);
goto done;
}
- ASSERT(i == 1);
+ XFS_WANT_CORRUPTED_GOTO(i == 1, done);
} else
flags |= XFS_ILOG_FEXT(whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -3992,7 +4000,7 @@ xfs_bmap_add_attrfork(
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
}
ASSERT(ip->i_d.di_anextents == 0);
- VN_HOLD(XFS_ITOV(ip));
+ IHOLD(ip);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
switch (ip->i_d.di_format) {
@@ -5970,7 +5978,7 @@ unlock_and_return:
xfs_iunlock_map_shared(ip, lock);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- kmem_free(map, subnex * sizeof(*map));
+ kmem_free(map);
return error;
}
@@ -6088,7 +6096,7 @@ xfs_bmap_get_bp(
tp = cur->bc_tp;
licp = &tp->t_items;
while (!bp && licp != NULL) {
- if (XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (xfs_lic_are_all_free(licp)) {
licp = licp->lic_next;
continue;
}
@@ -6098,11 +6106,11 @@ xfs_bmap_get_bp(
xfs_buf_log_item_t *bip;
xfs_buf_t *lbp;
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- lidp = XFS_LIC_SLOT(licp, i);
+ lidp = xfs_lic_slot(licp, i);
lip = lidp->lid_item;
if (lip->li_type != XFS_LI_BUF)
continue;
@@ -6359,13 +6367,9 @@ xfs_bmap_count_blocks(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
- if (unlikely(xfs_bmap_count_leaves(ifp, 0,
+ xfs_bmap_count_leaves(ifp, 0,
ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
- count) < 0)) {
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)",
- XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
- }
+ count);
return 0;
}
@@ -6446,13 +6450,7 @@ xfs_bmap_count_tree(
for (;;) {
nextbno = be64_to_cpu(block->bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
- if (unlikely(xfs_bmap_disk_count_leaves(0,
- block, numrecs, count) < 0)) {
- xfs_trans_brelse(tp, bp);
- XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
- XFS_ERRLEVEL_LOW, mp);
- return XFS_ERROR(EFSCORRUPTED);
- }
+ xfs_bmap_disk_count_leaves(0, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
break;
@@ -6470,7 +6468,7 @@ xfs_bmap_count_tree(
/*
* Count leaf blocks given a range of extent records.
*/
-STATIC int
+STATIC void
xfs_bmap_count_leaves(
xfs_ifork_t *ifp,
xfs_extnum_t idx,
@@ -6483,14 +6481,13 @@ xfs_bmap_count_leaves(
xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
*count += xfs_bmbt_get_blockcount(frp);
}
- return 0;
}
/*
* Count leaf blocks given a range of extent records originally
* in btree format.
*/
-STATIC int
+STATIC void
xfs_bmap_disk_count_leaves(
xfs_extnum_t idx,
xfs_bmbt_block_t *block,
@@ -6504,5 +6501,4 @@ xfs_bmap_disk_count_leaves(
frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
*count += xfs_bmbt_disk_get_blockcount(frp);
}
- return 0;
}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451c..9f3e3a836d15 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
/*
* Header for free extent list.
+ *
+ * xbf_low is used by the allocator to activate the lowspace algorithm -
+ * when free space is running low the extent allocator may choose to
+ * allocate an extent from an AG without leaving sufficient space for
+ * a btree split when inserting the new extent. In this case the allocator
+ * will enable the lowspace algorithm which is supposed to allow further
+ * allocations (such as btree splits and newroots) to allocate from
+ * sequential AGs. In order to avoid locking AGs out of order the lowspace
+ * algorithm will start searching for free space from AG 0. If the correct
+ * transaction reservations have been made then this algorithm will eventually
+ * find all the space it needs.
*/
typedef struct xfs_bmap_free
{
xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
int xbf_count; /* count of items on list */
- int xbf_low; /* kludge: alloc in low mode */
+ int xbf_low; /* alloc in low mode */
} xfs_bmap_free_t;
#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973e..23efad29a5cd 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
left = XFS_BUF_TO_BMBT_BLOCK(lbp);
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
+ args.minleft = 0;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = lbno;
args.type = XFS_ALLOCTYPE_START_BNO;
- } else
+ /*
+ * Make sure there is sufficient room left in the AG to
+ * complete a full tree split for an extent insert. If
+ * we are converting the middle part of an extent then
+ * we may need space for two tree splits.
+ *
+ * We are relying on the caller to make the correct block
+ * reservation for this operation to succeed. If the
+ * reservation amount is insufficient then we may fail a
+ * block allocation here and corrupt the filesystem.
+ */
+ args.minleft = xfs_trans_get_block_res(args.tp);
+ } else if (cur->bc_private.b.flist->xbf_low)
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.minleft = args.alignment = args.total = args.isfl =
+ args.mod = args.alignment = args.total = args.isfl =
args.userdata = args.minalignslop = 0;
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
+ if (args.fsbno == NULLFSBLOCK && args.minleft) {
+ /*
+ * Could not find an AG with enough free space to satisfy
+ * a full btree split. Try again without minleft and if
+ * successful activate the lowspace algorithm.
+ */
+ args.fsbno = 0;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.minleft = 0;
+ if ((error = xfs_alloc_vextent(&args))) {
+ XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+ return error;
+ }
+ cur->bc_private.b.flist->xbf_low = 1;
+ }
if (args.fsbno == NULLFSBLOCK) {
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
*stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
* Insert the current record at the point referenced by cur.
*
* A multi-level split of the tree on insert will invalidate the original
- * cursor. It appears, however, that some callers assume that the cursor is
- * always valid. Hence if we do a multi-level split we need to revalidate the
- * cursor.
- *
- * When a split occurs, we will see a new cursor returned. Use that as a
- * trigger to determine if we need to revalidate the original cursor. If we get
- * a split, then use the original irec to lookup up the path of the record we
- * just inserted.
- *
- * Note that the fact that the btree root is in the inode means that we can
- * have the level of the tree change without a "split" occurring at the root
- * level. What happens is that the root is migrated to an allocated block and
- * the inode root is pointed to it. This means a single split can change the
- * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
- * the level change should be accounted as a split so as to correctly trigger a
- * revalidation of the old cursor.
+ * cursor. All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
*/
int /* error */
xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
xfs_fsblock_t nbno;
xfs_btree_cur_t *ncur;
xfs_bmbt_rec_t nrec;
- xfs_bmbt_irec_t oirec; /* original irec */
xfs_btree_cur_t *pcur;
- int splits = 0;
XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
level = 0;
nbno = NULLFSBLOCK;
- oirec = cur->bc_rec.b;
xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
ncur = NULL;
pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
&i))) {
if (pcur != cur)
xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
- goto error0;
+ XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+ return error;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
- /* allocating a new root is effectively a split */
- if (cur->bc_nlevels != pcur->bc_nlevels)
- splits++;
cur->bc_nlevels = pcur->bc_nlevels;
cur->bc_private.b.allocated +=
pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
}
if (ncur) {
- splits++;
pcur = ncur;
ncur = NULL;
}
} while (nbno != NULLFSBLOCK);
-
- if (splits > 1) {
- /* revalidate the old cursor as we had a multi-level split */
- error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
- oirec.br_startblock, oirec.br_blockcount, &i);
- if (error)
- goto error0;
- ASSERT(i == 1);
- }
-
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
*stat = i;
return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
#endif
args.fsbno = be64_to_cpu(*pp);
args.type = XFS_ALLOCTYPE_START_BNO;
- } else
+ } else if (cur->bc_private.b.flist->xbf_low)
+ args.type = XFS_ALLOCTYPE_START_BNO;
+ else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
if ((error = xfs_alloc_vextent(&args))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index aeb87ca69fcc..cc593a84c345 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone;
/*
* Btree magic numbers.
*/
-const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
-{
+const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
};
/*
- * Prototypes for internal routines.
- */
-
-/*
- * Checking routine: return maxrecs for the block.
- */
-STATIC int /* number of records fitting in block */
-xfs_btree_maxrecs(
- xfs_btree_cur_t *cur, /* btree cursor */
- xfs_btree_block_t *block);/* generic btree block pointer */
-
-/*
- * Internal routines.
- */
-
-/*
- * Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
- */
-STATIC xfs_btree_block_t * /* generic btree block pointer */
-xfs_btree_get_block(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level, /* level in btree */
- struct xfs_buf **bpp); /* buffer containing the block */
-
-/*
* Checking routine: return maxrecs for the block.
*/
STATIC int /* number of records fitting in block */
@@ -457,35 +430,6 @@ xfs_btree_dup_cursor(
}
/*
- * Change the cursor to point to the first record at the given level.
- * Other levels are unaffected.
- */
-int /* success=1, failure=0 */
-xfs_btree_firstrec(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level) /* level to change */
-{
- xfs_btree_block_t *block; /* generic btree block pointer */
- xfs_buf_t *bp; /* buffer containing block */
-
- /*
- * Get the block pointer for this level.
- */
- block = xfs_btree_get_block(cur, level, &bp);
- xfs_btree_check_block(cur, block, level, bp);
- /*
- * It's empty, there is no such record.
- */
- if (!block->bb_h.bb_numrecs)
- return 0;
- /*
- * Set the ptr value to 1, that's the first record/key.
- */
- cur->bc_ptrs[level] = 1;
- return 1;
-}
-
-/*
* Retrieve the block pointer from the cursor at the given level.
* This may be a bmap btree root or from a buffer.
*/
@@ -626,6 +570,13 @@ xfs_btree_init_cursor(
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
break;
+ case XFS_BTNUM_INO:
+ /*
+ * Inode allocation btree fields.
+ */
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+ break;
case XFS_BTNUM_BMAP:
/*
* Bmap btree fields.
@@ -638,13 +589,6 @@ xfs_btree_init_cursor(
cur->bc_private.b.flags = 0;
cur->bc_private.b.whichfork = whichfork;
break;
- case XFS_BTNUM_INO:
- /*
- * Inode allocation btree fields.
- */
- cur->bc_private.i.agbp = agbp;
- cur->bc_private.i.agno = agno;
- break;
default:
ASSERT(0);
}
@@ -671,6 +615,35 @@ xfs_btree_islastblock(
}
/*
+ * Change the cursor to point to the first record at the given level.
+ * Other levels are unaffected.
+ */
+int /* success=1, failure=0 */
+xfs_btree_firstrec(
+ xfs_btree_cur_t *cur, /* btree cursor */
+ int level) /* level to change */
+{
+ xfs_btree_block_t *block; /* generic btree block pointer */
+ xfs_buf_t *bp; /* buffer containing block */
+
+ /*
+ * Get the block pointer for this level.
+ */
+ block = xfs_btree_get_block(cur, level, &bp);
+ xfs_btree_check_block(cur, block, level, bp);
+ /*
+ * It's empty, there is no such record.
+ */
+ if (!block->bb_h.bb_numrecs)
+ return 0;
+ /*
+ * Set the ptr value to 1, that's the first record/key.
+ */
+ cur->bc_ptrs[level] = 1;
+ return 1;
+}
+
+/*
* Change the cursor to point to the last record in the current block
* at the given level. Other levels are unaffected.
*/
@@ -890,12 +863,12 @@ xfs_btree_readahead_core(
case XFS_BTNUM_INO:
i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
be32_to_cpu(i->bb_leftsib), 1);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+ xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
be32_to_cpu(i->bb_rightsib), 1);
rval++;
}
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 7440b78f9cec..1f528a2a3754 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -158,8 +158,8 @@ typedef struct xfs_btree_cur
__uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */
xfs_btnum_t bc_btnum; /* identifies which btree type */
union {
- struct { /* needed for BNO, CNT */
- struct xfs_buf *agbp; /* agf buffer pointer */
+ struct { /* needed for BNO, CNT, INO */
+ struct xfs_buf *agbp; /* agf/agi buffer pointer */
xfs_agnumber_t agno; /* ag number */
} a;
struct { /* needed for BMAP */
@@ -172,10 +172,6 @@ typedef struct xfs_btree_cur
char flags; /* flags */
#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */
} b;
- struct { /* needed for INO */
- struct xfs_buf *agbp; /* agi buffer pointer */
- xfs_agnumber_t agno; /* ag number */
- } i;
} bc_private; /* per-btree type data */
} xfs_btree_cur_t;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025d..608c30c3f76b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -737,7 +737,7 @@ xfs_buf_item_init(
bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
bip->bli_format.blf_map_size = map_size;
#ifdef XFS_BLI_TRACE
- bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP);
+ bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_TRANS_DEBUG
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
}
#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+ kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
- kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+ kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
@@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks(
anyway. */
XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
XFS_BUF_DONE(bp);
- XFS_BUF_V_IODONESEMA(bp);
+ XFS_BUF_FINISH_IOWAIT(bp);
}
return;
}
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+ kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
- kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+ kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee224..d2ce5dd70d87 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
/* (osyncisdsync is default) */
+#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
* bits of address space */
#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563f..9e561a9cefca 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
}
if (level < 0) {
*result = XFS_ERROR(ENOENT); /* we're out of our tree */
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
return(0);
}
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
}
}
+enum xfs_dacmp
+xfs_da_compname(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
+ XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
+}
+
+static xfs_dahash_t
+xfs_default_hashname(
+ struct xfs_name *name)
+{
+ return xfs_da_hashname(name->name, name->len);
+}
+
+const struct xfs_nameops xfs_default_nameops = {
+ .hashname = xfs_default_hashname,
+ .compname = xfs_da_compname
+};
+
/*
* Add a block to the btree ahead of the file.
* Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist,
NULL))) {
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return error;
}
if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
bno + count) {
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return XFS_ERROR(ENOSPC);
}
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
*new_blkno = (xfs_dablk_t)bno;
return 0;
}
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
}
}
if (bplist) {
- kmem_free(bplist, sizeof(*bplist) * nmap);
+ kmem_free(bplist);
}
if (mapp != &map) {
- kmem_free(mapp, sizeof(*mapp) * nfsb);
+ kmem_free(mapp);
}
if (bpp)
*bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
if (bplist) {
for (i = 0; i < nbplist; i++)
xfs_trans_brelse(trans, bplist[i]);
- kmem_free(bplist, sizeof(*bplist) * nmap);
+ kmem_free(bplist);
}
exit0:
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * nfsb);
+ kmem_free(mapp);
if (bpp)
*bpp = NULL;
return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
#ifdef XFS_DABUF_DEBUG
xfs_dabuf_t *xfs_dabuf_global_list;
-spinlock_t xfs_dabuf_global_lock;
+static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
#endif
/*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
if (dabuf->dirty)
xfs_da_buf_clean(dabuf);
if (dabuf->nbuf > 1)
- kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+ kmem_free(dabuf->data);
#ifdef XFS_DABUF_DEBUG
{
spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
if (dabuf->nbuf == 1)
kmem_zone_free(xfs_dabuf_zone, dabuf);
else
- kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+ kmem_free(dabuf);
}
/*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
for (i = 0; i < nbuf; i++)
xfs_trans_brelse(tp, bplist[i]);
if (bplist != &bp)
- kmem_free(bplist, nbuf * sizeof(*bplist));
+ kmem_free(bplist);
}
/*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
for (i = 0; i < nbuf; i++)
xfs_trans_binval(tp, bplist[i]);
if (bplist != &bp)
- kmem_free(bplist, nbuf * sizeof(*bplist));
+ kmem_free(bplist);
}
/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f9..8be0b00ede9a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
*========================================================================*/
/*
+ * Search comparison results
+ */
+enum xfs_dacmp {
+ XFS_CMP_DIFFERENT, /* names are completely different */
+ XFS_CMP_EXACT, /* names are exactly the same */
+ XFS_CMP_CASE /* names are same but differ in case */
+};
+
+/*
* Structure to ease passing around component names.
*/
typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
int index2; /* index of 2nd attr in blk */
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
- unsigned char justcheck; /* T/F: check for ok with no space */
- unsigned char rename; /* T/F: this is an atomic rename op */
- unsigned char addname; /* T/F: this is an add operation */
- unsigned char oknoent; /* T/F: ok to return ENOENT, else die */
+ int op_flags; /* operation flags */
+ enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
/*
+ * Operation flags:
+ */
+#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
+#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
+#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
+#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
+#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
+
+/*
* Structure to describe buffer(s) for a block.
* This is needed in the directory version 2 format case, when
* multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
+/*
+ * Name ops for directory and/or attr name operations
+ */
+struct xfs_nameops {
+ xfs_dahash_t (*hashname)(struct xfs_name *);
+ enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int);
+};
+
#ifdef __KERNEL__
/*========================================================================
@@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
xfs_dabuf_t *dead_buf);
uint xfs_da_hashname(const uchar_t *name_string, int name_length);
+enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
+ const char *name, int len);
+
+
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb9885..760f4c5b5160 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
out_put_file:
fput(file);
out_free_sxp:
- kmem_free(sxp, sizeof(xfs_swapext_t));
+ kmem_free(sxp);
out:
return error;
}
@@ -128,10 +128,8 @@ xfs_swap_extents(
xfs_swapext_t *sxp)
{
xfs_mount_t *mp;
- xfs_inode_t *ips[2];
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
- bhv_vnode_t *vp, *tvp;
xfs_ifork_t *tempifp, *ifp, *tifp;
int ilf_fields, tilf_fields;
static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
@@ -150,19 +148,8 @@ xfs_swap_extents(
}
sbp = &sxp->sx_stat;
- vp = XFS_ITOV(ip);
- tvp = XFS_ITOV(tip);
-
- /* Lock in i_ino order */
- if (ip->i_ino < tip->i_ino) {
- ips[0] = ip;
- ips[1] = tip;
- } else {
- ips[0] = tip;
- ips[1] = ip;
- }
- xfs_lock_inodes(ips, 2, lock_flags);
+ xfs_lock_two_inodes(ip, tip, lock_flags);
locked = 1;
/* Verify that both files have the same format */
@@ -184,7 +171,7 @@ xfs_swap_extents(
goto error0;
}
- if (VN_CACHED(tvp) != 0) {
+ if (VN_CACHED(VFS_I(tip)) != 0) {
xfs_inval_cached_trace(tip, 0, -1, 0, -1);
error = xfs_flushinval_pages(tip, 0, -1,
FI_REMAPF_LOCKED);
@@ -193,7 +180,7 @@ xfs_swap_extents(
}
/* Verify O_DIRECT for ftmp */
- if (VN_CACHED(tvp) != 0) {
+ if (VN_CACHED(VFS_I(tip)) != 0) {
error = XFS_ERROR(EINVAL);
goto error0;
}
@@ -237,7 +224,7 @@ xfs_swap_extents(
* vop_read (or write in the case of autogrow) they block on the iolock
* until we have switched the extents.
*/
- if (VN_MAPPED(vp)) {
+ if (VN_MAPPED(VFS_I(ip))) {
error = XFS_ERROR(EBUSY);
goto error0;
}
@@ -265,7 +252,7 @@ xfs_swap_extents(
locked = 0;
goto error0;
}
- xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
/*
* Count the number of extended attribute blocks
@@ -350,15 +337,11 @@ xfs_swap_extents(
break;
}
- /*
- * Increment vnode ref counts since xfs_trans_commit &
- * xfs_trans_cancel will both unlock the inodes and
- * decrement the associated ref counts.
- */
- VN_HOLD(vp);
- VN_HOLD(tvp);
+ IHOLD(ip);
xfs_trans_ijoin(tp, ip, lock_flags);
+
+ IHOLD(tip);
xfs_trans_ijoin(tp, tip, lock_flags);
xfs_trans_log_inode(tp, ip, ilf_fields);
@@ -381,6 +364,6 @@ xfs_swap_extents(
xfs_iunlock(tip, lock_flags);
}
if (tempifp != NULL)
- kmem_free(tempifp, sizeof(xfs_ifork_t));
+ kmem_free(tempifp);
return error;
}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766b..80e0dc51361c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
struct xfs_name xfs_name_dotdot = {"..", 2};
+extern const struct xfs_nameops xfs_default_nameops;
+
+/*
+ * ASCII case-insensitive (ie. A-Z) support for directories that was
+ * used in IRIX.
+ */
+STATIC xfs_dahash_t
+xfs_ascii_ci_hashname(
+ struct xfs_name *name)
+{
+ xfs_dahash_t hash;
+ int i;
+
+ for (i = 0, hash = 0; i < name->len; i++)
+ hash = tolower(name->name[i]) ^ rol32(hash, 7);
+
+ return hash;
+}
+
+STATIC enum xfs_dacmp
+xfs_ascii_ci_compname(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ enum xfs_dacmp result;
+ int i;
+
+ if (args->namelen != len)
+ return XFS_CMP_DIFFERENT;
+
+ result = XFS_CMP_EXACT;
+ for (i = 0; i < len; i++) {
+ if (args->name[i] == name[i])
+ continue;
+ if (tolower(args->name[i]) != tolower(name[i]))
+ return XFS_CMP_DIFFERENT;
+ result = XFS_CMP_CASE;
+ }
+
+ return result;
+}
+
+static struct xfs_nameops xfs_ascii_ci_nameops = {
+ .hashname = xfs_ascii_ci_hashname,
+ .compname = xfs_ascii_ci_compname,
+};
+
void
xfs_dir_mount(
xfs_mount_t *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
(uint)sizeof(xfs_da_node_entry_t);
mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+ if (xfs_sb_version_hasasciici(&mp->m_sb))
+ mp->m_dirnameops = &xfs_ascii_ci_nameops;
+ else
+ mp->m_dirnameops = &xfs_default_nameops;
}
/*
@@ -162,9 +214,10 @@ xfs_dir_createname(
return rval;
XFS_STATS_INC(xs_dir_create);
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = inum;
args.dp = dp;
args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = 0;
- args.addname = args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
}
/*
+ * If doing a CI lookup and case-insensitive match, dup actual name into
+ * args.value. Return EEXIST for success (ie. name found) or an error.
+ */
+int
+xfs_dir_cilookup_result(
+ struct xfs_da_args *args,
+ const char *name,
+ int len)
+{
+ if (args->cmpresult == XFS_CMP_DIFFERENT)
+ return ENOENT;
+ if (args->cmpresult != XFS_CMP_CASE ||
+ !(args->op_flags & XFS_DA_OP_CILOOKUP))
+ return EEXIST;
+
+ args->value = kmem_alloc(len, KM_MAYFAIL);
+ if (!args->value)
+ return ENOMEM;
+
+ memcpy(args->value, name, len);
+ args->valuelen = len;
+ return EEXIST;
+}
+
+/*
* Lookup a name in a directory, give back the inode number.
+ * If ci_name is not NULL, returns the actual name in ci_name if it differs
+ * to name, or ci_name->name is set to NULL for an exact match.
*/
+
int
xfs_dir_lookup(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
- xfs_ino_t *inum) /* out: inode number */
+ xfs_ino_t *inum, /* out: inode number */
+ struct xfs_name *ci_name) /* out: actual name if CI match */
{
xfs_da_args_t args;
int rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.dp = dp;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_OKNOENT;
+ if (ci_name)
+ args.op_flags |= XFS_DA_OP_CILOOKUP;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
rval = xfs_dir2_node_lookup(&args);
if (rval == EEXIST)
rval = 0;
- if (rval == 0)
+ if (!rval) {
*inum = args.inumber;
+ if (ci_name) {
+ ci_name->name = args.value;
+ ci_name->len = args.valuelen;
+ }
+ }
return rval;
}
@@ -255,9 +343,10 @@ xfs_dir_removename(
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
XFS_STATS_INC(xs_dir_remove);
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = ino;
args.dp = dp;
args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 0;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.inumber = inum;
args.dp = dp;
args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
args.total = total;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 0;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
return 0;
ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
- memset(&args, 0, sizeof(xfs_da_args_t));
+ memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
args.namelen = name->len;
- args.hashval = xfs_da_hashname(name->name, name->len);
+ args.hashval = dp->i_mount->m_dirnameops->hashname(name);
args.dp = dp;
args.whichfork = XFS_DATA_FORK;
args.trans = tp;
- args.justcheck = args.addname = args.oknoent = 1;
+ args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ XFS_DA_OP_OKNOENT;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist,
NULL))) {
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return error;
}
if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
bno + count) {
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
return XFS_ERROR(ENOSPC);
}
/*
* Done with the temporary mapping table.
*/
if (mapp != &map)
- kmem_free(mapp, sizeof(*mapp) * count);
+ kmem_free(mapp);
*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
/*
* Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029f..1d9ef96f33aa 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_fsblock_t *first,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name, xfs_ino_t *inum);
+ struct xfs_name *name, xfs_ino_t *inum,
+ struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_dabuf *bp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
+ int len);
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b3..e2fa0a1d8e96 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
/*
* If this isn't a real add, we're done with the buffer.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
xfs_da_brelse(tp, bp);
/*
* If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
* Not trying to actually do anything, or don't have
* a space reservation: return no-space.
*/
- if (args->justcheck || args->total == 0)
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
return XFS_ERROR(ENOSPC);
/*
* Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
/*
* Just checking, and it would work, so say so.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
needlog = needscan = 0;
/*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
/*
* Get the offset from the leaf entry, to point to the data.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)block +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
- * Fill in inode number, release the block.
+ * Fill in inode number, CI name if appropriate, release the block.
*/
args->inumber = be64_to_cpu(dep->inumber);
+ error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_da_brelse(args->trans, bp);
- return XFS_ERROR(EEXIST);
+ return XFS_ERROR(error);
}
/*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
int mid; /* binary search current idx */
xfs_mount_t *mp; /* filesystem mount point */
xfs_trans_t *tp; /* transaction pointer */
+ enum xfs_dacmp cmp; /* comparison result */
dp = args->dp;
tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
else
high = mid - 1;
if (low > high) {
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
xfs_da_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
dep = (xfs_dir2_data_entry_t *)
((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
/*
- * Compare, if it's right give back buffer & entry number.
+ * Compare name and if it's an exact match, return the index
+ * and buffer. If it's the first case-insensitive match, store
+ * the index and buffer and continue looking for an exact match.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
*bpp = bp;
*entno = mid;
- return 0;
+ if (cmp == XFS_CMP_EXACT)
+ return 0;
}
- } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash);
+ } while (++mid < be32_to_cpu(btp->count) &&
+ be32_to_cpu(blp[mid].hashval) == hash);
+
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ /*
+ * Here, we can only be doing a lookup (not a rename or replace).
+ * If a case-insensitive match was found earlier, return success.
+ */
+ if (args->cmpresult == XFS_CMP_CASE)
+ return 0;
/*
* No match, release the buffer and return ENOENT.
*/
- ASSERT(args->oknoent);
xfs_da_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
xfs_dir2_sf_t *sfp; /* shortform structure */
__be16 *tagp; /* end of data entry */
xfs_trans_t *tp; /* transaction pointer */
+ struct xfs_name name;
xfs_dir2_trace_args("sf_to_block", args);
dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
if (error) {
- kmem_free(buf, buf_len);
+ kmem_free(buf);
return error;
}
/*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
*/
error = xfs_dir2_data_init(args, blkno, &bp);
if (error) {
- kmem_free(buf, buf_len);
+ kmem_free(buf);
return error;
}
block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
tagp = xfs_dir2_data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)block);
xfs_dir2_data_log_entry(tp, bp, dep);
- blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
- (char *)sfep->name, sfep->namelen));
+ name.name = sfep->name;
+ name.len = sfep->namelen;
+ blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
+ hashname(&name));
blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
(char *)dep - (char *)block));
offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
/* Done with the temporary buffer */
- kmem_free(buf, buf_len);
+ kmem_free(buf);
/*
* Sort the leaf entries by hash value.
*/
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23d..498f8d694330 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
xfs_mount_t *mp; /* filesystem mount point */
char *p; /* current data position */
int stale; /* count of stale leaves */
+ struct xfs_name name;
mp = dp->i_mount;
d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
(xfs_dir2_data_aoff_t)
((char *)dep - (char *)d));
- hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+ name.name = dep->name;
+ name.len = dep->namelen;
+ hash = mp->m_dirnameops->hashname(&name);
for (i = 0; i < be32_to_cpu(btp->count); i++) {
if (be32_to_cpu(lep[i].address) == addr &&
be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79b..93535992cb60 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
* If we don't have enough free bytes but we can make enough
* by compacting out stale entries, we'll do that.
*/
- if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes &&
- be16_to_cpu(leaf->hdr.stale) > 1) {
+ if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
+ needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
compact = 1;
}
/*
* Otherwise if we don't have enough free bytes we need to
* convert to node form.
*/
- else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
- needbytes) {
+ else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
+ leaf->hdr.count)] < needbytes) {
/*
* Just checking or no space reservation, give up.
*/
- if (args->justcheck || args->total == 0) {
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+ args->total == 0) {
xfs_da_brelse(tp, lbp);
return XFS_ERROR(ENOSPC);
}
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
* If just checking, then it will fit unless we needed to allocate
* a new data block.
*/
- if (args->justcheck) {
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_da_brelse(tp, lbp);
return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
}
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
*offset = XFS_DIR2_MAX_DATAPTR;
else
*offset = xfs_dir2_byte_to_dataptr(mp, curoff);
- kmem_free(map, map_size * sizeof(*map));
+ kmem_free(map);
if (bp)
xfs_da_brelse(NULL, bp);
return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
((char *)dbp->data +
xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
/*
- * Return the found inode number.
+ * Return the found inode number & CI name if appropriate
*/
args->inumber = be64_to_cpu(dep->inumber);
+ error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_da_brelse(tp, dbp);
xfs_da_brelse(tp, lbp);
- return XFS_ERROR(EEXIST);
+ return XFS_ERROR(error);
}
/*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
int *indexp, /* out: index in leaf block */
xfs_dabuf_t **dbpp) /* out: data buffer */
{
- xfs_dir2_db_t curdb; /* current data block number */
- xfs_dabuf_t *dbp; /* data buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dabuf_t *dbp = NULL; /* data buffer */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_db_t newdb; /* new data block number */
xfs_trans_t *tp; /* transaction pointer */
+ xfs_dir2_db_t cidb = -1; /* case match data block no. */
+ enum xfs_dacmp cmp; /* name compare result */
dp = args->dp;
tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
/*
* Read the leaf block into the buffer.
*/
- if ((error =
- xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
- XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+ XFS_DATA_FORK);
+ if (error)
return error;
- }
*lbpp = lbp;
leaf = lbp->data;
xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
* Loop over all the entries with the right hash value
* looking to match the name.
*/
- for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
- index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
- lep++, index++) {
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
* Skip over stale leaf entries.
*/
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
if (newdb != curdb) {
if (dbp)
xfs_da_brelse(tp, dbp);
- if ((error =
- xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
- XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newdb),
+ -1, &dbp, XFS_DATA_FORK);
+ if (error) {
xfs_da_brelse(tp, lbp);
return error;
}
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
/*
* Point to the data entry.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)dbp->data +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
/*
- * If it matches then return it.
+ * Compare name and if it's an exact match, return the index
+ * and buffer. If it's the first case-insensitive match, store
+ * the index and buffer and continue looking for an exact match.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
- *dbpp = dbp;
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
*indexp = index;
- return 0;
+ /* case exact match: return the current buffer. */
+ if (cmp == XFS_CMP_EXACT) {
+ *dbpp = dbp;
+ return 0;
+ }
+ cidb = curdb;
}
}
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ /*
+ * Here, we can only be doing a lookup (not a rename or remove).
+ * If a case-insensitive match was found earlier, re-read the
+ * appropriate data block if required and return it.
+ */
+ if (args->cmpresult == XFS_CMP_CASE) {
+ ASSERT(cidb != -1);
+ if (cidb != curdb) {
+ xfs_da_brelse(tp, dbp);
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, cidb),
+ -1, &dbp, XFS_DATA_FORK);
+ if (error) {
+ xfs_da_brelse(tp, lbp);
+ return error;
+ }
+ }
+ *dbpp = dbp;
+ return 0;
+ }
/*
* No match found, return ENOENT.
*/
- ASSERT(args->oknoent);
+ ASSERT(cidb == -1);
if (dbp)
xfs_da_brelse(tp, dbp);
xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f099..fa6c3a5ddbc6 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
/*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
}
/*
- * Look up a leaf entry in a node-format leaf block.
- * If this is an addname then the extrablk in state is a freespace block,
- * otherwise it's a data block.
+ * Look up a leaf entry for space to add a name in a node-format leaf block.
+ * The extrablk in state is a freespace block.
*/
-int
-xfs_dir2_leafn_lookup_int(
+STATIC int
+xfs_dir2_leafn_lookup_for_addname(
xfs_dabuf_t *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
{
- xfs_dabuf_t *curbp; /* current data/free buffer */
- xfs_dir2_db_t curdb; /* current data block number */
- xfs_dir2_db_t curfdb; /* current free block number */
- xfs_dir2_data_entry_t *dep; /* data block entry */
+ xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dir2_db_t curfdb = -1; /* current free block number */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
int fi; /* free entry index */
- xfs_dir2_free_t *free=NULL; /* free block structure */
+ xfs_dir2_free_t *free = NULL; /* free block structure */
int index; /* leaf entry index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
- int length=0; /* length of new data entry */
+ int length; /* length of new data entry */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_db_t newdb; /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
/*
* Do we have a buffer coming in?
*/
- if (state->extravalid)
+ if (state->extravalid) {
+ /* If so, it's a free block buffer, get the block number. */
curbp = state->extrablk.bp;
- else
- curbp = NULL;
- /*
- * For addname, it's a free block buffer, get the block number.
- */
- if (args->addname) {
- curfdb = curbp ? state->extrablk.blkno : -1;
- curdb = -1;
- length = xfs_dir2_data_entsize(args->namelen);
- if ((free = (curbp ? curbp->data : NULL)))
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
- }
- /*
- * For others, it's a data block buffer, get the block number.
- */
- else {
- curfdb = -1;
- curdb = curbp ? state->extrablk.blkno : -1;
+ curfdb = state->extrablk.blkno;
+ free = curbp->data;
+ ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
}
+ length = xfs_dir2_data_entsize(args->namelen);
/*
* Loop over leaf entries with the right hash value.
*/
- for (lep = &leaf->ents[index];
- index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
- lep++, index++) {
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
* Skip stale leaf entries.
*/
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
* For addname, we're looking for a place to put the new entry.
* We want to use a data block with an entry of equal
* hash value to ours if there is one with room.
+ *
+ * If this block isn't the data block we already have
+ * in hand, take a look at it.
*/
- if (args->addname) {
+ if (newdb != curdb) {
+ curdb = newdb;
/*
- * If this block isn't the data block we already have
- * in hand, take a look at it.
+ * Convert the data block to the free block
+ * holding its freespace information.
*/
- if (newdb != curdb) {
- curdb = newdb;
- /*
- * Convert the data block to the free block
- * holding its freespace information.
- */
- newfdb = xfs_dir2_db_to_fdb(mp, newdb);
- /*
- * If it's not the one we have in hand,
- * read it in.
- */
- if (newfdb != curfdb) {
- /*
- * If we had one before, drop it.
- */
- if (curbp)
- xfs_da_brelse(tp, curbp);
- /*
- * Read the free block.
- */
- if ((error = xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp,
- newfdb),
- -1, &curbp,
- XFS_DATA_FORK))) {
- return error;
- }
- free = curbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) ==
- XFS_DIR2_FREE_MAGIC);
- ASSERT((be32_to_cpu(free->hdr.firstdb) %
- XFS_DIR2_MAX_FREE_BESTS(mp)) ==
- 0);
- ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
- ASSERT(curdb <
- be32_to_cpu(free->hdr.firstdb) +
- be32_to_cpu(free->hdr.nvalid));
- }
- /*
- * Get the index for our entry.
- */
- fi = xfs_dir2_db_to_fdindex(mp, curdb);
- /*
- * If it has room, return it.
- */
- if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
- XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
- XFS_ERRLEVEL_LOW, mp);
- if (curfdb != newfdb)
- xfs_da_brelse(tp, curbp);
- return XFS_ERROR(EFSCORRUPTED);
- }
- curfdb = newfdb;
- if (be16_to_cpu(free->bests[fi]) >= length) {
- *indexp = index;
- state->extravalid = 1;
- state->extrablk.bp = curbp;
- state->extrablk.blkno = curfdb;
- state->extrablk.index = fi;
- state->extrablk.magic =
- XFS_DIR2_FREE_MAGIC;
- ASSERT(args->oknoent);
- return XFS_ERROR(ENOENT);
- }
- }
- }
- /*
- * Not adding a new entry, so we really want to find
- * the name given to us.
- */
- else {
+ newfdb = xfs_dir2_db_to_fdb(mp, newdb);
/*
- * If it's a different data block, go get it.
+ * If it's not the one we have in hand, read it in.
*/
- if (newdb != curdb) {
+ if (newfdb != curfdb) {
/*
- * If we had a block before, drop it.
+ * If we had one before, drop it.
*/
if (curbp)
xfs_da_brelse(tp, curbp);
/*
- * Read the data block.
+ * Read the free block.
*/
- if ((error =
- xfs_da_read_buf(tp, dp,
- xfs_dir2_db_to_da(mp, newdb), -1,
- &curbp, XFS_DATA_FORK))) {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newfdb),
+ -1, &curbp, XFS_DATA_FORK);
+ if (error)
return error;
- }
- xfs_dir2_data_check(dp, curbp);
- curdb = newdb;
+ free = curbp->data;
+ ASSERT(be32_to_cpu(free->hdr.magic) ==
+ XFS_DIR2_FREE_MAGIC);
+ ASSERT((be32_to_cpu(free->hdr.firstdb) %
+ XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+ ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
+ ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
+ be32_to_cpu(free->hdr.nvalid));
}
/*
- * Point to the data entry.
+ * Get the index for our entry.
*/
- dep = (xfs_dir2_data_entry_t *)
- ((char *)curbp->data +
- xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ fi = xfs_dir2_db_to_fdindex(mp, curdb);
/*
- * Compare the entry, return it if it matches.
+ * If it has room, return it.
*/
- if (dep->namelen == args->namelen &&
- dep->name[0] == args->name[0] &&
- memcmp(dep->name, args->name, args->namelen) == 0) {
- args->inumber = be64_to_cpu(dep->inumber);
- *indexp = index;
- state->extravalid = 1;
- state->extrablk.bp = curbp;
- state->extrablk.blkno = curdb;
- state->extrablk.index =
- (int)((char *)dep -
- (char *)curbp->data);
- state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
- return XFS_ERROR(EEXIST);
+ if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+ XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
+ XFS_ERRLEVEL_LOW, mp);
+ if (curfdb != newfdb)
+ xfs_da_brelse(tp, curbp);
+ return XFS_ERROR(EFSCORRUPTED);
}
+ curfdb = newfdb;
+ if (be16_to_cpu(free->bests[fi]) >= length)
+ goto out;
}
}
+ /* Didn't find any space */
+ fi = -1;
+out:
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+ if (curbp) {
+ /* Giving back a free block. */
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
+ state->extrablk.index = fi;
+ state->extrablk.blkno = curfdb;
+ state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+ } else {
+ state->extravalid = 0;
+ }
/*
- * Didn't find a match.
- * If we are holding a buffer, give it back in case our caller
- * finds it useful.
+ * Return the index, that will be the insertion point.
*/
- if ((state->extravalid = (curbp != NULL))) {
- state->extrablk.bp = curbp;
- state->extrablk.index = -1;
+ *indexp = index;
+ return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * The extrablk in state a data block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_entry(
+ xfs_dabuf_t *bp, /* leaf buffer */
+ xfs_da_args_t *args, /* operation arguments */
+ int *indexp, /* out: leaf entry index */
+ xfs_da_state_t *state) /* state to fill in */
+{
+ xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ xfs_dir2_db_t curdb = -1; /* current data block number */
+ xfs_dir2_data_entry_t *dep; /* data block entry */
+ xfs_inode_t *dp; /* incore directory inode */
+ int error; /* error return value */
+ int index; /* leaf entry index */
+ xfs_dir2_leaf_t *leaf; /* leaf structure */
+ xfs_dir2_leaf_entry_t *lep; /* leaf entry */
+ xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_db_t newdb; /* new data block number */
+ xfs_trans_t *tp; /* transaction pointer */
+ enum xfs_dacmp cmp; /* comparison result */
+
+ dp = args->dp;
+ tp = args->trans;
+ mp = dp->i_mount;
+ leaf = bp->data;
+ ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+ ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
+#endif
+ xfs_dir2_leafn_check(dp, bp);
+ /*
+ * Look up the hash value in the leaf entries.
+ */
+ index = xfs_dir2_leaf_search_hash(args, bp);
+ /*
+ * Do we have a buffer coming in?
+ */
+ if (state->extravalid) {
+ curbp = state->extrablk.bp;
+ curdb = state->extrablk.blkno;
+ }
+ /*
+ * Loop over leaf entries with the right hash value.
+ */
+ for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+ be32_to_cpu(lep->hashval) == args->hashval;
+ lep++, index++) {
/*
- * For addname, giving back a free block.
+ * Skip stale leaf entries.
*/
- if (args->addname) {
- state->extrablk.blkno = curfdb;
- state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+ if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+ continue;
+ /*
+ * Pull the data block number from the entry.
+ */
+ newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+ /*
+ * Not adding a new entry, so we really want to find
+ * the name given to us.
+ *
+ * If it's a different data block, go get it.
+ */
+ if (newdb != curdb) {
+ /*
+ * If we had a block before that we aren't saving
+ * for a CI name, drop it
+ */
+ if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
+ curdb != state->extrablk.blkno))
+ xfs_da_brelse(tp, curbp);
+ /*
+ * If needing the block that is saved with a CI match,
+ * use it otherwise read in the new data block.
+ */
+ if (args->cmpresult != XFS_CMP_DIFFERENT &&
+ newdb == state->extrablk.blkno) {
+ ASSERT(state->extravalid);
+ curbp = state->extrablk.bp;
+ } else {
+ error = xfs_da_read_buf(tp, dp,
+ xfs_dir2_db_to_da(mp, newdb),
+ -1, &curbp, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
+ xfs_dir2_data_check(dp, curbp);
+ curdb = newdb;
}
/*
- * For other callers, giving back a data block.
+ * Point to the data entry.
*/
- else {
+ dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+ xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ /*
+ * Compare the entry and if it's an exact match, return
+ * EEXIST immediately. If it's the first case-insensitive
+ * match, store the block & inode number and continue looking.
+ */
+ cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ /* If there is a CI match block, drop it */
+ if (args->cmpresult != XFS_CMP_DIFFERENT &&
+ curdb != state->extrablk.blkno)
+ xfs_da_brelse(tp, state->extrablk.bp);
+ args->cmpresult = cmp;
+ args->inumber = be64_to_cpu(dep->inumber);
+ *indexp = index;
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
state->extrablk.blkno = curdb;
+ state->extrablk.index = (int)((char *)dep -
+ (char *)curbp->data);
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+ if (cmp == XFS_CMP_EXACT)
+ return XFS_ERROR(EEXIST);
}
}
- /*
- * Return the final index, that will be the insertion point.
- */
+ ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
+ (args->op_flags & XFS_DA_OP_OKNOENT));
+ if (curbp) {
+ if (args->cmpresult == XFS_CMP_DIFFERENT) {
+ /* Giving back last used data block. */
+ state->extravalid = 1;
+ state->extrablk.bp = curbp;
+ state->extrablk.index = -1;
+ state->extrablk.blkno = curdb;
+ state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+ } else {
+ /* If the curbp is not the CI match block, drop it */
+ if (state->extrablk.bp != curbp)
+ xfs_da_brelse(tp, curbp);
+ }
+ } else {
+ state->extravalid = 0;
+ }
*indexp = index;
- ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
return XFS_ERROR(ENOENT);
}
/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+ xfs_dabuf_t *bp, /* leaf buffer */
+ xfs_da_args_t *args, /* operation arguments */
+ int *indexp, /* out: leaf entry index */
+ xfs_da_state_t *state) /* state to fill in */
+{
+ if (args->op_flags & XFS_DA_OP_ADDNAME)
+ return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
+ state);
+ return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
+}
+
+/*
* Move count leaf entries from source to destination leaf.
* Log entries and headers. Stale entries are preserved.
*/
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
*/
if (!state->inleaf)
blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
-
- /*
- * Finally sanity check just to make sure we are not returning a negative index
+
+ /*
+ * Finally sanity check just to make sure we are not returning a
+ * negative index
*/
if(blk2->index < 0) {
state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
/*
* It worked, fix the hash values up the btree.
*/
- if (!args->justcheck)
+ if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
xfs_da_fixhashpath(state, &state->path);
} else {
/*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
/*
* Not allowed to allocate, return failure.
*/
- if (args->justcheck || args->total == 0) {
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+ args->total == 0) {
/*
* Drop the freespace buffer unless it came from our
* caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
/*
* If just checking, we succeeded.
*/
- if (args->justcheck) {
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
error = xfs_da_node_lookup_int(state, &rval);
if (error)
rval = error;
+ else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
+ /* If a CI match, dup the actual name and return EEXIST */
+ xfs_dir2_data_entry_t *dep;
+
+ dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
+ data + state->extrablk.index);
+ rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+ }
/*
* Release the btree blocks and leaf block.
*/
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
* Look up the entry we're deleting, set up the cursor.
*/
error = xfs_da_node_lookup_int(state, &rval);
- if (error) {
+ if (error)
rval = error;
- }
/*
* Didn't find it, upper layer screwed up.
*/
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
*/
error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
&state->extrablk, &rval);
- if (error) {
+ if (error)
return error;
- }
/*
* Fix the hash values up the btree.
*/
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1cef..b46af0013ec9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(block, mp->m_dirblksize);
+ kmem_free(block);
return error;
}
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
/*
* Just checking or no space reservation, it doesn't fit.
*/
- if (args->justcheck || args->total == 0)
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
return XFS_ERROR(ENOSPC);
/*
* Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
/*
* Just checking, it fits.
*/
- if (args->justcheck)
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
/*
* Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
memcpy(sfep, oldsfep, old_isize - nbytes);
}
- kmem_free(buf, old_isize);
+ kmem_free(buf);
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
}
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
{
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
+ int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_t *sfp; /* shortform structure */
+ enum xfs_dacmp cmp; /* comparison result */
+ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
xfs_dir2_trace_args("sf_lookup", args);
xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
*/
if (args->namelen == 1 && args->name[0] == '.') {
args->inumber = dp->i_ino;
+ args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
/*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
/*
* Loop over all the entries trying to match ours.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(args->name, sfep->name, args->namelen) == 0) {
- args->inumber =
- xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
- return XFS_ERROR(EEXIST);
+ ci_sfep = NULL;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ /*
+ * Compare name and if it's an exact match, return the inode
+ * number. If it's the first case-insensitive match, store the
+ * inode number and continue looking for an exact match.
+ */
+ cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
+ sfep->namelen);
+ if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+ args->cmpresult = cmp;
+ args->inumber = xfs_dir2_sf_get_inumber(sfp,
+ xfs_dir2_sf_inumberp(sfep));
+ if (cmp == XFS_CMP_EXACT)
+ return XFS_ERROR(EEXIST);
+ ci_sfep = sfep;
}
}
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
/*
- * Didn't find it.
+ * Here, we can only be doing a lookup (not a rename or replace).
+ * If a case-insensitive match was not found, return ENOENT.
*/
- ASSERT(args->oknoent);
- return XFS_ERROR(ENOENT);
+ if (!ci_sfep)
+ return XFS_ERROR(ENOENT);
+ /* otherwise process the CI match as required by the caller */
+ error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
+ return XFS_ERROR(error);
}
/*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
* Loop over the old directory entries.
* Find the one we're deleting.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(sfep->name, args->name, args->namelen) == 0) {
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+ XFS_CMP_EXACT) {
ASSERT(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)) ==
- args->inumber);
+ xfs_dir2_sf_inumberp(sfep)) ==
+ args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count) {
+ if (i == sfp->hdr.count)
return XFS_ERROR(ENOENT);
- }
/*
* Calculate sizes.
*/
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
- i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
- if (sfep->namelen == args->namelen &&
- sfep->name[0] == args->name[0] &&
- memcmp(args->name, sfep->name, args->namelen) == 0) {
+ i < sfp->hdr.count;
+ i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+ if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+ XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
ino = xfs_dir2_sf_get_inumber(sfp,
xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
* Didn't find it.
*/
if (i == sfp->hdr.count) {
- ASSERT(args->oknoent);
+ ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
#if XFS_BIG_INUMS
if (i8elevated)
xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
/*
* Clean up the inode.
*/
- kmem_free(buf, oldsize);
+ kmem_free(buf);
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
/*
* Clean up the inode.
*/
- kmem_free(buf, oldsize);
+ kmem_free(buf);
dp->i_d.di_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d2..deecc9d238f8 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
* Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
* Only need 16 bits, this is the byte offset into the single block form.
*/
-typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
/*
* The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
__uint8_t count; /* count of entries */
__uint8_t i8count; /* count of 8-byte inode #s */
xfs_dir2_inou_t parent; /* parent dir inode number */
-} xfs_dir2_sf_hdr_t;
+} __arch_pack xfs_dir2_sf_hdr_t;
typedef struct xfs_dir2_sf_entry {
__uint8_t namelen; /* actual name length */
xfs_dir2_sf_off_t offset; /* saved offset */
__uint8_t name[1]; /* name, variable size */
xfs_dir2_inou_t inumber; /* inode number, var. offset */
-} xfs_dir2_sf_entry_t;
+} __arch_pack xfs_dir2_sf_entry_t;
typedef struct xfs_dir2_sf {
xfs_dir2_sf_hdr_t hdr; /* shortform header */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5c..6cc7c0c681ac 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, NULL, NULL);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ NULL, NULL);
}
void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)(bp ? bp->bps[0] : NULL), NULL);
}
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)(lbp ? lbp->bps[0] : NULL),
(void *)(dbp ? dbp->bps[0] : NULL));
}
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)db,
- (void *)dbp);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)db, (void *)dbp);
}
void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck,
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
(void *)((unsigned long)(i >> 32)),
(void *)((unsigned long)(i & 0xFFFFFFFF)));
}
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)s, NULL);
}
void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
(void *)((unsigned long)(args->inumber >> 32)),
(void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
(void *)args->dp, (void *)args->trans,
- (void *)(unsigned long)args->justcheck, (void *)(long)s,
- (void *)dbp);
+ (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+ (void *)(long)s, (void *)dbp);
}
#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a60..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
#ifndef __XFS_DMAPI_H__
#define __XFS_DMAPI_H__
-#include <linux/version.h>
/* Values used to define the on-disk version of dm_attrname_t. All
* on-disk attribute names start with the 8-byte string "SGI_DMI_".
*
@@ -166,6 +165,6 @@ typedef enum {
#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
+#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c31..f227ecd1a294 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,22 +58,11 @@ xfs_error_trap(int e)
}
return e;
}
-#endif
-
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-void
-xfs_error_test_init(void)
-{
- memset(xfs_etest, 0, sizeof(xfs_etest));
- memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
- memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
-}
-
int
xfs_error_test(int error_tag, int *fsidp, char *expression,
int line, char *file, unsigned long randfactor)
@@ -150,8 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
xfs_etest[i]);
xfs_etest[i] = 0;
xfs_etest_fsid[i] = 0LL;
- kmem_free(xfs_etest_fsname[i],
- strlen(xfs_etest_fsname[i]) + 1);
+ kmem_free(xfs_etest_fsname[i]);
xfs_etest_fsname[i] = NULL;
}
}
@@ -163,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
return 0;
}
-#endif /* DEBUG || INDUCE_IO_ERROR */
+#endif /* DEBUG */
static void
xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
@@ -175,7 +163,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
newfmt = kmem_alloc(len, KM_SLEEP);
sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
icmn_err(level, newfmt, ap);
- kmem_free(newfmt, len);
+ kmem_free(newfmt);
} else {
icmn_err(level, fmt, ap);
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e1..11543f10b0c6 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -125,23 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
#define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10)
#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
+#ifdef DEBUG
extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-extern void xfs_error_test_init(void);
#define XFS_NUM_INJECT_ERROR 10
-
-#ifdef __ANSI_CPP__
-#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || \
- xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \
- (rf)))
-#else
#define XFS_TEST_ERROR(expr, mp, tag, rf) \
((expr) || \
xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
(rf)))
-#endif /* __ANSI_CPP__ */
extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
@@ -149,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
#define XFS_TEST_ERROR(expr, mp, tag, rf) (expr)
#define xfs_errortag_add(tag, mp) (ENOSYS)
#define xfs_errortag_clearall(mp, loud) (ENOSYS)
-#endif /* (DEBUG || INDUCE_IO_ERROR) */
+#endif /* DEBUG */
/*
* XFS panic tags -- allow a call to xfs_cmn_err() be turned into
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb8..8aa28f751b2a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
int nexts = efip->efi_format.efi_nextents;
if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
- kmem_free(efip, sizeof(xfs_efi_log_item_t) +
- (nexts - 1) * sizeof(xfs_extent_t));
+ kmem_free(efip);
} else {
kmem_zone_free(xfs_efi_zone, efip);
}
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
int nexts = efdp->efd_format.efd_nextents;
if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
- kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
- (nexts - 1) * sizeof(xfs_extent_t));
+ kmem_free(efdp);
} else {
kmem_zone_free(xfs_efd_zone, efdp);
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b10804..f3bb75da384e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
xfs_filestream_init(void)
{
item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+ if (!item_zone)
+ return -ENOMEM;
#ifdef XFS_FILESTREAMS_TRACE
- xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
+ xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
#endif
- return item_zone ? 0 : -ENOMEM;
+ return 0;
}
/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d050..01c0cc88d3f3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
typedef struct xfs_attr_multiop {
__u32 am_opcode;
+#define ATTR_OP_GET 1 /* return the indicated attr's value */
+#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
+#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
__s32 am_error;
void __user *am_attrname;
void __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7bc..84583cf73db3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
(xfs_sb_version_hassector(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+ (xfs_sb_version_hasasciici(&mp->m_sb) ?
+ XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
(xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
thaw_bdev(sb->s_bdev, sb);
}
-
+
break;
}
case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index e5310c90e50f..83502f3edef0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -181,7 +181,7 @@ xfs_inobt_delrec(
* then we can get rid of this level.
*/
if (numrecs == 1 && level > 0) {
- agbp = cur->bc_private.i.agbp;
+ agbp = cur->bc_private.a.agbp;
agi = XFS_BUF_TO_AGI(agbp);
/*
* pp is still set to the first pointer in the block.
@@ -194,7 +194,7 @@ xfs_inobt_delrec(
* Free the block.
*/
if ((error = xfs_free_extent(cur->bc_tp,
- XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
+ XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
return error;
xfs_trans_binval(cur->bc_tp, bp);
xfs_ialloc_log_agi(cur->bc_tp, agbp,
@@ -379,7 +379,7 @@ xfs_inobt_delrec(
rrecs = be16_to_cpu(right->bb_numrecs);
rbp = bp;
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.i.agno, lbno, 0, &lbp,
+ cur->bc_private.a.agno, lbno, 0, &lbp,
XFS_INO_BTREE_REF)))
return error;
left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -401,7 +401,7 @@ xfs_inobt_delrec(
lrecs = be16_to_cpu(left->bb_numrecs);
lbp = bp;
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.i.agno, rbno, 0, &rbp,
+ cur->bc_private.a.agno, rbno, 0, &rbp,
XFS_INO_BTREE_REF)))
return error;
right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -484,7 +484,7 @@ xfs_inobt_delrec(
xfs_buf_t *rrbp;
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
- cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0,
+ cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
&rrbp, XFS_INO_BTREE_REF)))
return error;
rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
@@ -497,7 +497,7 @@ xfs_inobt_delrec(
* Free the deleting block.
*/
if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
- cur->bc_private.i.agno, rbno), 1)))
+ cur->bc_private.a.agno, rbno), 1)))
return error;
xfs_trans_binval(cur->bc_tp, rbp);
/*
@@ -854,7 +854,7 @@ xfs_inobt_lookup(
{
xfs_agi_t *agi; /* a.g. inode header */
- agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+ agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
agno = be32_to_cpu(agi->agi_seqno);
agbno = be32_to_cpu(agi->agi_root);
}
@@ -1089,7 +1089,7 @@ xfs_inobt_lshift(
* Set up the left neighbor as "left".
*/
if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib),
+ cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
0, &lbp, XFS_INO_BTREE_REF)))
return error;
left = XFS_BUF_TO_INOBT_BLOCK(lbp);
@@ -1207,10 +1207,10 @@ xfs_inobt_newroot(
/*
* Get a block & a buffer.
*/
- agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+ agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
be32_to_cpu(agi->agi_root));
args.mod = args.minleft = args.alignment = args.total = args.wasdel =
args.isfl = args.userdata = args.minalignslop = 0;
@@ -1233,7 +1233,7 @@ xfs_inobt_newroot(
*/
agi->agi_root = cpu_to_be32(args.agbno);
be32_add_cpu(&agi->agi_level, 1);
- xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
+ xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
XFS_AGI_ROOT | XFS_AGI_LEVEL);
/*
* At the previous root level there are now two blocks: the old
@@ -1376,7 +1376,7 @@ xfs_inobt_rshift(
* Set up the right neighbor as "right".
*/
if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib),
+ cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
0, &rbp, XFS_INO_BTREE_REF)))
return error;
right = XFS_BUF_TO_INOBT_BLOCK(rbp);
@@ -1492,7 +1492,7 @@ xfs_inobt_split(
* Allocate the new block.
* If we can't do it, we're toast. Give up.
*/
- args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
args.mod = args.minleft = args.alignment = args.total = args.wasdel =
args.isfl = args.userdata = args.minalignslop = 0;
args.minlen = args.maxlen = args.prod = 1;
@@ -1725,7 +1725,7 @@ xfs_inobt_decrement(
agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.i.agno, agbno, 0, &bp,
+ cur->bc_private.a.agno, agbno, 0, &bp,
XFS_INO_BTREE_REF)))
return error;
lev--;
@@ -1897,7 +1897,7 @@ xfs_inobt_increment(
agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
- cur->bc_private.i.agno, agbno, 0, &bp,
+ cur->bc_private.a.agno, agbno, 0, &bp,
XFS_INO_BTREE_REF)))
return error;
lev--;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b94d9f..e229e9e001c2 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -216,7 +216,14 @@ finish_inode:
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
init_waitqueue_head(&ip->i_ipin_wait);
atomic_set(&ip->i_pincount, 0);
- initnsema(&ip->i_flock, 1, "xfsfino");
+
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&ip->i_flush);
+ complete(&ip->i_flush);
if (lock_flags)
xfs_ilock(ip, lock_flags);
@@ -288,10 +295,17 @@ finish_inode:
*ipp = ip;
/*
+ * Set up the Linux with the Linux inode.
+ */
+ ip->i_vnode = inode;
+ inode->i_private = ip;
+
+ /*
* If we have a real type for an on-disk inode, we can set ops(&unlock)
* now. If it's a new inode being created, xfs_ialloc will handle it.
*/
- xfs_initialize_vnode(mp, inode, ip);
+ if (ip->i_d.di_mode != 0)
+ xfs_setup_inode(ip);
return 0;
}
@@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip,
* Special iput for brand-new inodes that are still locked
*/
void
-xfs_iput_new(xfs_inode_t *ip,
- uint lock_flags)
+xfs_iput_new(
+ xfs_inode_t *ip,
+ uint lock_flags)
{
- struct inode *inode = ip->i_vnode;
+ struct inode *inode = VFS_I(ip);
xfs_itrace_entry(ip);
@@ -775,26 +790,3 @@ xfs_isilocked(
}
#endif
-/*
- * The following three routines simply manage the i_flock
- * semaphore embedded in the inode. This semaphore synchronizes
- * processes attempting to flush the in-core inode back to disk.
- */
-void
-xfs_iflock(xfs_inode_t *ip)
-{
- psema(&(ip->i_flock), PINOD|PLTWAIT);
-}
-
-int
-xfs_iflock_nowait(xfs_inode_t *ip)
-{
- return (cpsema(&(ip->i_flock)));
-}
-
-void
-xfs_ifunlock(xfs_inode_t *ip)
-{
- ASSERT(issemalocked(&(ip->i_flock)));
- vsema(&(ip->i_flock));
-}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf0..00e80df9dd9d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -580,8 +580,8 @@ xfs_iformat_extents(
xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
for (i = 0; i < nex; i++, dp++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
- ep->l0 = be64_to_cpu(get_unaligned(&dp->l0));
- ep->l1 = be64_to_cpu(get_unaligned(&dp->l1));
+ ep->l0 = get_unaligned_be64(&dp->l0);
+ ep->l1 = get_unaligned_be64(&dp->l1);
}
XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
if (whichfork != XFS_DATA_FORK ||
@@ -835,22 +835,22 @@ xfs_iread(
* Do this before xfs_iformat in case it adds entries.
*/
#ifdef XFS_INODE_TRACE
- ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP);
+ ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BMAP_TRACE
- ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
+ ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_BMBT_TRACE
- ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
+ ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_RW_TRACE
- ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
+ ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_ILOCK_TRACE
- ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
+ ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
#endif
#ifdef XFS_DIR2_TRACE
- ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
+ ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif
/*
@@ -1046,9 +1046,9 @@ xfs_ialloc(
{
xfs_ino_t ino;
xfs_inode_t *ip;
- bhv_vnode_t *vp;
uint flags;
int error;
+ timespec_t tv;
/*
* Call the space management code to pick
@@ -1077,13 +1077,12 @@ xfs_ialloc(
}
ASSERT(ip != NULL);
- vp = XFS_ITOV(ip);
ip->i_d.di_mode = (__uint16_t)mode;
ip->i_d.di_onlink = 0;
ip->i_d.di_nlink = nlink;
ASSERT(ip->i_d.di_nlink == nlink);
- ip->i_d.di_uid = current_fsuid(cr);
- ip->i_d.di_gid = current_fsgid(cr);
+ ip->i_d.di_uid = current_fsuid();
+ ip->i_d.di_gid = current_fsgid();
ip->i_d.di_projid = prid;
memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
@@ -1130,7 +1129,13 @@ xfs_ialloc(
ip->i_size = 0;
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
+
+ nanotime(&tv);
+ ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
+ ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+ ip->i_d.di_atime = ip->i_d.di_mtime;
+ ip->i_d.di_ctime = ip->i_d.di_mtime;
+
/*
* di_gen will have been taken care of in xfs_iread.
*/
@@ -1220,7 +1225,7 @@ xfs_ialloc(
xfs_trans_log_inode(tp, ip, flags);
/* now that we have an i_mode we can setup inode ops and unlock */
- xfs_initialize_vnode(tp->t_mountp, vp, ip);
+ xfs_setup_inode(ip);
*ipp = ip;
return 0;
@@ -1399,7 +1404,6 @@ xfs_itruncate_start(
xfs_fsize_t last_byte;
xfs_off_t toss_start;
xfs_mount_t *mp;
- bhv_vnode_t *vp;
int error = 0;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
@@ -1408,7 +1412,6 @@ xfs_itruncate_start(
(flags == XFS_ITRUNC_MAYBE));
mp = ip->i_mount;
- vp = XFS_ITOV(ip);
/* wait for the completion of any pending DIOs */
if (new_size < ip->i_size)
@@ -1457,7 +1460,7 @@ xfs_itruncate_start(
#ifdef DEBUG
if (new_size == 0) {
- ASSERT(VN_CACHED(vp) == 0);
+ ASSERT(VN_CACHED(VFS_I(ip)) == 0);
}
#endif
return error;
@@ -1763,67 +1766,6 @@ xfs_itruncate_finish(
return 0;
}
-
-/*
- * xfs_igrow_start
- *
- * Do the first part of growing a file: zero any data in the last
- * block that is beyond the old EOF. We need to do this before
- * the inode is joined to the transaction to modify the i_size.
- * That way we can drop the inode lock and call into the buffer
- * cache to get the buffer mapping the EOF.
- */
-int
-xfs_igrow_start(
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- cred_t *credp)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(new_size > ip->i_size);
-
- /*
- * Zero any pages that may have been created by
- * xfs_write_file() beyond the end of the file
- * and any blocks between the old and new file sizes.
- */
- return xfs_zero_eof(ip, new_size, ip->i_size);
-}
-
-/*
- * xfs_igrow_finish
- *
- * This routine is called to extend the size of a file.
- * The inode must have both the iolock and the ilock locked
- * for update and it must be a part of the current transaction.
- * The xfs_igrow_start() function must have been called previously.
- * If the change_flag is not zero, the inode change timestamp will
- * be updated.
- */
-void
-xfs_igrow_finish(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int change_flag)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(ip->i_transp == tp);
- ASSERT(new_size > ip->i_size);
-
- /*
- * Update the file size. Update the inode change timestamp
- * if change_flag set.
- */
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- if (change_flag)
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-}
-
-
/*
* This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It
@@ -2258,7 +2200,7 @@ xfs_ifree_cluster(
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+ kmem_free(ip_found);
xfs_put_perag(mp, pag);
}
@@ -2470,7 +2412,7 @@ xfs_iroot_realloc(
(int)new_size);
memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
}
- kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+ kmem_free(ifp->if_broot);
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2456,7 @@ xfs_idata_realloc(
if (new_size == 0) {
if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
}
ifp->if_u1.if_data = NULL;
real_size = 0;
@@ -2529,7 +2471,7 @@ xfs_idata_realloc(
ASSERT(ifp->if_real_bytes != 0);
memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
new_size);
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
}
real_size = 0;
@@ -2636,7 +2578,7 @@ xfs_idestroy_fork(
ifp = XFS_IFORK_PTR(ip, whichfork);
if (ifp->if_broot != NULL) {
- kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+ kmem_free(ifp->if_broot);
ifp->if_broot = NULL;
}
@@ -2650,7 +2592,7 @@ xfs_idestroy_fork(
if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
(ifp->if_u1.if_data != NULL)) {
ASSERT(ifp->if_real_bytes != 0);
- kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
ifp->if_real_bytes = 0;
}
@@ -2691,7 +2633,6 @@ xfs_idestroy(
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
mrfree(&ip->i_lock);
mrfree(&ip->i_iolock);
- freesema(&ip->i_flock);
#ifdef XFS_INODE_TRACE
ktrace_free(ip->i_trace);
@@ -3058,7 +2999,7 @@ xfs_iflush_cluster(
out_free:
read_unlock(&pag->pag_ici_lock);
- kmem_free(ilist, ilist_size);
+ kmem_free(ilist);
return 0;
@@ -3102,17 +3043,17 @@ cluster_corrupt_out:
* Unlocks the flush lock
*/
xfs_iflush_abort(iq);
- kmem_free(ilist, ilist_size);
+ kmem_free(ilist);
return XFS_ERROR(EFSCORRUPTED);
}
/*
* xfs_iflush() will write a modified inode's changes out to the
* inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush semaphore must be
- * held as well. The inode lock will still be held upon return from
+ * in at least shared mode and the inode flush completion must be
+ * active as well. The inode lock will still be held upon return from
* the call and the caller is free to unlock it.
- * The inode flush lock will be unlocked when the inode reaches the disk.
+ * The inode flush will be completed when the inode reaches the disk.
* The flags indicate how the inode's buffer should be written out.
*/
int
@@ -3131,7 +3072,7 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3143,8 +3084,6 @@ xfs_iflush(
* flush lock and do nothing.
*/
if (xfs_inode_clean(ip)) {
- ASSERT((iip != NULL) ?
- !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
xfs_ifunlock(ip);
return 0;
}
@@ -3296,7 +3235,7 @@ xfs_iflush_int(
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3528,7 +3467,6 @@ xfs_iflush_all(
xfs_mount_t *mp)
{
xfs_inode_t *ip;
- bhv_vnode_t *vp;
again:
XFS_MOUNT_ILOCK(mp);
@@ -3543,14 +3481,13 @@ xfs_iflush_all(
continue;
}
- vp = XFS_ITOV_NULL(ip);
- if (!vp) {
+ if (!VFS_I(ip)) {
XFS_MOUNT_IUNLOCK(mp);
xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
goto again;
}
- ASSERT(vn_count(vp) == 0);
+ ASSERT(vn_count(VFS_I(ip)) == 0);
ip = ip->i_mnext;
} while (ip != mp->m_inodes);
@@ -3770,7 +3707,7 @@ xfs_iext_add_indirect_multi(
* (all extents past */
if (nex2) {
byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
- nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
+ nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
erp->er_extcount -= nex2;
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
erp = xfs_iext_irec_new(ifp, erp_idx);
}
memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
- kmem_free(nex2_ep, byte_diff);
+ kmem_free(nex2_ep);
erp->er_extcount += nex2;
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
}
@@ -4070,8 +4007,7 @@ xfs_iext_realloc_direct(
ifp->if_u1.if_extents =
kmem_realloc(ifp->if_u1.if_extents,
rnew_size,
- ifp->if_real_bytes,
- KM_SLEEP);
+ ifp->if_real_bytes, KM_NOFS);
}
if (rnew_size > ifp->if_real_bytes) {
memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -4112,7 +4048,7 @@ xfs_iext_direct_to_inline(
*/
memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
nextents * sizeof(xfs_bmbt_rec_t));
- kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_extents);
ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
ifp->if_real_bytes = 0;
}
@@ -4130,7 +4066,7 @@ xfs_iext_inline_to_direct(
xfs_ifork_t *ifp, /* inode fork pointer */
int new_size) /* number of extents in file */
{
- ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP);
+ ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
memset(ifp->if_u1.if_extents, 0, new_size);
if (ifp->if_bytes) {
memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
@@ -4162,7 +4098,7 @@ xfs_iext_realloc_indirect(
} else {
ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
kmem_realloc(ifp->if_u1.if_ext_irec,
- new_size, size, KM_SLEEP);
+ new_size, size, KM_NOFS);
}
}
@@ -4186,7 +4122,7 @@ xfs_iext_indirect_to_direct(
ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
ep = ifp->if_u1.if_ext_irec->er_extbuf;
- kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
+ kmem_free(ifp->if_u1.if_ext_irec);
ifp->if_flags &= ~XFS_IFEXTIREC;
ifp->if_u1.if_extents = ep;
ifp->if_bytes = size;
@@ -4212,7 +4148,7 @@ xfs_iext_destroy(
}
ifp->if_flags &= ~XFS_IFEXTIREC;
} else if (ifp->if_real_bytes) {
- kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+ kmem_free(ifp->if_u1.if_extents);
} else if (ifp->if_bytes) {
memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
sizeof(xfs_bmbt_rec_t));
@@ -4404,11 +4340,10 @@ xfs_iext_irec_init(
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
ASSERT(nextents <= XFS_LINEAR_EXTS);
- erp = (xfs_ext_irec_t *)
- kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
+ erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
if (nextents == 0) {
- ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+ ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
} else if (!ifp->if_real_bytes) {
xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
@@ -4456,7 +4391,7 @@ xfs_iext_irec_new(
/* Initialize new extent record */
erp = ifp->if_u1.if_ext_irec;
- erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+ erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
erp[erp_idx].er_extcount = 0;
@@ -4483,7 +4418,7 @@ xfs_iext_irec_remove(
if (erp->er_extbuf) {
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-erp->er_extcount);
- kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
+ kmem_free(erp->er_extbuf);
}
/* Compact extent records */
erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4436,7 @@ xfs_iext_irec_remove(
xfs_iext_realloc_indirect(ifp,
nlists * sizeof(xfs_ext_irec_t));
} else {
- kmem_free(ifp->if_u1.if_ext_irec,
- sizeof(xfs_ext_irec_t));
+ kmem_free(ifp->if_u1.if_ext_irec);
}
ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}
@@ -4571,7 +4505,7 @@ xfs_iext_irec_compact_pages(
* so er_extoffs don't get modified in
* xfs_iext_irec_remove.
*/
- kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
+ kmem_free(erp_next->er_extbuf);
erp_next->er_extbuf = NULL;
xfs_iext_irec_remove(ifp, erp_idx + 1);
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4530,63 @@ xfs_iext_irec_compact_full(
int nlists; /* number of irec's (ex lists) */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+
nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
erp = ifp->if_u1.if_ext_irec;
ep = &erp->er_extbuf[erp->er_extcount];
erp_next = erp + 1;
ep_next = erp_next->er_extbuf;
+
while (erp_idx < nlists - 1) {
+ /*
+ * Check how many extent records are available in this irec.
+ * If there is none skip the whole exercise.
+ */
ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
- ext_diff = MIN(ext_avail, erp_next->er_extcount);
- memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
- erp->er_extcount += ext_diff;
- erp_next->er_extcount -= ext_diff;
- /* Remove next page */
- if (erp_next->er_extcount == 0) {
+ if (ext_avail) {
+
/*
- * Free page before removing extent record
- * so er_extoffs don't get modified in
- * xfs_iext_irec_remove.
+ * Copy over as many as possible extent records into
+ * the previous page.
*/
- kmem_free(erp_next->er_extbuf,
- erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
- erp_next->er_extbuf = NULL;
- xfs_iext_irec_remove(ifp, erp_idx + 1);
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- /* Update next page */
- } else {
- /* Move rest of page up to become next new page */
- memmove(erp_next->er_extbuf, ep_next,
- erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
- ep_next = erp_next->er_extbuf;
- memset(&ep_next[erp_next->er_extcount], 0,
- (XFS_LINEAR_EXTS - erp_next->er_extcount) *
- sizeof(xfs_bmbt_rec_t));
+ ext_diff = MIN(ext_avail, erp_next->er_extcount);
+ memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
+ erp->er_extcount += ext_diff;
+ erp_next->er_extcount -= ext_diff;
+
+ /*
+ * If the next irec is empty now we can simply
+ * remove it.
+ */
+ if (erp_next->er_extcount == 0) {
+ /*
+ * Free page before removing extent record
+ * so er_extoffs don't get modified in
+ * xfs_iext_irec_remove.
+ */
+ kmem_free(erp_next->er_extbuf);
+ erp_next->er_extbuf = NULL;
+ xfs_iext_irec_remove(ifp, erp_idx + 1);
+ erp = &ifp->if_u1.if_ext_irec[erp_idx];
+ nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+ /*
+ * If the next irec is not empty move up the content
+ * that has not been copied to the previous page to
+ * the beggining of this one.
+ */
+ } else {
+ memmove(erp_next->er_extbuf, &ep_next[ext_diff],
+ erp_next->er_extcount *
+ sizeof(xfs_bmbt_rec_t));
+ ep_next = erp_next->er_extbuf;
+ memset(&ep_next[erp_next->er_extcount], 0,
+ (XFS_LINEAR_EXTS -
+ erp_next->er_extcount) *
+ sizeof(xfs_bmbt_rec_t));
+ }
}
+
if (erp->er_extcount == XFS_LINEAR_EXTS) {
erp_idx++;
if (erp_idx < nlists)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f03..1420c49674d7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -87,8 +87,7 @@ typedef struct xfs_ifork {
* Flags for xfs_ichgtime().
*/
#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */
-#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
/*
* Per-fork incore inode flags.
@@ -204,7 +203,7 @@ typedef struct xfs_inode {
struct xfs_inode *i_mprev; /* ptr to prev inode */
struct xfs_mount *i_mount; /* fs mount struct ptr */
struct list_head i_reclaim; /* reclaim list */
- bhv_vnode_t *i_vnode; /* vnode backpointer */
+ struct inode *i_vnode; /* vnode backpointer */
struct xfs_dquot *i_udquot; /* user dquot */
struct xfs_dquot *i_gdquot; /* group dquot */
@@ -223,7 +222,7 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
- sema_t i_flock; /* inode flush lock */
+ struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -263,6 +262,18 @@ typedef struct xfs_inode {
#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
(ip)->i_size : (ip)->i_d.di_size;
+/* Convert from vfs inode to xfs inode */
+static inline struct xfs_inode *XFS_I(struct inode *inode)
+{
+ return (struct xfs_inode *)inode->i_private;
+}
+
+/* convert from xfs inode to vfs inode */
+static inline struct inode *VFS_I(struct xfs_inode *ip)
+{
+ return (struct inode *)ip->i_vnode;
+}
+
/*
* i_flags helper functions
*/
@@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
#define XFS_ITRUNC_DEFINITE 0x1
#define XFS_ITRUNC_MAYBE 0x2
-#define XFS_ITOV(ip) ((ip)->i_vnode)
-#define XFS_ITOV_NULL(ip) ((ip)->i_vnode)
-
/*
* For multiple groups support: if S_ISGID bit is set in the parent
* directory, group of new file is set to that of the parent, and
@@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
int xfs_isilocked(xfs_inode_t *, uint);
-void xfs_iflock(xfs_inode_t *);
-int xfs_iflock_nowait(xfs_inode_t *);
uint xfs_ilock_map_shared(xfs_inode_t *);
void xfs_iunlock_map_shared(xfs_inode_t *, uint);
-void xfs_ifunlock(xfs_inode_t *);
void xfs_ireclaim(xfs_inode_t *);
int xfs_finish_reclaim(xfs_inode_t *, int, int);
int xfs_finish_reclaim_all(struct xfs_mount *, int);
@@ -507,9 +512,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
xfs_fsize_t, int, int);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
-void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
- xfs_fsize_t, int);
void xfs_idestroy_fork(xfs_inode_t *, int);
void xfs_idestroy(xfs_inode_t *);
@@ -525,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *);
void xfs_ichgtime(xfs_inode_t *, int);
xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
+void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_atime(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
@@ -573,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
+/*
+ * Manage the i_flush queue embedded in the inode. This completion
+ * queue synchronizes processes attempting to flush the in-core
+ * inode back to disk.
+ */
+static inline void xfs_iflock(xfs_inode_t *ip)
+{
+ wait_for_completion(&ip->i_flush);
+}
+
+static inline int xfs_iflock_nowait(xfs_inode_t *ip)
+{
+ return try_wait_for_completion(&ip->i_flush);
+}
+
+static inline void xfs_ifunlock(xfs_inode_t *ip)
+{
+ complete(&ip->i_flush);
+}
+
#endif /* __KERNEL__ */
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f15772..97c7452e2620 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
ASSERT(ip->i_d.di_nextents > 0);
ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
ASSERT(ip->i_df.if_bytes > 0);
- kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
+ kmem_free(iip->ili_extents_buf);
iip->ili_extents_buf = NULL;
}
if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
ASSERT(ip->i_d.di_anextents > 0);
ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
ASSERT(ip->i_afp->if_bytes > 0);
- kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+ kmem_free(iip->ili_aextents_buf);
iip->ili_aextents_buf = NULL;
}
@@ -779,11 +779,10 @@ xfs_inode_item_pushbuf(
ASSERT(iip->ili_push_owner == current_pid());
/*
- * If flushlock isn't locked anymore, chances are that the
- * inode flush completed and the inode was taken off the AIL.
- * So, just get out.
+ * If a flush is not in progress anymore, chances are that the
+ * inode was taken off the AIL. So, just get out.
*/
- if (!issemalocked(&(ip->i_flock)) ||
+ if (completion_done(&ip->i_flush) ||
((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
iip->ili_pushbuf_flag = 0;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -805,7 +804,7 @@ xfs_inode_item_pushbuf(
* If not, we can flush it async.
*/
dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
- issemalocked(&(ip->i_flock)));
+ !completion_done(&ip->i_flush));
iip->ili_pushbuf_flag = 0;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_buftrace("INODE ITEM PUSH", bp);
@@ -858,7 +857,7 @@ xfs_inode_item_push(
ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
/*
* Since we were able to lock the inode's flush lock and
* we found it on the AIL, the inode must be dirty. This
@@ -957,8 +956,7 @@ xfs_inode_item_destroy(
{
#ifdef XFS_TRANS_DEBUG
if (ip->i_itemp->ili_root_size != 0) {
- kmem_free(ip->i_itemp->ili_orig_root,
- ip->i_itemp->ili_root_size);
+ kmem_free(ip->i_itemp->ili_orig_root);
}
#endif
kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1a..67f22b2b44b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
+ /*
+ * Reserve enough blocks in this transaction for two complete extent
+ * btree splits. We may be converting the middle part of an unwritten
+ * extent and in this case we will insert two new extents in the btree
+ * each of which could cause a full split.
+ *
+ * This reservation amount will be used in the first call to
+ * xfs_bmbt_split() to select an AG with enough space to satisfy the
+ * rest of the operation.
+ */
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
do {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb43..cf6754a3c5b3 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -59,7 +59,6 @@ xfs_bulkstat_one_iget(
{
xfs_icdinode_t *dic; /* dinode core info pointer */
xfs_inode_t *ip; /* incore inode pointer */
- bhv_vnode_t *vp;
int error;
error = xfs_iget(mp, NULL, ino,
@@ -72,7 +71,6 @@ xfs_bulkstat_one_iget(
ASSERT(ip != NULL);
ASSERT(ip->i_blkno != (xfs_daddr_t)0);
- vp = XFS_ITOV(ip);
dic = &ip->i_d;
/* xfs_iget returns the following without needing
@@ -85,7 +83,7 @@ xfs_bulkstat_one_iget(
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
- vn_atime_to_bstime(vp, &buf->bs_atime);
+ vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime);
buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
@@ -257,7 +255,7 @@ xfs_bulkstat_one(
*ubused = error;
out_free:
- kmem_free(buf, sizeof(*buf));
+ kmem_free(buf);
return error;
}
@@ -708,7 +706,7 @@ xfs_bulkstat(
/*
* Done, we're either out of filesystem or space to put the data.
*/
- kmem_free(irbuf, irbsize);
+ kmem_free(irbuf);
*ubcountp = ubelem;
/*
* Found some inodes, return them now and return the error next time.
@@ -914,7 +912,7 @@ xfs_inumbers(
}
*lastino = XFS_AGINO_TO_INO(mp, agno, agino);
}
- kmem_free(buffer, bcount * sizeof(*buffer));
+ kmem_free(buffer);
if (cur)
xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..ccba14eb9dbe 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -160,7 +160,7 @@ void
xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
{
if (!iclog->ic_trace)
- iclog->ic_trace = ktrace_alloc(256, KM_SLEEP);
+ iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
ktrace_enter(iclog->ic_trace,
(void *)((unsigned long)state),
(void *)((unsigned long)current_pid()),
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
static void
xlog_grant_add_space_write(struct log *log, int bytes)
{
- log->l_grant_write_bytes += bytes;
- if (log->l_grant_write_bytes > log->l_logsize) {
- log->l_grant_write_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_write_bytes;
+ if (tmp > bytes)
+ log->l_grant_write_bytes += bytes;
+ else {
log->l_grant_write_cycle++;
+ log->l_grant_write_bytes = bytes - tmp;
}
}
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
- log->l_grant_reserve_bytes += bytes;
- if (log->l_grant_reserve_bytes > log->l_logsize) {
- log->l_grant_reserve_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_reserve_bytes;
+ if (tmp > bytes)
+ log->l_grant_reserve_bytes += bytes;
+ else {
log->l_grant_reserve_cycle++;
+ log->l_grant_reserve_bytes = bytes - tmp;
}
}
@@ -332,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp,
} else {
xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
xlog_regrant_reserve_log_space(log, ticket);
- }
-
- /* If this ticket was a permanent reservation and we aren't
- * trying to release it, reset the inited flags; so next time
- * we write, a start record will be written out.
- */
- if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) &&
- (flags & XFS_LOG_REL_PERM_RESERV) == 0)
+ /* If this ticket was a permanent reservation and we aren't
+ * trying to release it, reset the inited flags; so next time
+ * we write, a start record will be written out.
+ */
ticket->t_flags |= XLOG_TIC_INITED;
+ }
return lsn;
} /* xfs_log_done */
@@ -353,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp,
* Asynchronous forces are implemented by setting the WANT_SYNC
* bit in the appropriate in-core log and then returning.
*
- * Synchronous forces are implemented with a semaphore. All callers
- * to force a given lsn to disk will wait on a semaphore attached to the
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
* specific in-core log. When given in-core log finally completes its
* write to disk, that thread will wake up all threads waiting on the
- * semaphore.
+ * sv.
*/
int
_xfs_log_force(
@@ -584,12 +585,12 @@ error:
* mp - ubiquitous xfs mount point structure
*/
int
-xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags)
+xfs_log_mount_finish(xfs_mount_t *mp)
{
int error;
if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
- error = xlog_recover_finish(mp->m_log, mfsi_flags);
+ error = xlog_recover_finish(mp->m_log);
else {
error = 0;
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -703,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -744,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -834,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
@@ -855,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
@@ -1228,7 +1229,7 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_icloglock);
spin_lock_init(&log->l_grant_lock);
- initnsema(&log->l_flushsema, 0, "ic-flush");
+ sv_init(&log->l_flush_wait, 0, "flush_wait");
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1281,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
+ sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
+ sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
iclogp = &iclog->ic_next;
}
@@ -1561,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_forcesema);
- sv_destroy(&iclog->ic_writesema);
+ sv_destroy(&iclog->ic_force_wait);
+ sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
#ifdef XFS_LOG_TRACE
if (iclog->ic_trace != NULL) {
@@ -1570,10 +1571,9 @@ xlog_dealloc_log(xlog_t *log)
}
#endif
next_iclog = iclog->ic_next;
- kmem_free(iclog, sizeof(xlog_in_core_t));
+ kmem_free(iclog);
iclog = next_iclog;
}
- freesema(&log->l_flushsema);
spinlock_destroy(&log->l_icloglock);
spinlock_destroy(&log->l_grant_lock);
@@ -1587,7 +1587,7 @@ xlog_dealloc_log(xlog_t *log)
}
#endif
log->l_mp->m_log = NULL;
- kmem_free(log, sizeof(xlog_t));
+ kmem_free(log);
} /* xlog_dealloc_log */
/*
@@ -1973,7 +1973,7 @@ xlog_write(xfs_mount_t * mp,
/* Clean iclogs starting from the head. This ordering must be
* maintained, so an iclog doesn't become ACTIVE beyond one that
* is SYNCING. This is also required to maintain the notion that we use
- * a counting semaphore to hold off would be writers to the log when every
+ * a ordered wait queue to hold off would be writers to the log when every
* iclog is trying to sync to disk.
*
* State Change: DIRTY -> ACTIVE
@@ -2097,6 +2097,7 @@ xlog_state_do_callback(
int funcdidcallbacks; /* flag: function did callbacks */
int repeats; /* for issuing console warnings if
* looping too many times */
+ int wake = 0;
spin_lock(&log->l_icloglock);
first_iclog = iclog = log->l_iclog;
@@ -2236,7 +2237,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_forcesema);
+ sv_broadcast(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2278,15 +2279,13 @@ xlog_state_do_callback(
}
#endif
- flushcnt = 0;
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
- flushcnt = log->l_flushcnt;
- log->l_flushcnt = 0;
- }
+ if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+ wake = 1;
spin_unlock(&log->l_icloglock);
- while (flushcnt--)
- vsema(&log->l_flushsema);
-} /* xlog_state_do_callback */
+
+ if (wake)
+ sv_broadcast(&log->l_flush_wait);
+}
/*
@@ -2300,8 +2299,7 @@ xlog_state_do_callback(
* the second completion goes through.
*
* Callbacks could take time, so they are done outside the scope of the
- * global state machine log lock. Assume that the calls to cvsema won't
- * take a long time. At least we know it won't sleep.
+ * global state machine log lock.
*/
STATIC void
xlog_state_done_syncing(
@@ -2337,7 +2335,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_writesema);
+ sv_broadcast(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2345,11 +2343,9 @@ xlog_state_done_syncing(
/*
* If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
- * sleep. The flush semaphore is set to the number of in-core buffers and
- * decremented around disk syncing. Therefore, if all buffers are syncing,
- * this semaphore will cause new writes to sleep until a sync completes.
- * Otherwise, this code just does p() followed by v(). This approximates
- * a sleep/wakeup except we can't race.
+ * sleep. We wait on the flush queue on the head iclog as that should be
+ * the first iclog to complete flushing. Hence if all iclogs are syncing,
+ * we will wait here and all new writes will sleep until a sync completes.
*
* The in-core logs are used in a circular fashion. They are not used
* out-of-order even when an iclog past the head is free.
@@ -2384,16 +2380,15 @@ restart:
}
iclog = log->l_iclog;
- if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
- log->l_flushcnt++;
- spin_unlock(&log->l_icloglock);
+ if (iclog->ic_state != XLOG_STATE_ACTIVE) {
xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
XFS_STATS_INC(xs_log_noiclogs);
- /* Ensure that log writes happen */
- psema(&log->l_flushsema, PINOD);
+
+ /* Wait for log writes to have flushed */
+ sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
goto restart;
}
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+
head = &iclog->ic_header;
atomic_inc(&iclog->ic_refcnt); /* prevents sync */
@@ -2507,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
@@ -2533,7 +2528,7 @@ redo:
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
@@ -2632,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log,
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_sema);
+ sv_signal(&ntic->t_wait);
ntic = ntic->t_next;
} while (ntic != log->l_write_headq);
@@ -2643,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT,
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
@@ -2672,7 +2667,7 @@ redo:
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2915,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log,
* 2. the current iclog is drity, and the previous iclog is in the
* active or dirty state.
*
- * We may sleep (call psema) if:
+ * We may sleep if:
*
* 1. the current iclog is not in the active nor dirty state.
* 2. the current iclog dirty, and the previous iclog is not in the
@@ -3012,7 +3007,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3095,7 +3090,7 @@ try_again:
XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
+ sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
&log->l_icloglock, s);
*log_flushed = 1;
already_slept = 1;
@@ -3115,7 +3110,7 @@ try_again:
!(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
/*
- * Don't wait on the forcesema if we know that we've
+ * Don't wait on completion if we know that we've
* gotten a log write error.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3123,7 +3118,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3179,7 +3174,7 @@ STATIC void
xlog_ticket_put(xlog_t *log,
xlog_ticket_t *ticket)
{
- sv_destroy(&ticket->t_sema);
+ sv_destroy(&ticket->t_wait);
kmem_zone_free(xfs_log_ticket_zone, ticket);
} /* xlog_ticket_put */
@@ -3269,7 +3264,7 @@ xlog_ticket_get(xlog_t *log,
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
+ sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
xlog_tic_reset_res(tic);
@@ -3556,14 +3551,14 @@ xfs_log_force_umount(
*/
if ((tic = log->l_reserve_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
if ((tic = log->l_write_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d1d678ecb63e..d47b91f10822 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
xfs_daddr_t start_block,
int num_bblocks);
-int xfs_log_mount_finish(struct xfs_mount *mp, int);
+int xfs_log_mount_finish(struct xfs_mount *mp);
void xfs_log_move_tail(struct xfs_mount *mp,
xfs_lsn_t tail_lsn);
int xfs_log_notify(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f3..c8a5b22ee3e3 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_sema; /* sleep on this semaphore : 20 */
+ sv_t t_wait; /* ticket wait queue : 20 */
struct xlog_ticket *t_next; /* :4|8 */
struct xlog_ticket *t_prev; /* :4|8 */
xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
* xlog_rec_header_t into the reserved space.
* - ic_data follows, so a write to disk can start at the beginning of
* the iclog.
- * - ic_forcesema is used to implement synchronous forcing of the iclog to disk.
+ * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
* - ic_next is the pointer to the next iclog in the ring.
* - ic_bp is a pointer to the buffer used to write this incore log to disk.
* - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_iclog_fields {
- sv_t ic_forcesema;
- sv_t ic_writesema;
+ sv_t ic_force_wait;
+ sv_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
/*
* Defines to save our code from this glop.
*/
-#define ic_forcesema hic_fields.ic_forcesema
-#define ic_writesema hic_fields.ic_writesema
+#define ic_force_wait hic_fields.ic_force_wait
+#define ic_write_wait hic_fields.ic_write_wait
#define ic_next hic_fields.ic_next
#define ic_prev hic_fields.ic_prev
#define ic_bp hic_fields.ic_bp
@@ -423,10 +423,8 @@ typedef struct log {
int l_logBBsize; /* size of log in BB chunks */
/* The following block of fields are changed while holding icloglock */
- sema_t l_flushsema ____cacheline_aligned_in_smp;
- /* iclog flushing semaphore */
- int l_flushcnt; /* # of procs waiting on this
- * sema */
+ sv_t l_flush_wait ____cacheline_aligned_in_smp;
+ /* waiting for iclog flush */
int l_covered_state;/* state of "covering disk
* log entries" */
xlog_in_core_t *l_iclog; /* head log queue */
@@ -470,7 +468,7 @@ extern int xlog_find_tail(xlog_t *log,
xfs_daddr_t *head_blk,
xfs_daddr_t *tail_blk);
extern int xlog_recover(xlog_t *log);
-extern int xlog_recover_finish(xlog_t *log, int mfsi_flags);
+extern int xlog_recover_finish(xlog_t *log);
extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
extern void xlog_recover_process_iunlinks(xlog_t *log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af0955..82d46ce69d5f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
} else {
prevp->bc_next = bcp->bc_next;
}
- kmem_free(bcp,
- sizeof(xfs_buf_cancel_t));
+ kmem_free(bcp);
}
}
return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
error:
if (need_free)
- kmem_free(in_f, sizeof(*in_f));
+ kmem_free(in_f);
return XFS_ERROR(error);
}
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
item = item->ri_next;
/* Free the regions in the item. */
for (i = 0; i < free_item->ri_cnt; i++) {
- kmem_free(free_item->ri_buf[i].i_addr,
- free_item->ri_buf[i].i_len);
+ kmem_free(free_item->ri_buf[i].i_addr);
}
/* Free the item itself */
- kmem_free(free_item->ri_buf,
- (free_item->ri_total * sizeof(xfs_log_iovec_t)));
- kmem_free(free_item, sizeof(xlog_recover_item_t));
+ kmem_free(free_item->ri_buf);
+ kmem_free(free_item);
} while (first_item != item);
/* Free the transaction recover structure */
- kmem_free(trans, sizeof(xlog_recover_t));
+ kmem_free(trans);
}
STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1);
if (error != 0) {
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
}
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
}
#endif /* DEBUG */
- kmem_free(log->l_buf_cancel_table,
- XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
+ kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
return error;
@@ -3945,8 +3940,7 @@ xlog_recover(
*/
int
xlog_recover_finish(
- xlog_t *log,
- int mfsi_flags)
+ xlog_t *log)
{
/*
* Now we're ready to do the transactions needed for the
@@ -3974,9 +3968,7 @@ xlog_recover_finish(
xfs_log_force(log->l_mp, (xfs_lsn_t)0,
(XFS_LOG_FORCE | XFS_LOG_SYNC));
- if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) {
- xlog_recover_process_iunlinks(log);
- }
+ xlog_recover_process_iunlinks(log);
xlog_recover_check_summary(log);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b71..a4503f5e9497 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
STATIC int xfs_uuid_mount(xfs_mount_t *);
-STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void xfs_unmountfs_wait(xfs_mount_t *);
#ifdef HAVE_PERCPU_SB
-STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
int);
STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
#else
-#define xfs_icsb_destroy_counters(mp) do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
@@ -126,34 +123,12 @@ static const struct {
};
/*
- * Return a pointer to an initialized xfs_mount structure.
- */
-xfs_mount_t *
-xfs_mount_init(void)
-{
- xfs_mount_t *mp;
-
- mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
-
- if (xfs_icsb_init_counters(mp)) {
- mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
- }
-
- spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_ilock);
- mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
-
- return mp;
-}
-
-/*
* Free up the resources associated with a mount structure. Assume that
* the structure was initially zeroed, so we can tell which fields got
* initialized.
*/
-void
-xfs_mount_free(
+STATIC void
+xfs_free_perag(
xfs_mount_t *mp)
{
if (mp->m_perag) {
@@ -161,28 +136,9 @@ xfs_mount_free(
for (agno = 0; agno < mp->m_maxagi; agno++)
if (mp->m_perag[agno].pagb_list)
- kmem_free(mp->m_perag[agno].pagb_list,
- sizeof(xfs_perag_busy_t) *
- XFS_PAGB_NUM_SLOTS);
- kmem_free(mp->m_perag,
- sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
+ kmem_free(mp->m_perag[agno].pagb_list);
+ kmem_free(mp->m_perag);
}
-
- spinlock_destroy(&mp->m_ail_lock);
- spinlock_destroy(&mp->m_sb_lock);
- mutex_destroy(&mp->m_ilock);
- mutex_destroy(&mp->m_growlock);
- if (mp->m_quotainfo)
- XFS_QM_DONE(mp);
-
- if (mp->m_fsname != NULL)
- kmem_free(mp->m_fsname, mp->m_fsname_len);
- if (mp->m_rtname != NULL)
- kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
- if (mp->m_logname != NULL)
- kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
-
- xfs_icsb_destroy_counters(mp);
}
/*
@@ -288,6 +244,19 @@ xfs_mount_validate_sb(
return XFS_ERROR(EFSCORRUPTED);
}
+ /*
+ * Until this is fixed only page-sized or smaller data blocks work.
+ */
+ if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
+ xfs_fs_mount_cmn_err(flags,
+ "file system with blocksize %d bytes",
+ sbp->sb_blocksize);
+ xfs_fs_mount_cmn_err(flags,
+ "only pagesize (%ld) or less will currently work.",
+ PAGE_SIZE);
+ return XFS_ERROR(ENOSYS);
+ }
+
if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_fs_mount_cmn_err(flags,
@@ -309,19 +278,6 @@ xfs_mount_validate_sb(
return XFS_ERROR(ENOSYS);
}
- /*
- * Until this is fixed only page-sized or smaller data blocks work.
- */
- if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
- xfs_fs_mount_cmn_err(flags,
- "file system with blocksize %d bytes",
- sbp->sb_blocksize);
- xfs_fs_mount_cmn_err(flags,
- "only pagesize (%ld) or less will currently work.",
- PAGE_SIZE);
- return XFS_ERROR(ENOSYS);
- }
-
return 0;
}
@@ -734,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
* Update alignment values based on mount options and sb values
*/
STATIC int
-xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
+xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags)
{
xfs_sb_t *sbp = &(mp->m_sb);
- if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
+ if (mp->m_dalign) {
/*
* If stripe unit and stripe width are not multiples
* of the fs blocksize turn off alignment.
@@ -894,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp)
* Check that the data (and log if separate) are an ok size.
*/
STATIC int
-xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
+xfs_check_sizes(xfs_mount_t *mp)
{
xfs_buf_t *bp;
xfs_daddr_t d;
@@ -917,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
return error;
}
- if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
- mp->m_logdev_targp != mp->m_ddev_targp) {
+ if (mp->m_logdev_targp != mp->m_ddev_targp) {
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
cmn_err(CE_WARN, "XFS: size check 3 failed");
@@ -953,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
*/
int
xfs_mountfs(
- xfs_mount_t *mp,
- int mfsi_flags)
+ xfs_mount_t *mp)
{
xfs_sb_t *sbp = &(mp->m_sb);
xfs_inode_t *rip;
__uint64_t resblks;
__int64_t update_flags = 0LL;
uint quotamount, quotaflags;
- int agno;
int uuid_mounted = 0;
int error = 0;
@@ -994,9 +947,19 @@ xfs_mountfs(
* Re-check for ATTR2 in case it was found in bad_features2
* slot.
*/
- if (xfs_sb_version_hasattr2(&mp->m_sb))
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ !(mp->m_flags & XFS_MOUNT_NOATTR2))
mp->m_flags |= XFS_MOUNT_ATTR2;
+ }
+
+ if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+ (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ xfs_sb_version_removeattr2(&mp->m_sb);
+ update_flags |= XFS_SB_FEATURES2;
+ /* update sb_versionnum for the clearing of the morebits */
+ if (!sbp->sb_features2)
+ update_flags |= XFS_SB_VERSIONNUM;
}
/*
@@ -1005,7 +968,7 @@ xfs_mountfs(
* allocator alignment is within an ag, therefore ag has
* to be aligned at stripe boundary.
*/
- error = xfs_update_alignment(mp, mfsi_flags, &update_flags);
+ error = xfs_update_alignment(mp, &update_flags);
if (error)
goto error1;
@@ -1024,8 +987,7 @@ xfs_mountfs(
* since a single partition filesystem is identical to a single
* partition volume/filesystem.
*/
- if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
- (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+ if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
if (xfs_uuid_mount(mp)) {
error = XFS_ERROR(EINVAL);
goto error1;
@@ -1053,7 +1015,7 @@ xfs_mountfs(
/*
* Check that the data (and log if separate) are an ok size.
*/
- error = xfs_check_sizes(mp, mfsi_flags);
+ error = xfs_check_sizes(mp);
if (error)
goto error1;
@@ -1067,13 +1029,6 @@ xfs_mountfs(
}
/*
- * For client case we are done now
- */
- if (mfsi_flags & XFS_MFSI_CLIENT) {
- return 0;
- }
-
- /*
* Copies the low order bits of the timestamp and the randomly
* set "sequence" number out of a UUID.
*/
@@ -1097,8 +1052,10 @@ xfs_mountfs(
* Allocate and initialize the per-ag data.
*/
init_rwsem(&mp->m_peraglock);
- mp->m_perag =
- kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
+ mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
+ KM_MAYFAIL);
+ if (!mp->m_perag)
+ goto error1;
mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
@@ -1210,7 +1167,7 @@ xfs_mountfs(
* delayed until after the root and real-time bitmap inodes
* were consistently read in.
*/
- error = xfs_log_mount_finish(mp, mfsi_flags);
+ error = xfs_log_mount_finish(mp);
if (error) {
cmn_err(CE_WARN, "XFS: log mount finish failed");
goto error4;
@@ -1219,7 +1176,7 @@ xfs_mountfs(
/*
* Complete the quota initialisation, post-log-replay component.
*/
- error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags);
+ error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
if (error)
goto error4;
@@ -1253,31 +1210,25 @@ xfs_mountfs(
error3:
xfs_log_unmount_dealloc(mp);
error2:
- for (agno = 0; agno < sbp->sb_agcount; agno++)
- if (mp->m_perag[agno].pagb_list)
- kmem_free(mp->m_perag[agno].pagb_list,
- sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
- kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
- mp->m_perag = NULL;
- /* FALLTHROUGH */
+ xfs_free_perag(mp);
error1:
if (uuid_mounted)
- xfs_uuid_unmount(mp);
- xfs_freesb(mp);
+ uuid_table_remove(&mp->m_sb.sb_uuid);
return error;
}
/*
- * xfs_unmountfs
- *
* This flushes out the inodes,dquots and the superblock, unmounts the
* log and makes sure that incore structures are freed.
*/
-int
-xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
+void
+xfs_unmountfs(
+ struct xfs_mount *mp)
{
- __uint64_t resblks;
- int error = 0;
+ __uint64_t resblks;
+ int error;
+
+ IRELE(mp->m_rootip);
/*
* We can potentially deadlock here if we have an inode cluster
@@ -1334,32 +1285,20 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
xfs_unmountfs_wait(mp); /* wait for async bufs */
xfs_log_unmount(mp); /* Done! No more fs ops. */
- xfs_freesb(mp);
-
/*
* All inodes from this mount point should be freed.
*/
ASSERT(mp->m_inodes == NULL);
- xfs_unmountfs_close(mp, cr);
if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
- xfs_uuid_unmount(mp);
+ uuid_table_remove(&mp->m_sb.sb_uuid);
-#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
+#if defined(DEBUG)
xfs_errortag_clearall(mp, 0);
#endif
- xfs_mount_free(mp);
- return 0;
-}
-
-void
-xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
-{
- if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_free_buftarg(mp->m_logdev_targp, 1);
- if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp->m_rtdev_targp, 1);
- xfs_free_buftarg(mp->m_ddev_targp, 0);
+ xfs_free_perag(mp);
+ if (mp->m_quotainfo)
+ XFS_QM_DONE(mp);
}
STATIC void
@@ -1905,16 +1844,6 @@ xfs_uuid_mount(
}
/*
- * Remove filesystem from the UUID table.
- */
-STATIC void
-xfs_uuid_unmount(
- xfs_mount_t *mp)
-{
- uuid_table_remove(&mp->m_sb.sb_uuid);
-}
-
-/*
* Used to log changes to the superblock unit and width fields which could
* be altered by the mount options, as well as any potential sb_features2
* fixup. Only the first superblock is updated.
@@ -1928,7 +1857,8 @@ xfs_mount_log_sb(
int error;
ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
- XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
+ XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
+ XFS_SB_VERSIONNUM));
tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2039,7 @@ xfs_icsb_reinit_counters(
xfs_icsb_unlock(mp);
}
-STATIC void
+void
xfs_icsb_destroy_counters(
xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358a..f3c1024b1241 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
struct xfs_extdelta;
struct xfs_swapext;
struct xfs_mru_cache;
+struct xfs_nameops;
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -113,7 +114,7 @@ struct xfs_dqtrxops;
struct xfs_quotainfo;
typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int);
+typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
typedef int (*xfs_qmunmount_t)(struct xfs_mount *);
typedef void (*xfs_qmdone_t)(struct xfs_mount *);
typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
@@ -157,8 +158,8 @@ typedef struct xfs_qmops {
#define XFS_QM_INIT(mp, mnt, fl) \
(*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \
- (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags)
+#define XFS_QM_MOUNT(mp, mnt, fl) \
+ (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
#define XFS_QM_UNMOUNT(mp) \
(*(mp)->m_qm_ops->xfs_qmunmount)(mp)
#define XFS_QM_DONE(mp) \
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
extern int xfs_icsb_init_counters(struct xfs_mount *);
extern void xfs_icsb_reinit_counters(struct xfs_mount *);
+extern void xfs_icsb_destroy_counters(struct xfs_mount *);
extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
#else
-#define xfs_icsb_init_counters(mp) (0)
-#define xfs_icsb_reinit_counters(mp) do { } while (0)
+#define xfs_icsb_init_counters(mp) (0)
+#define xfs_icsb_destroy_counters(mp) do { } while (0)
+#define xfs_icsb_reinit_counters(mp) do { } while (0)
#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
#endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
__uint8_t m_inode_quiesce;/* call quiesce on new inodes.
field governed by m_ilock */
__uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
+ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
int m_dirblksize; /* directory block sz--bytes */
int m_dirblkfsbs; /* directory block sz--fsbs */
xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
counters */
#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
allocator */
+#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
/*
@@ -437,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
/*
* Flags for xfs_mountfs
*/
-#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */
-#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */
-/* XFS_MFSI_RRINODES */
-#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */
- /* log recovery */
-#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */
-/* XFS_MFSI_CONVERT_SUNIT */
#define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */
#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d)
@@ -510,15 +508,12 @@ typedef struct xfs_mod_sb {
#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
-extern xfs_mount_t *xfs_mount_init(void);
extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
extern int xfs_log_sbcount(xfs_mount_t *, uint);
-extern void xfs_mount_free(xfs_mount_t *mp);
-extern int xfs_mountfs(xfs_mount_t *mp, int);
+extern int xfs_mountfs(xfs_mount_t *mp);
extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
-extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
-extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
+extern void xfs_unmountfs(xfs_mount_t *);
extern int xfs_unmountfs_writesb(xfs_mount_t *);
extern int xfs_unmount_flush(xfs_mount_t *, int);
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +539,6 @@ extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
-extern int xfs_init(void);
-extern void xfs_cleanup(void);
-
#endif /* __KERNEL__ */
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589a..afee7eb24323 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
"xfs_mru_cache_elem");
if (!xfs_mru_elem_zone)
- return ENOMEM;
+ goto out;
xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
- if (!xfs_mru_reap_wq) {
- kmem_zone_destroy(xfs_mru_elem_zone);
- return ENOMEM;
- }
+ if (!xfs_mru_reap_wq)
+ goto out_destroy_mru_elem_zone;
return 0;
+
+ out_destroy_mru_elem_zone:
+ kmem_zone_destroy(xfs_mru_elem_zone);
+ out:
+ return -ENOMEM;
}
void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
exit:
if (err && mru && mru->lists)
- kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+ kmem_free(mru->lists);
if (err && mru)
- kmem_free(mru, sizeof(*mru));
+ kmem_free(mru);
return err;
}
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
xfs_mru_cache_flush(mru);
- kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
- kmem_free(mru, sizeof(*mru));
+ kmem_free(mru->lists);
+ kmem_free(mru);
}
/*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad298..d700dacdb10e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
ASSERT(error != EEXIST);
if (error)
goto abort_return;
- xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- } else {
- /*
- * We always want to hit the ctime on the source inode.
- * We do it in the if clause above for the 'new_parent &&
- * src_is_directory' case, and here we get all the other
- * cases. This isn't strictly required by the standards
- * since the source inode isn't really being changed,
- * but old unix file systems did it and some incremental
- * backup programs won't work without it.
- */
- xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
}
/*
+ * We always want to hit the ctime on the source inode.
+ *
+ * This isn't strictly required by the standards since the source
+ * inode isn't really being changed, but old unix file systems did
+ * it and some incremental backup programs won't work without it.
+ */
+ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+
+ /*
* Adjust the link count on src_dp. This is necessary when
* renaming a directory, either within one parent when
* the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b9..e2f68de16159 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
*/
/*
- * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
- */
-STATIC int
-xfs_lowbit32(
- __uint32_t v)
-{
- if (v)
- return ffs(v) - 1;
- return -1;
-}
-
-/*
* Allocate space to the bitmap or summary file, and zero it, for growfs.
*/
STATIC int /* error */
@@ -450,6 +438,7 @@ xfs_rtallocate_extent_near(
}
bbno = XFS_BITTOBLOCK(mp, bno);
i = 0;
+ ASSERT(minlen != 0);
log2len = xfs_highbit32(minlen);
/*
* Loop over all bitmap blocks (bbno + i is current block).
@@ -618,6 +607,8 @@ xfs_rtallocate_extent_size(
xfs_suminfo_t sum; /* summary information for extents */
ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+ ASSERT(maxlen != 0);
+
/*
* Loop over all the levels starting with maxlen.
* At each level, look at all the bitmap blocks, to see if there
@@ -675,6 +666,9 @@ xfs_rtallocate_extent_size(
*rtblock = NULLRTBLOCK;
return 0;
}
+ ASSERT(minlen != 0);
+ ASSERT(maxlen != 0);
+
/*
* Loop over sizes, from maxlen down to minlen.
* This time, when we do the allocations, allow smaller ones
@@ -1961,6 +1955,7 @@ xfs_growfs_rt(
nsbp->sb_blocksize * nsbp->sb_rextsize);
nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
+ ASSERT(nsbp->sb_rextents != 0);
nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
nrsumsize =
@@ -2062,7 +2057,7 @@ xfs_growfs_rt(
/*
* Free the fake mp structure.
*/
- kmem_free(nmp, sizeof(*nmp));
+ kmem_free(nmp);
return error;
}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index b0f31c09a76d..3a82576dde9a 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -314,7 +314,7 @@ xfs_bioerror_relse(
* ASYNC buffers.
*/
XFS_BUF_ERROR(bp, EIO);
- XFS_BUF_V_IODONESEMA(bp);
+ XFS_BUF_FINISH_IOWAIT(bp);
} else {
xfs_buf_relse(bp);
}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f871..3f8cf1587f4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
#define XFS_SB_VERSION_SECTORBIT 0x0800
#define XFS_SB_VERSION_EXTFLGBIT 0x1000
#define XFS_SB_VERSION_DIRV2BIT 0x2000
+#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
#define XFS_SB_VERSION_OKSASHFBITS \
(XFS_SB_VERSION_EXTFLGBIT | \
- XFS_SB_VERSION_DIRV2BIT)
+ XFS_SB_VERSION_DIRV2BIT | \
+ XFS_SB_VERSION_BORGBIT)
#define XFS_SB_VERSION_OKREALFBITS \
(XFS_SB_VERSION_ATTRBIT | \
XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
}
+static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+ (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
+}
+
static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
}
+static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+{
+ sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+ if (!sbp->sb_features2)
+ sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
+}
+
/*
* end of superblock version macros
*/
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa3..4e1c22a23be5 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -43,6 +43,7 @@
#include "xfs_quota.h"
#include "xfs_trans_priv.h"
#include "xfs_trans_space.h"
+#include "xfs_inode_item.h"
STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
@@ -253,7 +254,7 @@ _xfs_trans_alloc(
tp->t_mountp = mp;
tp->t_items_free = XFS_LIC_NUM_SLOTS;
tp->t_busy_free = XFS_LBC_NUM_SLOTS;
- XFS_LIC_INIT(&(tp->t_items));
+ xfs_lic_init(&(tp->t_items));
XFS_LBC_INIT(&(tp->t_busy));
return tp;
}
@@ -282,7 +283,7 @@ xfs_trans_dup(
ntp->t_mountp = tp->t_mountp;
ntp->t_items_free = XFS_LIC_NUM_SLOTS;
ntp->t_busy_free = XFS_LBC_NUM_SLOTS;
- XFS_LIC_INIT(&(ntp->t_items));
+ xfs_lic_init(&(ntp->t_items));
XFS_LBC_INIT(&(ntp->t_busy));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -889,7 +890,7 @@ shut_us_down:
tp->t_commit_lsn = commit_lsn;
if (nvec > XFS_TRANS_LOGVEC_COUNT) {
- kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
+ kmem_free(log_vector);
}
/*
@@ -1169,7 +1170,7 @@ xfs_trans_cancel(
while (licp != NULL) {
lidp = licp->lic_descs;
for (i = 0; i < licp->lic_unused; i++, lidp++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
@@ -1216,6 +1217,68 @@ xfs_trans_free(
kmem_zone_free(xfs_trans_zone, tp);
}
+/*
+ * Roll from one trans in the sequence of PERMANENT transactions to
+ * the next: permanent transactions are only flushed out when
+ * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon
+ * as possible to let chunks of it go to the log. So we commit the
+ * chunk we've been working on and get a new transaction to continue.
+ */
+int
+xfs_trans_roll(
+ struct xfs_trans **tpp,
+ struct xfs_inode *dp)
+{
+ struct xfs_trans *trans;
+ unsigned int logres, count;
+ int error;
+
+ /*
+ * Ensure that the inode is always logged.
+ */
+ trans = *tpp;
+ xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
+
+ /*
+ * Copy the critical parameters from one trans to the next.
+ */
+ logres = trans->t_log_res;
+ count = trans->t_log_count;
+ *tpp = xfs_trans_dup(trans);
+
+ /*
+ * Commit the current transaction.
+ * If this commit failed, then it'd just unlock those items that
+ * are not marked ihold. That also means that a filesystem shutdown
+ * is in progress. The caller takes the responsibility to cancel
+ * the duplicate transaction that gets returned.
+ */
+ error = xfs_trans_commit(trans, 0);
+ if (error)
+ return (error);
+
+ trans = *tpp;
+
+ /*
+ * Reserve space in the log for th next transaction.
+ * This also pushes items in the "AIL", the list of logged items,
+ * out to disk if they are taking up space at the tail of the log
+ * that we want to use. This requires that either nothing be locked
+ * across this call, or that anything that is locked be logged in
+ * the prior and the next transactions.
+ */
+ error = xfs_trans_reserve(trans, 0, logres, 0,
+ XFS_TRANS_PERM_LOG_RES, count);
+ /*
+ * Ensure that the inode is in the new transaction and locked.
+ */
+ if (error)
+ return error;
+
+ xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(trans, dp);
+ return 0;
+}
/*
* THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
@@ -1253,7 +1316,7 @@ xfs_trans_committed(
* Special case the chunk embedded in the transaction.
*/
licp = &(tp->t_items);
- if (!(XFS_LIC_ARE_ALL_FREE(licp))) {
+ if (!(xfs_lic_are_all_free(licp))) {
xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
}
@@ -1262,10 +1325,10 @@ xfs_trans_committed(
*/
licp = licp->lic_next;
while (licp != NULL) {
- ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+ ASSERT(!xfs_lic_are_all_free(licp));
xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
next_licp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
licp = next_licp;
}
@@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed(
lidp = licp->lic_descs;
for (i = 0; i < licp->lic_unused; i++, lidp++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 0804207c7391..74c80bd2b0ec 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk {
* lic_unused to the right value (0 matches all free). The
* lic_descs.lid_index values are set up as each desc is allocated.
*/
-#define XFS_LIC_INIT(cp) xfs_lic_init(cp)
static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
{
cp->lic_free = XFS_LIC_FREEMASK;
}
-#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot)
static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
{
cp->lic_descs[slot].lid_index = (unsigned char)(slot);
}
-#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp)
static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
{
return cp->lic_free & XFS_LIC_FREEMASK;
}
-#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp)
static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
{
cp->lic_free = XFS_LIC_FREEMASK;
}
-#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp)
static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
{
return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
}
-#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot)
static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
{
return (cp->lic_free & (1 << slot));
}
-#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot)
static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
{
cp->lic_free &= ~(1 << slot);
}
-#define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot)
static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
{
cp->lic_free |= 1 << slot;
}
-#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot)
static inline xfs_log_item_desc_t *
xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
{
return &(cp->lic_descs[slot]);
}
-#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp)
static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
{
return (uint)dp->lid_index;
@@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
* All of this yields the address of the chunk, which is
* cast to a chunk pointer.
*/
-#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp)
static inline xfs_log_item_chunk_t *
xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
{
@@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *,
int *);
#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
void xfs_trans_cancel(xfs_trans_t *, int);
+int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index cb0c5839154b..4e855b5ced66 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match(
bp = NULL;
len = BBTOB(len);
licp = &tp->t_items;
- if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (!xfs_lic_are_all_free(licp)) {
for (i = 0; i < licp->lic_unused; i++) {
/*
* Skip unoccupied slots.
*/
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- lidp = XFS_LIC_SLOT(licp, i);
+ lidp = xfs_lic_slot(licp, i);
blip = (xfs_buf_log_item_t *)lidp->lid_item;
if (blip->bli_item.li_type != XFS_LI_BUF) {
continue;
@@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all(
bp = NULL;
len = BBTOB(len);
for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
- if (XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (xfs_lic_are_all_free(licp)) {
ASSERT(licp == &tp->t_items);
ASSERT(licp->lic_next == NULL);
return NULL;
@@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all(
/*
* Skip unoccupied slots.
*/
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- lidp = XFS_LIC_SLOT(licp, i);
+ lidp = xfs_lic_slot(licp, i);
blip = (xfs_buf_log_item_t *)lidp->lid_item;
if (blip->bli_item.li_type != XFS_LI_BUF) {
continue;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e9985..2a1c0f071f91 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
iip = ip->i_itemp;
if (iip->ili_root_size != 0) {
ASSERT(iip->ili_orig_root != NULL);
- kmem_free(iip->ili_orig_root, iip->ili_root_size);
+ kmem_free(iip->ili_orig_root);
iip->ili_root_size = 0;
iip->ili_orig_root = NULL;
}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894b..3c666e8317f8 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
* Initialize the chunk, and then
* claim the first slot in the newly allocated chunk.
*/
- XFS_LIC_INIT(licp);
- XFS_LIC_CLAIM(licp, 0);
+ xfs_lic_init(licp);
+ xfs_lic_claim(licp, 0);
licp->lic_unused = 1;
- XFS_LIC_INIT_SLOT(licp, 0);
- lidp = XFS_LIC_SLOT(licp, 0);
+ xfs_lic_init_slot(licp, 0);
+ lidp = xfs_lic_slot(licp, 0);
/*
* Link in the new chunk and update the free count.
@@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
*/
licp = &tp->t_items;
while (licp != NULL) {
- if (XFS_LIC_VACANCY(licp)) {
+ if (xfs_lic_vacancy(licp)) {
if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
i = licp->lic_unused;
- ASSERT(XFS_LIC_ISFREE(licp, i));
+ ASSERT(xfs_lic_isfree(licp, i));
break;
}
for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
- if (XFS_LIC_ISFREE(licp, i))
+ if (xfs_lic_isfree(licp, i))
break;
}
ASSERT(i <= XFS_LIC_MAX_SLOT);
@@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
* If we find a free descriptor, claim it,
* initialize it, and return it.
*/
- XFS_LIC_CLAIM(licp, i);
+ xfs_lic_claim(licp, i);
if (licp->lic_unused <= i) {
licp->lic_unused = i + 1;
- XFS_LIC_INIT_SLOT(licp, i);
+ xfs_lic_init_slot(licp, i);
}
- lidp = XFS_LIC_SLOT(licp, i);
+ lidp = xfs_lic_slot(licp, i);
tp->t_items_free--;
lidp->lid_item = lip;
lidp->lid_flags = 0;
@@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
xfs_log_item_chunk_t *licp;
xfs_log_item_chunk_t **licpp;
- slot = XFS_LIC_DESC_TO_SLOT(lidp);
- licp = XFS_LIC_DESC_TO_CHUNK(lidp);
- XFS_LIC_RELSE(licp, slot);
+ slot = xfs_lic_desc_to_slot(lidp);
+ licp = xfs_lic_desc_to_chunk(lidp);
+ xfs_lic_relse(licp, slot);
lidp->lid_item->li_desc = NULL;
tp->t_items_free++;
@@ -154,14 +154,14 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
* Also decrement the transaction structure's count of free items
* by the number in a chunk since we are freeing an empty chunk.
*/
- if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) {
+ if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) {
licpp = &(tp->t_items.lic_next);
while (*licpp != licp) {
ASSERT(*licpp != NULL);
licpp = &((*licpp)->lic_next);
}
*licpp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
tp->t_items_free -= XFS_LIC_NUM_SLOTS;
}
}
@@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp)
/*
* If it's not in the first chunk, skip to the second.
*/
- if (XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (xfs_lic_are_all_free(licp)) {
licp = licp->lic_next;
}
/*
* Return the first non-free descriptor in the chunk.
*/
- ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+ ASSERT(!xfs_lic_are_all_free(licp));
for (i = 0; i < licp->lic_unused; i++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- return XFS_LIC_SLOT(licp, i);
+ return xfs_lic_slot(licp, i);
}
cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
return NULL;
@@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
xfs_log_item_chunk_t *licp;
int i;
- licp = XFS_LIC_DESC_TO_CHUNK(lidp);
+ licp = xfs_lic_desc_to_chunk(lidp);
/*
* First search the rest of the chunk. The for loop keeps us
* from referencing things beyond the end of the chunk.
*/
- for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- return XFS_LIC_SLOT(licp, i);
+ return xfs_lic_slot(licp, i);
}
/*
@@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
}
licp = licp->lic_next;
- ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+ ASSERT(!xfs_lic_are_all_free(licp));
for (i = 0; i < licp->lic_unused; i++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
- return XFS_LIC_SLOT(licp, i);
+ return xfs_lic_slot(licp, i);
}
ASSERT(0);
/* NOTREACHED */
@@ -300,9 +300,9 @@ xfs_trans_free_items(
/*
* Special case the embedded chunk so we don't free it below.
*/
- if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (!xfs_lic_are_all_free(licp)) {
(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
- XFS_LIC_ALL_FREE(licp);
+ xfs_lic_all_free(licp);
licp->lic_unused = 0;
}
licp = licp->lic_next;
@@ -311,10 +311,10 @@ xfs_trans_free_items(
* Unlock each item in each chunk and free the chunks.
*/
while (licp != NULL) {
- ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+ ASSERT(!xfs_lic_are_all_free(licp));
(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
next_licp = licp->lic_next;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
licp = next_licp;
}
@@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
/*
* Special case the embedded chunk so we don't free.
*/
- if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (!xfs_lic_are_all_free(licp)) {
freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
}
licpp = &(tp->t_items.lic_next);
@@ -358,12 +358,12 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
* and free empty chunks.
*/
while (licp != NULL) {
- ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+ ASSERT(!xfs_lic_are_all_free(licp));
freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn);
next_licp = licp->lic_next;
- if (XFS_LIC_ARE_ALL_FREE(licp)) {
+ if (xfs_lic_are_all_free(licp)) {
*licpp = next_licp;
- kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+ kmem_free(licp);
freed -= XFS_LIC_NUM_SLOTS;
} else {
licpp = &(licp->lic_next);
@@ -402,7 +402,7 @@ xfs_trans_unlock_chunk(
freed = 0;
lidp = licp->lic_descs;
for (i = 0; i < licp->lic_unused; i++, lidp++) {
- if (XFS_LIC_ISFREE(licp, i)) {
+ if (xfs_lic_isfree(licp, i)) {
continue;
}
lip = lidp->lid_item;
@@ -421,7 +421,7 @@ xfs_trans_unlock_chunk(
*/
if (!(freeing_chunk) &&
(!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) {
- XFS_LIC_RELSE(licp, i);
+ xfs_lic_relse(licp, i);
freed++;
}
}
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
lbcp = tp->t_busy.lbc_next;
while (lbcp != NULL) {
lbcq = lbcp->lbc_next;
- kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t));
+ kmem_free(lbcp);
lbcp = lbcq;
}
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 98e5f110ba5f..35d4d414bcc2 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -237,7 +237,7 @@ xfs_droplink(
ASSERT (ip->i_d.di_nlink > 0);
ip->i_d.di_nlink--;
- drop_nlink(ip->i_vnode);
+ drop_nlink(VFS_I(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = 0;
@@ -301,7 +301,7 @@ xfs_bumplink(
ASSERT(ip->i_d.di_nlink > 0);
ip->i_d.di_nlink++;
- inc_nlink(ip->i_vnode);
+ inc_nlink(VFS_I(ip));
if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&
(ip->i_d.di_nlink > XFS_MAXLINK_1)) {
/*
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f316cb85d8e2..ef321225d269 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,9 +18,6 @@
#ifndef __XFS_UTILS_H__
#define __XFS_UTILS_H__
-#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
-#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
-
extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e5..439dd3939dda 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
#include "xfs_utils.h"
-int __init
-xfs_init(void)
-{
-#ifdef XFS_DABUF_DEBUG
- extern spinlock_t xfs_dabuf_global_lock;
- spin_lock_init(&xfs_dabuf_global_lock);
-#endif
-
- /*
- * Initialize all of the zone allocators we use.
- */
- xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
- "xfs_log_ticket");
- xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
- "xfs_bmap_free_item");
- xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
- "xfs_btree_cur");
- xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
- "xfs_da_state");
- xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
- xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
- xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
- xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
- xfs_mru_cache_init();
- xfs_filestream_init();
-
- /*
- * The size of the zone allocated buf log item is the maximum
- * size possible under XFS. This wastes a little bit of memory,
- * but it is much faster.
- */
- xfs_buf_item_zone =
- kmem_zone_init((sizeof(xfs_buf_log_item_t) +
- (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
- NBWORD) * sizeof(int))),
- "xfs_buf_item");
- xfs_efd_zone =
- kmem_zone_init((sizeof(xfs_efd_log_item_t) +
- ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))),
- "xfs_efd_item");
- xfs_efi_zone =
- kmem_zone_init((sizeof(xfs_efi_log_item_t) +
- ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))),
- "xfs_efi_item");
-
- /*
- * These zones warrant special memory allocator hints
- */
- xfs_inode_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
- KM_ZONE_SPREAD, NULL);
- xfs_ili_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
- KM_ZONE_SPREAD, NULL);
-
- /*
- * Allocate global trace buffers.
- */
-#ifdef XFS_ALLOC_TRACE
- xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
- xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
- xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ATTR_TRACE
- xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
- xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
-#endif
-
- xfs_dir_startup();
-
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
- xfs_error_test_init();
-#endif /* DEBUG || INDUCE_IO_ERROR */
-
- xfs_init_procfs();
- xfs_sysctl_register();
- return 0;
-}
-
-void __exit
-xfs_cleanup(void)
-{
- extern kmem_zone_t *xfs_inode_zone;
- extern kmem_zone_t *xfs_efd_zone;
- extern kmem_zone_t *xfs_efi_zone;
-
- xfs_cleanup_procfs();
- xfs_sysctl_unregister();
- xfs_filestream_uninit();
- xfs_mru_cache_uninit();
- xfs_acl_zone_destroy(xfs_acl_zone);
-
-#ifdef XFS_DIR2_TRACE
- ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
- ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BMBT_TRACE
- ktrace_free(xfs_bmbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
- ktrace_free(xfs_alloc_trace_buf);
-#endif
-
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- kmem_zone_destroy(xfs_btree_cur_zone);
- kmem_zone_destroy(xfs_inode_zone);
- kmem_zone_destroy(xfs_trans_zone);
- kmem_zone_destroy(xfs_da_state_zone);
- kmem_zone_destroy(xfs_dabuf_zone);
- kmem_zone_destroy(xfs_buf_item_zone);
- kmem_zone_destroy(xfs_efd_zone);
- kmem_zone_destroy(xfs_efi_zone);
- kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_log_ticket_zone);
-}
-
-/*
- * xfs_start_flags
- *
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- /* Values are in BBs */
- if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = ap->sunit;
- mp->m_swidth = ap->swidth;
- }
-
- if (ap->logbufs != -1 &&
- ap->logbufs != 0 &&
- (ap->logbufs < XLOG_MIN_ICLOGS ||
- ap->logbufs > XLOG_MAX_ICLOGS)) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufs value: %d [not %d-%d]",
- ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbufs = ap->logbufs;
- if (ap->logbufsize != -1 &&
- ap->logbufsize != 0 &&
- (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
- ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(ap->logbufsize))) {
- cmn_err(CE_WARN,
- "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- ap->logbufsize);
- return XFS_ERROR(EINVAL);
- }
- mp->m_logbsize = ap->logbufsize;
- mp->m_fsname_len = strlen(ap->fsname) + 1;
- mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
- strcpy(mp->m_fsname, ap->fsname);
- if (ap->rtname[0]) {
- mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
- strcpy(mp->m_rtname, ap->rtname);
- }
- if (ap->logname[0]) {
- mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
- strcpy(mp->m_logname, ap->logname);
- }
-
- if (ap->flags & XFSMNT_WSYNC)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
- if (ap->flags & XFSMNT_INO64) {
- mp->m_flags |= XFS_MOUNT_INO64;
- mp->m_inoadd = XFS_INO64_OFFSET;
- }
-#endif
- if (ap->flags & XFSMNT_RETERR)
- mp->m_flags |= XFS_MOUNT_RETERR;
- if (ap->flags & XFSMNT_NOALIGN)
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- if (ap->flags & XFSMNT_SWALLOC)
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- if (ap->flags & XFSMNT_OSYNCISOSYNC)
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
- if (ap->flags & XFSMNT_32BITINODES)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
-
- if (ap->flags & XFSMNT_IOSIZE) {
- if (ap->iosizelog > XFS_MAX_IO_LOG ||
- ap->iosizelog < XFS_MIN_IO_LOG) {
- cmn_err(CE_WARN,
- "XFS: invalid log iosize: %d [not %d-%d]",
- ap->iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
- }
-
- if (ap->flags & XFSMNT_IKEEP)
- mp->m_flags |= XFS_MOUNT_IKEEP;
- if (ap->flags & XFSMNT_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (ap->flags & XFSMNT_ATTR2)
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * no recovery flag requires a read-only mount
- */
- if (ap->flags & XFSMNT_NORECOVERY) {
- if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- cmn_err(CE_WARN,
- "XFS: tried to mount a FS read-write without recovery!");
- return XFS_ERROR(EINVAL);
- }
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- }
-
- if (ap->flags & XFSMNT_NOUUID)
- mp->m_flags |= XFS_MOUNT_NOUUID;
- if (ap->flags & XFSMNT_BARRIER)
- mp->m_flags |= XFS_MOUNT_BARRIER;
- else
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
- if (ap->flags2 & XFSMNT2_FILESTREAMS)
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
- if (ap->flags & XFSMNT_DMAPI)
- mp->m_flags |= XFS_MOUNT_DMAPI;
- return 0;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
- struct xfs_mount_args *ap,
- struct xfs_mount *mp)
-{
- int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
- /* Fail a mount where the logbuf is smaller then the log stripe */
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- if ((ap->logbufsize <= 0) &&
- (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
- mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (ap->logbufsize > 0 &&
- ap->logbufsize < mp->m_sb.sb_logsunit) {
- cmn_err(CE_WARN,
- "XFS: logbuf size must be greater than or equal to log stripe size");
- return XFS_ERROR(EINVAL);
- }
- } else {
- /* Fail a mount if the logbuf is larger than 32K */
- if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
- cmn_err(CE_WARN,
- "XFS: logbuf size for version 1 logs must be 16K or 32K");
- return XFS_ERROR(EINVAL);
- }
- }
-
- if (xfs_sb_version_hasattr2(&mp->m_sb))
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- /*
- * prohibit r/w mounts of read-only filesystems
- */
- if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
- cmn_err(CE_WARN,
- "XFS: cannot mount a read-only filesystem as read-write");
- return XFS_ERROR(EROFS);
- }
-
- /*
- * check for shared mount.
- */
- if (ap->flags & XFSMNT_SHARED) {
- if (!xfs_sb_version_hasshared(&mp->m_sb))
- return XFS_ERROR(EINVAL);
-
- /*
- * For IRIX 6.5, shared mounts must have the shared
- * version bit set, have the persistent readonly
- * field set, must be version 0 and can only be mounted
- * read-only.
- */
- if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
- (mp->m_sb.sb_shared_vn != 0))
- return XFS_ERROR(EINVAL);
-
- mp->m_flags |= XFS_MOUNT_SHARED;
-
- /*
- * Shared XFS V0 can't deal with DMI. Return EINVAL.
- */
- if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
- return XFS_ERROR(EINVAL);
- }
-
- if (ap->flags & XFSMNT_UQUOTA) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_UQUOTAENF)
- mp->m_qflags |= XFS_UQUOTA_ENFD;
- }
-
- if (ap->flags & XFSMNT_GQUOTA) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_GQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- } else if (ap->flags & XFSMNT_PQUOTA) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- if (ap->flags & XFSMNT_PQUOTAENF)
- mp->m_qflags |= XFS_OQUOTA_ENFD;
- }
-
- return 0;
-}
-
-/*
- * xfs_mount
- *
- * The file system configurations are:
- * (1) device (partition) with data and internal log
- * (2) logical volume with data and log subvolumes.
- * (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present. The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
-int
-xfs_mount(
- struct xfs_mount *mp,
- struct xfs_mount_args *args,
- cred_t *credp)
-{
- struct block_device *ddev, *logdev, *rtdev;
- int flags = 0, error;
-
- ddev = mp->m_super->s_bdev;
- logdev = rtdev = NULL;
-
- error = xfs_dmops_get(mp, args);
- if (error)
- return error;
- error = xfs_qmops_get(mp, args);
- if (error)
- return error;
-
- if (args->flags & XFSMNT_QUIET)
- flags |= XFS_MFSI_QUIET;
-
- /*
- * Open real time and log devices - order is important.
- */
- if (args->logname[0]) {
- error = xfs_blkdev_get(mp, args->logname, &logdev);
- if (error)
- return error;
- }
- if (args->rtname[0]) {
- error = xfs_blkdev_get(mp, args->rtname, &rtdev);
- if (error) {
- xfs_blkdev_put(logdev);
- return error;
- }
-
- if (rtdev == ddev || rtdev == logdev) {
- cmn_err(CE_WARN,
- "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- return EINVAL;
- }
- }
-
- /*
- * Setup xfs_mount buffer target pointers
- */
- error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
- if (!mp->m_ddev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- return error;
- }
- if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
- if (!mp->m_rtdev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- goto error0;
- }
- }
- mp->m_logdev_targp = (logdev && logdev != ddev) ?
- xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
- if (!mp->m_logdev_targp) {
- xfs_blkdev_put(logdev);
- xfs_blkdev_put(rtdev);
- goto error0;
- }
-
- /*
- * Setup flags based on mount(2) options and then the superblock
- */
- error = xfs_start_flags(args, mp);
- if (error)
- goto error1;
- error = xfs_readsb(mp, flags);
- if (error)
- goto error1;
- error = xfs_finish_flags(args, mp);
- if (error)
- goto error2;
-
- /*
- * Setup xfs_mount buffer target pointers based on superblock
- */
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (!error && logdev && logdev != ddev) {
- unsigned int log_sector_size = BBSIZE;
-
- if (xfs_sb_version_hassector(&mp->m_sb))
- log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
- mp->m_sb.sb_blocksize,
- log_sector_size);
- }
- if (!error && rtdev)
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (error)
- goto error2;
-
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
- if ((error = xfs_filestream_mount(mp)))
- goto error2;
-
- error = xfs_mountfs(mp, flags);
- if (error)
- goto error2;
-
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
-
- return 0;
-
-error2:
- if (mp->m_sb_bp)
- xfs_freesb(mp);
-error1:
- xfs_binval(mp->m_ddev_targp);
- if (logdev && logdev != ddev)
- xfs_binval(mp->m_logdev_targp);
- if (rtdev)
- xfs_binval(mp->m_rtdev_targp);
-error0:
- xfs_unmountfs_close(mp, credp);
- xfs_qmops_put(mp);
- xfs_dmops_put(mp);
- return error;
-}
-
-int
-xfs_unmount(
- xfs_mount_t *mp,
- int flags,
- cred_t *credp)
-{
- xfs_inode_t *rip;
- bhv_vnode_t *rvp;
- int unmount_event_wanted = 0;
- int unmount_event_flags = 0;
- int xfs_unmountfs_needed = 0;
- int error;
-
- rip = mp->m_rootip;
- rvp = XFS_ITOV(rip);
-
-#ifdef HAVE_DMAPI
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- error = XFS_SEND_PREUNMOUNT(mp,
- rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
- NULL, NULL, 0, 0,
- (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
- 0:DM_FLAGS_UNWANTED);
- if (error)
- return XFS_ERROR(error);
- unmount_event_wanted = 1;
- unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
- 0 : DM_FLAGS_UNWANTED;
- }
-#endif
-
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
- xfs_filestream_unmount(mp);
-
- XFS_bflush(mp->m_ddev_targp);
- error = xfs_unmount_flush(mp, 0);
- if (error)
- goto out;
-
- ASSERT(vn_count(rvp) == 1);
-
- /*
- * Drop the reference count
- */
- IRELE(rip);
-
- /*
- * If we're forcing a shutdown, typically because of a media error,
- * we want to make sure we invalidate dirty pages that belong to
- * referenced vnodes as well.
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
- ASSERT(error != EFSCORRUPTED);
- }
- xfs_unmountfs_needed = 1;
-
-out:
- /* Send DMAPI event, if required.
- * Then do xfs_unmountfs() if needed.
- * Then return error (or zero).
- */
- if (unmount_event_wanted) {
- /* Note: mp structure must still exist for
- * XFS_SEND_UNMOUNT() call.
- */
- XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
- DM_RIGHT_NULL, 0, error, unmount_event_flags);
- }
- if (xfs_unmountfs_needed) {
- /*
- * Call common unmount function to flush to disk
- * and free the super block buffer & mount structures.
- */
- xfs_unmountfs(mp, credp);
- xfs_qmops_put(mp);
- xfs_dmops_put(mp);
- kmem_free(mp, sizeof(xfs_mount_t));
- }
-
- return XFS_ERROR(error);
-}
-
STATIC void
xfs_quiesce_fs(
xfs_mount_t *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
xfs_unmountfs_writesb(mp);
}
-int
-xfs_mntupdate(
- struct xfs_mount *mp,
- int *flags,
- struct xfs_mount_args *args)
-{
- if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (args->flags & XFSMNT_BARRIER) {
- mp->m_flags |= XFS_MOUNT_BARRIER;
- xfs_mountfs_check_barriers(mp);
- } else {
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- }
- } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
- xfs_filestream_flush(mp);
- xfs_sync(mp, SYNC_DATA_QUIESCE);
- xfs_attr_quiesce(mp);
- mp->m_flags |= XFS_MOUNT_RDONLY;
- }
- return 0;
-}
-
/*
* xfs_unmount_flush implements a set of flush operation on special
* inodes, which are needed as a separate set of operations so that
@@ -732,7 +128,6 @@ xfs_unmount_flush(
xfs_inode_t *rip = mp->m_rootip;
xfs_inode_t *rbmip;
xfs_inode_t *rsumip = NULL;
- bhv_vnode_t *rvp = XFS_ITOV(rip);
int error;
xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -750,7 +145,7 @@ xfs_unmount_flush(
if (error == EFSCORRUPTED)
goto fscorrupt_out;
- ASSERT(vn_count(XFS_ITOV(rbmip)) == 1);
+ ASSERT(vn_count(VFS_I(rbmip)) == 1);
rsumip = mp->m_rsumip;
xfs_ilock(rsumip, XFS_ILOCK_EXCL);
@@ -761,7 +156,7 @@ xfs_unmount_flush(
if (error == EFSCORRUPTED)
goto fscorrupt_out;
- ASSERT(vn_count(XFS_ITOV(rsumip)) == 1);
+ ASSERT(vn_count(VFS_I(rsumip)) == 1);
}
/*
@@ -771,7 +166,7 @@ xfs_unmount_flush(
if (error == EFSCORRUPTED)
goto fscorrupt_out2;
- if (vn_count(rvp) != 1 && !relocation) {
+ if (vn_count(VFS_I(rip)) != 1 && !relocation) {
xfs_iunlock(rip, XFS_ILOCK_EXCL);
return XFS_ERROR(EBUSY);
}
@@ -888,7 +283,7 @@ xfs_sync_inodes(
int *bypassed)
{
xfs_inode_t *ip = NULL;
- bhv_vnode_t *vp = NULL;
+ struct inode *vp = NULL;
int error;
int last_error;
uint64_t fflag;
@@ -1008,7 +403,7 @@ xfs_sync_inodes(
continue;
}
- vp = XFS_ITOV_NULL(ip);
+ vp = VFS_I(ip);
/*
* If the vnode is gone then this is being torn down,
@@ -1048,7 +443,7 @@ xfs_sync_inodes(
if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
XFS_MOUNT_IUNLOCK(mp);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return 0;
}
@@ -1083,7 +478,7 @@ xfs_sync_inodes(
IPOINTER_INSERT(ip, mp);
xfs_ilock(ip, lock_flags);
- ASSERT(vp == XFS_ITOV(ip));
+ ASSERT(vp == VFS_I(ip));
ASSERT(ip->i_mount == mp);
vnode_refed = B_TRUE;
@@ -1194,7 +589,7 @@ xfs_sync_inodes(
}
XFS_MOUNT_IUNLOCK(mp);
ASSERT(ipointer_in == B_FALSE);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return XFS_ERROR(error);
}
@@ -1224,7 +619,7 @@ xfs_sync_inodes(
ASSERT(ipointer_in == B_FALSE);
- kmem_free(ipointer, sizeof(xfs_iptr_t));
+ kmem_free(ipointer);
return XFS_ERROR(last_error);
}
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55ed..a74b05087da4 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
struct xfs_mount;
struct xfs_mount_args;
-int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
- struct cred *credp);
-int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
-int xfs_mntupdate(struct xfs_mount *mp, int *flags,
- struct xfs_mount_args *args);
int xfs_sync(struct xfs_mount *mp, int flags);
void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..aa238c8fbd7a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
return 0;
}
-/*
- * xfs_setattr
- */
int
xfs_setattr(
- xfs_inode_t *ip,
- bhv_vattr_t *vap,
+ struct xfs_inode *ip,
+ struct iattr *iattr,
int flags,
cred_t *credp)
{
xfs_mount_t *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ int mask = iattr->ia_valid;
xfs_trans_t *tp;
- int mask;
int code;
uint lock_flags;
uint commit_flags=0;
uid_t uid=0, iuid=0;
gid_t gid=0, igid=0;
int timeflags = 0;
- xfs_prid_t projid=0, iprojid=0;
struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
int file_owner;
int need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return XFS_ERROR(EROFS);
- /*
- * Cannot set certain attributes.
- */
- mask = vap->va_mask;
- if (mask & XFS_AT_NOSET) {
- return XFS_ERROR(EINVAL);
- }
-
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- /*
- * Timestamps do not need to be logged and hence do not
- * need to be done within a transaction.
- */
- if (mask & XFS_AT_UPDTIMES) {
- ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
- timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
- ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
- ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
- xfs_ichgtime(ip, timeflags);
- return 0;
- }
-
olddquot1 = olddquot2 = NULL;
udqp = gdqp = NULL;
@@ -139,28 +115,22 @@ xfs_setattr(
* If the IDs do change before we take the ilock, we're covered
* because the i_*dquot fields will get updated anyway.
*/
- if (XFS_IS_QUOTA_ON(mp) &&
- (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
+ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
uint qflags = 0;
- if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = vap->va_uid;
+ if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+ uid = iattr->ia_uid;
qflags |= XFS_QMOPT_UQUOTA;
} else {
uid = ip->i_d.di_uid;
}
- if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = vap->va_gid;
+ if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+ gid = iattr->ia_gid;
qflags |= XFS_QMOPT_GQUOTA;
} else {
gid = ip->i_d.di_gid;
}
- if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
- projid = vap->va_projid;
- qflags |= XFS_QMOPT_PQUOTA;
- } else {
- projid = ip->i_d.di_projid;
- }
+
/*
* We take a reference when we initialize udqp and gdqp,
* so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
*/
ASSERT(udqp == NULL);
ASSERT(gdqp == NULL);
- code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
- &udqp, &gdqp);
+ code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+ qflags, &udqp, &gdqp);
if (code)
return code;
}
@@ -180,10 +150,10 @@ xfs_setattr(
*/
tp = NULL;
lock_flags = XFS_ILOCK_EXCL;
- if (flags & ATTR_NOLOCK)
+ if (flags & XFS_ATTR_NOLOCK)
need_iolock = 0;
- if (!(mask & XFS_AT_SIZE)) {
- if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+ if (!(mask & ATTR_SIZE)) {
+ if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
(mp->m_flags & XFS_MOUNT_WSYNC)) {
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
}
} else {
if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
- !(flags & ATTR_DMI)) {
+ !(flags & XFS_ATTR_DMI)) {
int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
- vap->va_size, 0, dmflags, NULL);
+ iattr->ia_size, 0, dmflags, NULL);
if (code) {
lock_flags = 0;
goto error_return;
@@ -212,16 +182,14 @@ xfs_setattr(
xfs_ilock(ip, lock_flags);
/* boolean: are we the file owner? */
- file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
+ file_owner = (current_fsuid() == ip->i_d.di_uid);
/*
* Change various properties of a file.
* Only the owner or users with CAP_FOWNER
* capability may do these things.
*/
- if (mask &
- (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
- XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
/*
* CAP_FOWNER overrides the following restrictions:
*
@@ -245,21 +213,21 @@ xfs_setattr(
* IDs of the calling process shall match the group owner of
* the file when setting the set-group-ID bit on that file
*/
- if (mask & XFS_AT_MODE) {
+ if (mask & ATTR_MODE) {
mode_t m = 0;
- if ((vap->va_mode & S_ISUID) && !file_owner)
+ if ((iattr->ia_mode & S_ISUID) && !file_owner)
m |= S_ISUID;
- if ((vap->va_mode & S_ISGID) &&
+ if ((iattr->ia_mode & S_ISGID) &&
!in_group_p((gid_t)ip->i_d.di_gid))
m |= S_ISGID;
#if 0
/* Linux allows this, Irix doesn't. */
- if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
+ if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
m |= S_ISVTX;
#endif
if (m && !capable(CAP_FSETID))
- vap->va_mode &= ~m;
+ iattr->ia_mode &= ~m;
}
}
@@ -270,7 +238,7 @@ xfs_setattr(
* and can change the group id only to a group of which he
* or she is a member.
*/
- if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_UID|ATTR_GID)) {
/*
* These IDs could have changed since we last looked at them.
* But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
* would have changed also.
*/
iuid = ip->i_d.di_uid;
- iprojid = ip->i_d.di_projid;
igid = ip->i_d.di_gid;
- gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
- uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
- projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
- iprojid;
+ gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+ uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
/*
* CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
goto error_return;
}
/*
- * Do a quota reservation only if uid/projid/gid is actually
+ * Do a quota reservation only if uid/gid is actually
* going to change.
*/
if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
(XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
ASSERT(tp);
code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
/*
* Truncate file. Must have write permission and not be a directory.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
/* Short circuit the truncate case for zero length files */
- if ((vap->va_size == 0) &&
- (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
+ if (iattr->ia_size == 0 &&
+ ip->i_size == 0 && ip->i_d.di_nextents == 0) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
lock_flags &= ~XFS_ILOCK_EXCL;
- if (mask & XFS_AT_CTIME)
+ if (mask & ATTR_CTIME)
xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
code = 0;
goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
/*
* Change file access or modified times.
*/
- if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+ if (mask & (ATTR_ATIME|ATTR_MTIME)) {
if (!file_owner) {
- if ((flags & ATTR_UTIME) &&
+ if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
!capable(CAP_FOWNER)) {
code = XFS_ERROR(EPERM);
goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
}
/*
- * Change extent size or realtime flag.
- */
- if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
- /*
- * Can't change extent size if any extents are allocated.
- */
- if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
- ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
- vap->va_extsize) ) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
-
- /*
- * Can't change realtime flag if any extents are allocated.
- */
- if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- (mask & XFS_AT_XFLAGS) &&
- (XFS_IS_REALTIME_INODE(ip)) !=
- (vap->va_xflags & XFS_XFLAG_REALTIME)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
- /*
- * Extent size must be a multiple of the appropriate block
- * size, if set at all.
- */
- if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
- xfs_extlen_t size;
-
- if (XFS_IS_REALTIME_INODE(ip) ||
- ((mask & XFS_AT_XFLAGS) &&
- (vap->va_xflags & XFS_XFLAG_REALTIME))) {
- size = mp->m_sb.sb_rextsize <<
- mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- }
- if (vap->va_extsize % size) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
- /*
- * If realtime flag is set then must have realtime data.
- */
- if ((mask & XFS_AT_XFLAGS) &&
- (vap->va_xflags & XFS_XFLAG_REALTIME)) {
- if ((mp->m_sb.sb_rblocks == 0) ||
- (mp->m_sb.sb_rextsize == 0) ||
- (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
-
- /*
- * Can't modify an immutable/append-only file unless
- * we have appropriate permission.
- */
- if ((mask & XFS_AT_XFLAGS) &&
- (ip->i_d.di_flags &
- (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
- (vap->va_xflags &
- (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- code = XFS_ERROR(EPERM);
- goto error_return;
- }
- }
-
- /*
* Now we can make the changes. Before we join the inode
- * to the transaction, if XFS_AT_SIZE is set then take care of
+ * to the transaction, if ATTR_SIZE is set then take care of
* the part of the truncation that must be done without the
* inode lock. This needs to be done before joining the inode
* to the transaction, because the inode cannot be unlocked
* once it is a part of the transaction.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
code = 0;
- if ((vap->va_size > ip->i_size) &&
- (flags & ATTR_NOSIZETOK) == 0) {
- code = xfs_igrow_start(ip, vap->va_size, credp);
+ if (iattr->ia_size > ip->i_size) {
+ /*
+ * Do the first part of growing a file: zero any data
+ * in the last block that is beyond the old EOF. We
+ * need to do this before the inode is joined to the
+ * transaction to modify the i_size.
+ */
+ code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -461,10 +358,10 @@ xfs_setattr(
* not within the range we care about here.
*/
if (!code &&
- (ip->i_size != ip->i_d.di_size) &&
- (vap->va_size > ip->i_d.di_size)) {
+ ip->i_size != ip->i_d.di_size &&
+ iattr->ia_size > ip->i_d.di_size) {
code = xfs_flush_pages(ip,
- ip->i_d.di_size, vap->va_size,
+ ip->i_d.di_size, iattr->ia_size,
XFS_B_ASYNC, FI_NONE);
}
@@ -472,7 +369,7 @@ xfs_setattr(
vn_iowait(ip);
if (!code)
- code = xfs_itruncate_data(ip, vap->va_size);
+ code = xfs_itruncate_data(ip, iattr->ia_size);
if (code) {
ASSERT(tp == NULL);
lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
/*
* Truncate file. Must have write permission and not be a directory.
*/
- if (mask & XFS_AT_SIZE) {
+ if (mask & ATTR_SIZE) {
/*
* Only change the c/mtime if we are changing the size
* or we are explicitly asked to change it. This handles
* the semantic difference between truncate() and ftruncate()
* as implemented in the VFS.
*/
- if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+ if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- if (vap->va_size > ip->i_size) {
- xfs_igrow_finish(tp, ip, vap->va_size,
- !(flags & ATTR_DMI));
- } else if ((vap->va_size <= ip->i_size) ||
- ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+ if (iattr->ia_size > ip->i_size) {
+ ip->i_d.di_size = iattr->ia_size;
+ ip->i_size = iattr->ia_size;
+ if (!(flags & XFS_ATTR_DMI))
+ xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ } else if (iattr->ia_size <= ip->i_size ||
+ (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
/*
* signal a sync transaction unless
* we're truncating an already unlinked
* file on a wsync filesystem
*/
- code = xfs_itruncate_finish(&tp, ip,
- (xfs_fsize_t)vap->va_size,
+ code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
XFS_DATA_FORK,
((ip->i_d.di_nlink != 0 ||
!(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
/*
* Change file access modes.
*/
- if (mask & XFS_AT_MODE) {
+ if (mask & ATTR_MODE) {
ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+ ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
+
+ inode->i_mode &= S_IFMT;
+ inode->i_mode |= iattr->ia_mode & ~S_IFMT;
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
* and can change the group id only to a group of which he
* or she is a member.
*/
- if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ if (mask & (ATTR_UID|ATTR_GID)) {
/*
* CAP_FSETID overrides the following restrictions:
*
@@ -577,39 +479,24 @@ xfs_setattr(
*/
if (iuid != uid) {
if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(mask & XFS_AT_UID);
+ ASSERT(mask & ATTR_UID);
ASSERT(udqp);
olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
&ip->i_udquot, udqp);
}
ip->i_d.di_uid = uid;
+ inode->i_uid = uid;
}
if (igid != gid) {
if (XFS_IS_GQUOTA_ON(mp)) {
ASSERT(!XFS_IS_PQUOTA_ON(mp));
- ASSERT(mask & XFS_AT_GID);
+ ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_gid = gid;
- }
- if (iprojid != projid) {
- if (XFS_IS_PQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_GQUOTA_ON(mp));
- ASSERT(mask & XFS_AT_PROJID);
- ASSERT(gdqp);
- olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
- &ip->i_gdquot, gdqp);
- }
- ip->i_d.di_projid = projid;
- /*
- * We may have to rev the inode as well as
- * the superblock version number since projids didn't
- * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
- */
- if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
- xfs_bump_ino_vers2(tp, ip);
+ inode->i_gid = gid;
}
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,33 @@ xfs_setattr(
/*
* Change file access or modified times.
*/
- if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
- if (mask & XFS_AT_ATIME) {
- ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+ if (mask & (ATTR_ATIME|ATTR_MTIME)) {
+ if (mask & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
ip->i_update_core = 1;
- timeflags &= ~XFS_ICHGTIME_ACC;
}
- if (mask & XFS_AT_MTIME) {
- ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
timeflags &= ~XFS_ICHGTIME_MOD;
timeflags |= XFS_ICHGTIME_CHG;
}
- if (tp && (flags & ATTR_UTIME))
+ if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
}
/*
- * Change XFS-added attributes.
- */
- if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
- if (mask & XFS_AT_EXTSIZE) {
- /*
- * Converting bytes to fs blocks.
- */
- ip->i_d.di_extsize = vap->va_extsize >>
- mp->m_sb.sb_blocklog;
- }
- if (mask & XFS_AT_XFLAGS) {
- uint di_flags;
-
- /* can't set PREALLOC this way, just preserve it */
- di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
- di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (vap->va_xflags & XFS_XFLAG_APPEND)
- di_flags |= XFS_DIFLAG_APPEND;
- if (vap->va_xflags & XFS_XFLAG_SYNC)
- di_flags |= XFS_DIFLAG_SYNC;
- if (vap->va_xflags & XFS_XFLAG_NOATIME)
- di_flags |= XFS_DIFLAG_NOATIME;
- if (vap->va_xflags & XFS_XFLAG_NODUMP)
- di_flags |= XFS_DIFLAG_NODUMP;
- if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
- if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
- if (vap->va_xflags & XFS_XFLAG_REALTIME)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
- di_flags |= XFS_DIFLAG_EXTSIZE;
- }
- ip->i_d.di_flags = di_flags;
- }
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- timeflags |= XFS_ICHGTIME_CHG;
- }
-
- /*
- * Change file inode change time only if XFS_AT_CTIME set
+ * Change file inode change time only if ATTR_CTIME set
* AND we have been called by a DMI function.
*/
- if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
- ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+ if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
ip->i_update_core = 1;
timeflags &= ~XFS_ICHGTIME_CHG;
}
@@ -704,7 +542,7 @@ xfs_setattr(
* Send out timestamp changes that need to be set to the
* current time. Not done when called by a DMI function.
*/
- if (timeflags && !(flags & ATTR_DMI))
+ if (timeflags && !(flags & XFS_ATTR_DMI))
xfs_ichgtime(ip, timeflags);
XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +580,7 @@ xfs_setattr(
}
if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
- !(flags & ATTR_DMI)) {
+ !(flags & XFS_ATTR_DMI)) {
(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
NULL, DM_RIGHT_NULL, NULL, NULL,
0, 0, AT_DELAY_FLAG(flags));
@@ -875,7 +713,7 @@ xfs_fsync(
return XFS_ERROR(EIO);
/* capture size updates in I/O completion before writing the inode. */
- error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+ error = filemap_fdatawait(VFS_I(ip)->i_mapping);
if (error)
return XFS_ERROR(error);
@@ -1321,7 +1159,6 @@ int
xfs_release(
xfs_inode_t *ip)
{
- bhv_vnode_t *vp = XFS_ITOV(ip);
xfs_mount_t *mp = ip->i_mount;
int error;
@@ -1356,13 +1193,13 @@ xfs_release(
* be exposed to that problem.
*/
truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
- if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
+ if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
}
if (ip->i_d.di_nlink != 0) {
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
+ ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
(!(ip->i_d.di_flags &
@@ -1388,7 +1225,6 @@ int
xfs_inactive(
xfs_inode_t *ip)
{
- bhv_vnode_t *vp = XFS_ITOV(ip);
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
@@ -1403,7 +1239,7 @@ xfs_inactive(
* If the inode is already free, then there can be nothing
* to clean up here.
*/
- if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
+ if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) {
ASSERT(ip->i_df.if_real_bytes == 0);
ASSERT(ip->i_df.if_broot_bytes == 0);
return VN_INACTIVE_CACHE;
@@ -1433,7 +1269,7 @@ xfs_inactive(
if (ip->i_d.di_nlink != 0) {
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
- ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
+ ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
(!(ip->i_d.di_flags &
@@ -1601,12 +1437,18 @@ xfs_inactive(
return VN_INACTIVE_CACHE;
}
-
+/*
+ * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
+ * ci_name->name will point to a the actual name (caller must free) or
+ * will be set to NULL if an exact match is found.
+ */
int
xfs_lookup(
xfs_inode_t *dp,
struct xfs_name *name,
- xfs_inode_t **ipp)
+ xfs_inode_t **ipp,
+ struct xfs_name *ci_name)
{
xfs_ino_t inum;
int error;
@@ -1618,7 +1460,7 @@ xfs_lookup(
return XFS_ERROR(EIO);
lock_mode = xfs_ilock_map_shared(dp);
- error = xfs_dir_lookup(NULL, dp, name, &inum);
+ error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
xfs_iunlock_map_shared(dp, lock_mode);
if (error)
@@ -1626,12 +1468,15 @@ xfs_lookup(
error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
if (error)
- goto out;
+ goto out_free_name;
xfs_itrace_ref(*ipp);
return 0;
- out:
+out_free_name:
+ if (ci_name)
+ kmem_free(ci_name->name);
+out:
*ipp = NULL;
return error;
}
@@ -1688,7 +1533,7 @@ xfs_create(
* Make sure that we have allocated dquot(s) on disk.
*/
error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(credp), current_fsgid(credp), prid,
+ current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -1860,111 +1705,6 @@ std_return:
}
#ifdef DEBUG
-/*
- * Some counters to see if (and how often) we are hitting some deadlock
- * prevention code paths.
- */
-
-int xfs_rm_locks;
-int xfs_rm_lock_delays;
-int xfs_rm_attempts;
-#endif
-
-/*
- * The following routine will lock the inodes associated with the
- * directory and the named entry in the directory. The locks are
- * acquired in increasing inode number.
- *
- * If the entry is "..", then only the directory is locked. The
- * vnode ref count will still include that from the .. entry in
- * this case.
- *
- * There is a deadlock we need to worry about. If the locked directory is
- * in the AIL, it might be blocking up the log. The next inode we lock
- * could be already locked by another thread waiting for log space (e.g
- * a permanent log reservation with a long running transaction (see
- * xfs_itruncate_finish)). To solve this, we must check if the directory
- * is in the ail and use lock_nowait. If we can't lock, we need to
- * drop the inode lock on the directory and try again. xfs_iunlock will
- * potentially push the tail if we were holding up the log.
- */
-STATIC int
-xfs_lock_dir_and_entry(
- xfs_inode_t *dp,
- xfs_inode_t *ip) /* inode of entry 'name' */
-{
- int attempts;
- xfs_ino_t e_inum;
- xfs_inode_t *ips[2];
- xfs_log_item_t *lp;
-
-#ifdef DEBUG
- xfs_rm_locks++;
-#endif
- attempts = 0;
-
-again:
- xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
-
- e_inum = ip->i_ino;
-
- xfs_itrace_ref(ip);
-
- /*
- * We want to lock in increasing inum. Since we've already
- * acquired the lock on the directory, we may need to release
- * if if the inum of the entry turns out to be less.
- */
- if (e_inum > dp->i_ino) {
- /*
- * We are already in the right order, so just
- * lock on the inode of the entry.
- * We need to use nowait if dp is in the AIL.
- */
-
- lp = (xfs_log_item_t *)dp->i_itemp;
- if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- attempts++;
-#ifdef DEBUG
- xfs_rm_attempts++;
-#endif
-
- /*
- * Unlock dp and try again.
- * xfs_iunlock will try to push the tail
- * if the inode is in the AIL.
- */
-
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
- if ((attempts % 5) == 0) {
- delay(1); /* Don't just spin the CPU */
-#ifdef DEBUG
- xfs_rm_lock_delays++;
-#endif
- }
- goto again;
- }
- } else {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- }
- } else if (e_inum < dp->i_ino) {
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
- ips[0] = ip;
- ips[1] = dp;
- xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
- }
- /* else e_inum == dp->i_ino */
- /* This can happen if we're asked to lock /x/..
- * the entry is "..", which is also the parent directory.
- */
-
- return 0;
-}
-
-#ifdef DEBUG
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
@@ -2098,12 +1838,44 @@ again:
#endif
}
-#ifdef DEBUG
-#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);}
-int remove_which_error_return = 0;
-#else /* ! DEBUG */
-#define REMOVE_DEBUG_TRACE(x)
-#endif /* ! DEBUG */
+void
+xfs_lock_two_inodes(
+ xfs_inode_t *ip0,
+ xfs_inode_t *ip1,
+ uint lock_mode)
+{
+ xfs_inode_t *temp;
+ int attempts = 0;
+ xfs_log_item_t *lp;
+
+ ASSERT(ip0->i_ino != ip1->i_ino);
+
+ if (ip0->i_ino > ip1->i_ino) {
+ temp = ip0;
+ ip0 = ip1;
+ ip1 = temp;
+ }
+
+ again:
+ xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+
+ /*
+ * If the first lock we have locked is in the AIL, we must TRY to get
+ * the second lock. If we can't get it, we must release the first one
+ * and try again.
+ */
+ lp = (xfs_log_item_t *)ip0->i_itemp;
+ if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+ if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
+ xfs_iunlock(ip0, lock_mode);
+ if ((++attempts % 5) == 0)
+ delay(1); /* Don't just spin the CPU */
+ goto again;
+ }
+ } else {
+ xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+ }
+}
int
xfs_remove(
@@ -2113,6 +1885,7 @@ xfs_remove(
{
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp = NULL;
+ int is_dir = S_ISDIR(ip->i_d.di_mode);
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
@@ -2120,8 +1893,10 @@ xfs_remove(
int committed;
int link_zero;
uint resblks;
+ uint log_count;
xfs_itrace_entry(dp);
+ xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
@@ -2134,19 +1909,23 @@ xfs_remove(
return error;
}
- xfs_itrace_entry(ip);
- xfs_itrace_ref(ip);
-
error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error)
- error = XFS_QM_DQATTACH(mp, ip, 0);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
+ if (error)
goto std_return;
- }
- tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+ error = XFS_QM_DQATTACH(mp, ip, 0);
+ if (error)
+ goto std_return;
+
+ if (is_dir) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
+ log_count = XFS_DEFAULT_LOG_COUNT;
+ } else {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+ log_count = XFS_REMOVE_LOG_COUNT;
+ }
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+
/*
* We try to get the real space reservation first,
* allowing for directory btree deletion(s) implying
@@ -2158,25 +1937,19 @@ xfs_remove(
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+ XFS_TRANS_PERM_LOG_RES, log_count);
if (error == ENOSPC) {
resblks = 0;
error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+ XFS_TRANS_PERM_LOG_RES, log_count);
}
if (error) {
ASSERT(error != ENOSPC);
- REMOVE_DEBUG_TRACE(__LINE__);
- xfs_trans_cancel(tp, 0);
- return error;
+ cancel_flags = 0;
+ goto out_trans_cancel;
}
- error = xfs_lock_dir_and_entry(dp, ip);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
- }
+ xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
/*
* At this point, we've gotten both the directory and the entry
@@ -2189,46 +1962,83 @@ xfs_remove(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
- * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
+ * If we're removing a directory perform some additional validation.
*/
+ if (is_dir) {
+ ASSERT(ip->i_d.di_nlink >= 2);
+ if (ip->i_d.di_nlink != 2) {
+ error = XFS_ERROR(ENOTEMPTY);
+ goto out_trans_cancel;
+ }
+ if (!xfs_dir_isempty(ip)) {
+ error = XFS_ERROR(ENOTEMPTY);
+ goto out_trans_cancel;
+ }
+ }
+
XFS_BMAP_INIT(&free_list, &first_block);
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
&first_block, &free_list, resblks);
if (error) {
ASSERT(error != ENOENT);
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error1;
+ goto out_bmap_cancel;
}
xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ /*
+ * Bump the in memory generation count on the parent
+ * directory so that other can know that it has changed.
+ */
dp->i_gen++;
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- error = xfs_droplink(tp, ip);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error1;
+ if (is_dir) {
+ /*
+ * Drop the link from ip's "..".
+ */
+ error = xfs_droplink(tp, dp);
+ if (error)
+ goto out_bmap_cancel;
+
+ /*
+ * Drop the link from dp to ip.
+ */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ goto out_bmap_cancel;
+ } else {
+ /*
+ * When removing a non-directory we need to log the parent
+ * inode here for the i_gen update. For a directory this is
+ * done implicitly by the xfs_droplink call for the ".." entry.
+ */
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
}
- /* Determine if this is the last link while
+ /*
+ * Drop the "." link from ip to self.
+ */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ goto out_bmap_cancel;
+
+ /*
+ * Determine if this is the last link while
* we are in the transaction.
*/
- link_zero = (ip)->i_d.di_nlink==0;
+ link_zero = (ip->i_d.di_nlink == 0);
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
* the user.
*/
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- }
error = xfs_bmap_finish(&tp, &free_list, &committed);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto error_rele;
- }
+ if (error)
+ goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error)
@@ -2240,38 +2050,26 @@ xfs_remove(
* will get killed on last close in xfs_close() so we don't
* have to worry about that.
*/
- if (link_zero && xfs_inode_is_filestream(ip))
+ if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
xfs_filestream_deassociate(ip);
xfs_itrace_exit(ip);
+ xfs_itrace_exit(dp);
-/* Fall through to std_return with error = 0 */
std_return:
if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
- (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL,
- name->name, NULL, ip->i_d.di_mode, error, 0);
+ XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
+ NULL, DM_RIGHT_NULL, name->name, NULL,
+ ip->i_d.di_mode, error, 0);
}
- return error;
- error1:
- xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
+ return error;
- error_rele:
- /*
- * In this case make sure to not release the inode until after
- * the current transaction is aborted. Releasing it beforehand
- * can cause us to go to xfs_inactive and start a recursive
- * transaction which can easily deadlock with the current one.
- */
+ out_bmap_cancel:
xfs_bmap_cancel(&free_list);
cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
-
goto std_return;
}
@@ -2283,7 +2081,6 @@ xfs_link(
{
xfs_mount_t *mp = tdp->i_mount;
xfs_trans_t *tp;
- xfs_inode_t *ips[2];
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
@@ -2331,15 +2128,7 @@ xfs_link(
goto error_return;
}
- if (sip->i_ino < tdp->i_ino) {
- ips[0] = sip;
- ips[1] = tdp;
- } else {
- ips[0] = tdp;
- ips[1] = sip;
- }
-
- xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
/*
* Increment vnode ref counts since xfs_trans_commit &
@@ -2480,7 +2269,7 @@ xfs_mkdir(
* Make sure that we have allocated dquot(s) on disk.
*/
error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(credp), current_fsgid(credp), prid,
+ current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -2638,186 +2427,6 @@ std_return:
}
int
-xfs_rmdir(
- xfs_inode_t *dp,
- struct xfs_name *name,
- xfs_inode_t *cdp)
-{
- xfs_mount_t *mp = dp->i_mount;
- xfs_trans_t *tp;
- int error;
- xfs_bmap_free_t free_list;
- xfs_fsblock_t first_block;
- int cancel_flags;
- int committed;
- int last_cdp_link;
- uint resblks;
-
- xfs_itrace_entry(dp);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
- error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL, name->name,
- NULL, cdp->i_d.di_mode, 0, 0);
- if (error)
- return XFS_ERROR(error);
- }
-
- /*
- * Get the dquots for the inodes.
- */
- error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error)
- error = XFS_QM_DQATTACH(mp, cdp, 0);
- if (error) {
- REMOVE_DEBUG_TRACE(__LINE__);
- goto std_return;
- }
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
- cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
- /*
- * We try to get the real space reservation first,
- * allowing for directory btree deletion(s) implying
- * possible bmap insert(s). If we can't get the space
- * reservation then we use 0 instead, and avoid the bmap
- * btree insert(s) in the directory code by, if the bmap
- * insert tries to happen, instead trimming the LAST
- * block from the directory.
- */
- resblks = XFS_REMOVE_SPACE_RES(mp);
- error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
- if (error == ENOSPC) {
- resblks = 0;
- error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
- }
- if (error) {
- ASSERT(error != ENOSPC);
- cancel_flags = 0;
- goto error_return;
- }
- XFS_BMAP_INIT(&free_list, &first_block);
-
- /*
- * Now lock the child directory inode and the parent directory
- * inode in the proper order. This will take care of validating
- * that the directory entry for the child directory inode has
- * not changed while we were obtaining a log reservation.
- */
- error = xfs_lock_dir_and_entry(dp, cdp);
- if (error) {
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
- }
-
- IHOLD(dp);
- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-
- IHOLD(cdp);
- xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
-
- ASSERT(cdp->i_d.di_nlink >= 2);
- if (cdp->i_d.di_nlink != 2) {
- error = XFS_ERROR(ENOTEMPTY);
- goto error_return;
- }
- if (!xfs_dir_isempty(cdp)) {
- error = XFS_ERROR(ENOTEMPTY);
- goto error_return;
- }
-
- error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
- &first_block, &free_list, resblks);
- if (error)
- goto error1;
-
- xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
- /*
- * Bump the in memory generation count on the parent
- * directory so that other can know that it has changed.
- */
- dp->i_gen++;
-
- /*
- * Drop the link from cdp's "..".
- */
- error = xfs_droplink(tp, dp);
- if (error) {
- goto error1;
- }
-
- /*
- * Drop the link from dp to cdp.
- */
- error = xfs_droplink(tp, cdp);
- if (error) {
- goto error1;
- }
-
- /*
- * Drop the "." link from cdp to self.
- */
- error = xfs_droplink(tp, cdp);
- if (error) {
- goto error1;
- }
-
- /* Determine these before committing transaction */
- last_cdp_link = (cdp)->i_d.di_nlink==0;
-
- /*
- * If this is a synchronous mount, make sure that the
- * rmdir transaction goes to disk before returning to
- * the user.
- */
- if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
- xfs_trans_set_sync(tp);
- }
-
- error = xfs_bmap_finish (&tp, &free_list, &committed);
- if (error) {
- xfs_bmap_cancel(&free_list);
- xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT));
- goto std_return;
- }
-
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error) {
- goto std_return;
- }
-
-
- /* Fall through to std_return with error = 0 or the errno
- * from xfs_trans_commit. */
- std_return:
- if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
- (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
- dp, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL,
- name->name, NULL, cdp->i_d.di_mode,
- error, 0);
- }
- return error;
-
- error1:
- xfs_bmap_cancel(&free_list);
- cancel_flags |= XFS_TRANS_ABORT;
- /* FALLTHROUGH */
-
- error_return:
- xfs_trans_cancel(tp, cancel_flags);
- goto std_return;
-}
-
-int
xfs_symlink(
xfs_inode_t *dp,
struct xfs_name *link_name,
@@ -2886,7 +2495,7 @@ xfs_symlink(
* Make sure that we have allocated dquot(s) on disk.
*/
error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(credp), current_fsgid(credp), prid,
+ current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -3181,14 +2790,13 @@ int
xfs_reclaim(
xfs_inode_t *ip)
{
- bhv_vnode_t *vp = XFS_ITOV(ip);
xfs_itrace_entry(ip);
- ASSERT(!VN_MAPPED(vp));
+ ASSERT(!VN_MAPPED(VFS_I(ip)));
/* bad inode, get out here ASAP */
- if (VN_BAD(vp)) {
+ if (VN_BAD(VFS_I(ip))) {
xfs_ireclaim(ip);
return 0;
}
@@ -3225,7 +2833,7 @@ xfs_reclaim(
XFS_MOUNT_ILOCK(mp);
spin_lock(&ip->i_flags_lock);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
- vn_to_inode(vp)->i_private = NULL;
+ VFS_I(ip)->i_private = NULL;
ip->i_vnode = NULL;
spin_unlock(&ip->i_flags_lock);
list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
@@ -3241,8 +2849,7 @@ xfs_finish_reclaim(
int sync_mode)
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
- bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
- int error;
+ struct inode *vp = VFS_I(ip);
if (vp && VN_BAD(vp))
goto reclaim;
@@ -3285,29 +2892,16 @@ xfs_finish_reclaim(
xfs_iflock(ip);
}
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- if (ip->i_update_core ||
- ((ip->i_itemp != NULL) &&
- (ip->i_itemp->ili_format.ilf_fields != 0))) {
- error = xfs_iflush(ip, sync_mode);
- /*
- * If we hit an error, typically because of filesystem
- * shutdown, we don't need to let vn_reclaim to know
- * because we're gonna reclaim the inode anyway.
- */
- if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- goto reclaim;
- }
- xfs_iflock(ip); /* synchronize with xfs_iflush_done */
- }
-
- ASSERT(ip->i_update_core == 0);
- ASSERT(ip->i_itemp == NULL ||
- ip->i_itemp->ili_format.ilf_fields == 0);
+ /*
+ * In the case of a forced shutdown we rely on xfs_iflush() to
+ * wait for the inode to be unpinned before returning an error.
+ */
+ if (xfs_iflush(ip, sync_mode) == 0) {
+ /* synchronize with xfs_iflush_done */
+ xfs_iflock(ip);
+ xfs_ifunlock(ip);
}
- xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
reclaim:
@@ -3418,7 +3012,7 @@ xfs_alloc_file_space(
/* Generate a DMAPI event if needed. */
if (alloc_type != 0 && offset < ip->i_size &&
- (attr_flags&ATTR_DMI) == 0 &&
+ (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
xfs_off_t end_dmi_offset;
@@ -3532,7 +3126,7 @@ retry:
allocatesize_fsb -= allocated_fsb;
}
dmapi_enospc_check:
- if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
+ if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
ip, DM_RIGHT_NULL,
@@ -3643,7 +3237,6 @@ xfs_free_file_space(
xfs_off_t len,
int attr_flags)
{
- bhv_vnode_t *vp;
int committed;
int done;
xfs_off_t end_dmi_offset;
@@ -3663,7 +3256,6 @@ xfs_free_file_space(
xfs_trans_t *tp;
int need_iolock = 1;
- vp = XFS_ITOV(ip);
mp = ip->i_mount;
xfs_itrace_entry(ip);
@@ -3679,7 +3271,7 @@ xfs_free_file_space(
end_dmi_offset = offset + len;
endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
- if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 &&
+ if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
if (end_dmi_offset > ip->i_size)
end_dmi_offset = ip->i_size;
@@ -3690,7 +3282,7 @@ xfs_free_file_space(
return error;
}
- if (attr_flags & ATTR_NOLOCK)
+ if (attr_flags & XFS_ATTR_NOLOCK)
need_iolock = 0;
if (need_iolock) {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3700,7 +3292,7 @@ xfs_free_file_space(
rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
ioffset = offset & ~(rounding - 1);
- if (VN_CACHED(vp) != 0) {
+ if (VN_CACHED(VFS_I(ip)) != 0) {
xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
if (error)
@@ -3867,7 +3459,7 @@ xfs_change_file_space(
xfs_off_t startoffset;
xfs_off_t llen;
xfs_trans_t *tp;
- bhv_vattr_t va;
+ struct iattr iattr;
xfs_itrace_entry(ip);
@@ -3941,10 +3533,10 @@ xfs_change_file_space(
break;
}
- va.va_mask = XFS_AT_SIZE;
- va.va_size = startoffset;
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = startoffset;
- error = xfs_setattr(ip, &va, attr_flags, credp);
+ error = xfs_setattr(ip, &iattr, attr_flags, credp);
if (error)
return error;
@@ -3974,7 +3566,7 @@ xfs_change_file_space(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
- if ((attr_flags & ATTR_DMI) == 0) {
+ if ((attr_flags & XFS_ATTR_DMI) == 0) {
ip->i_d.di_mode &= ~S_ISUID;
/*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..e932a96bec54 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
#define _XFS_VNODEOPS_H 1
struct attrlist_cursor_kern;
-struct bhv_vattr;
struct cred;
struct file;
+struct iattr;
struct inode;
struct iovec;
struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
struct cred *credp);
+#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
+#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
+#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);
int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
- struct xfs_inode **ipp);
+ struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
struct xfs_name *target_name);
int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
mode_t mode, struct xfs_inode **ipp, struct cred *credp);
-int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
- struct xfs_inode *cdp);
int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
xfs_off_t *offset, filldir_t filldir);
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,