summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c21
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/vfs_inode.c126
-rw-r--r--fs/9p/vfs_super.c39
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/afs/file.c18
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/binfmt_elf.c28
-rw-r--r--fs/block_dev.c29
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c9
-rw-r--r--fs/char_dev.c40
-rw-r--r--fs/cifs/CHANGES5
-rw-r--r--fs/cifs/cifs_spnego.c2
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/cifsfs.c22
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h21
-rw-r--r--fs/cifs/cifssmb.c316
-rw-r--r--fs/cifs/connect.c49
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c43
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/transport.c17
-rw-r--r--fs/compat.c17
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/exec.c67
-rw-r--r--fs/ext2/acl.c8
-rw-r--r--fs/ext2/acl.h4
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/namei.c8
-rw-r--r--fs/ext3/Kconfig32
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/acl.h4
-rw-r--r--fs/ext3/file.c63
-rw-r--r--fs/ext3/namei.c4
-rw-r--r--fs/ext3/super.c40
-rw-r--r--fs/ext4/acl.c8
-rw-r--r--fs/ext4/acl.h4
-rw-r--r--fs/ext4/file.c55
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/fat/file.c22
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fs-writeback.c1119
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/acl.c106
-rw-r--r--fs/gfs2/dentry.c18
-rw-r--r--fs/gfs2/eaops.c157
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/export.c36
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/gfs2/incore.h15
-rw-r--r--fs/gfs2/inode.c159
-rw-r--r--fs/gfs2/ops_fstype.c66
-rw-r--r--fs/gfs2/ops_inode.c82
-rw-r--r--fs/gfs2/rgrp.c88
-rw-r--r--fs/gfs2/rgrp.h6
-rw-r--r--fs/gfs2/super.c46
-rw-r--r--fs/gfs2/super.h5
-rw-r--r--fs/gfs2/sys.c49
-rw-r--r--fs/gfs2/util.c41
-rw-r--r--fs/gfs2/xattr.c (renamed from fs/gfs2/eattr.c)425
-rw-r--r--fs/gfs2/xattr.h (renamed from fs/gfs2/eattr.h)54
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--fs/jffs2/acl.c7
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jffs2/wbuf.c10
-rw-r--r--fs/jfs/acl.c7
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/host.c14
-rw-r--r--fs/lockd/mon.c44
-rw-r--r--fs/locks.c4
-rw-r--r--fs/namei.c110
-rw-r--r--fs/nfs/Makefile3
-rw-r--r--fs/nfs/cache_lib.c140
-rw-r--r--fs/nfs/cache_lib.h27
-rw-r--r--fs/nfs/callback.c26
-rw-r--r--fs/nfs/client.c16
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/dns_resolve.c335
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/idmap.c6
-rw-r--r--fs/nfs/inode.c100
-rw-r--r--fs/nfs/internal.h39
-rw-r--r--fs/nfs/mount_clnt.c83
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4namespace.c24
-rw-r--r--fs/nfs/nfs4proc.c40
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/nfs4xdr.c1460
-rw-r--r--fs/nfs/super.c451
-rw-r--r--fs/nfs/write.c91
-rw-r--r--fs/nfsd/auth.c4
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/nfs4idmap.c20
-rw-r--r--fs/nfsd/nfsctl.c21
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/Kconfig2
-rw-r--r--fs/nilfs2/bmap.c151
-rw-r--r--fs/nilfs2/bmap.h76
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/btree.c625
-rw-r--r--fs/nilfs2/cpfile.c11
-rw-r--r--fs/nilfs2/cpfile.h2
-rw-r--r--fs/nilfs2/dat.c42
-rw-r--r--fs/nilfs2/dat.h8
-rw-r--r--fs/nilfs2/direct.c161
-rw-r--r--fs/nilfs2/ifile.h1
-rw-r--r--fs/nilfs2/inode.c3
-rw-r--r--fs/nilfs2/ioctl.c26
-rw-r--r--fs/nilfs2/mdt.c40
-rw-r--r--fs/nilfs2/mdt.h3
-rw-r--r--fs/nilfs2/recovery.c3
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/sufile.h1
-rw-r--r--fs/nilfs2/super.c102
-rw-r--r--fs/nilfs2/the_nilfs.c15
-rw-r--r--fs/nilfs2/the_nilfs.h45
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c46
-rw-r--r--fs/notify/inotify/inotify_user.c254
-rw-r--r--fs/notify/notification.c11
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/mft.c13
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/dcache.c11
-rw-r--r--fs/ocfs2/dlm/dlmfs.c1
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/file.c49
-rw-r--r--fs/ocfs2/ocfs2_lockid.h1
-rw-r--r--fs/ocfs2/quota_global.c10
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/open.c12
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/select.c1
-rw-r--r--fs/splice.c30
-rw-r--r--fs/super.c5
-rw-r--r--fs/sync.c76
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/inode.c135
-rw-r--r--fs/sysfs/symlink.c2
-rw-r--r--fs/sysfs/sysfs.h12
-rw-r--r--fs/ubifs/budget.c16
-rw-r--r--fs/ubifs/super.c9
-rw-r--r--fs/udf/directory.c86
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c19
-rw-r--r--fs/udf/lowlevel.c4
-rw-r--r--fs/udf/namei.c1
-rw-r--r--fs/xattr.c55
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/xfs_iget.c113
175 files changed, 5236 insertions, 4079 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 332b5ff02fec..f7003cfac63d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -76,7 +76,7 @@ static const match_table_t tokens = {
* Return 0 upon success, -ERRNO upon failure.
*/
-static int v9fs_parse_options(struct v9fs_session_info *v9ses)
+static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
{
char *options;
substring_t args[MAX_OPT_ARGS];
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
v9ses->debug = 0;
v9ses->cache = 0;
- if (!v9ses->options)
+ if (!opts)
return 0;
- options = kstrdup(v9ses->options, GFP_KERNEL);
+ options = kstrdup(opts, GFP_KERNEL);
if (!options) {
P9_DPRINTK(P9_DEBUG_ERROR,
"failed to allocate copy of option string\n");
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->uid = ~0;
v9ses->dfltuid = V9FS_DEFUID;
v9ses->dfltgid = V9FS_DEFGID;
- if (data) {
- v9ses->options = kstrdup(data, GFP_KERNEL);
- if (!v9ses->options) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy of option string\n");
- retval = -ENOMEM;
- goto error;
- }
- }
- rc = v9fs_parse_options(v9ses);
+ rc = v9fs_parse_options(v9ses, data);
if (rc < 0) {
retval = rc;
goto error;
}
- v9ses->clnt = p9_client_create(dev_name, v9ses->options);
-
+ v9ses->clnt = p9_client_create(dev_name, data);
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
v9ses->clnt = NULL;
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
__putname(v9ses->uname);
__putname(v9ses->aname);
- kfree(v9ses->options);
}
/**
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a7d567192998..38762bf102a9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -85,7 +85,6 @@ struct v9fs_session_info {
unsigned int afid;
unsigned int cache;
- char *options; /* copy of mount options */
char *uname; /* user name to mount as */
char *aname; /* name of remote hierarchy being mounted */
unsigned int maxdata; /* max data for client interface */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 81f8bbf12f9f..06a223d50a81 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended)
/**
* v9fs_blank_wstat - helper function to setup a 9P stat structure
- * @v9ses: 9P session info (for determining extended mode)
* @wstat: structure to initialize
*
*/
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
struct inode *v9fs_get_inode(struct super_block *sb, int mode)
{
+ int err;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
inode = new_inode(sb);
- if (inode) {
- inode->i_mode = mode;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_blocks = 0;
- inode->i_rdev = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_mapping->a_ops = &v9fs_addr_operations;
-
- switch (mode & S_IFMT) {
- case S_IFIFO:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFSOCK:
- if (!v9fs_extended(v9ses)) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "special files without extended mode\n");
- return ERR_PTR(-EINVAL);
- }
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- break;
- case S_IFREG:
- inode->i_op = &v9fs_file_inode_operations;
- inode->i_fop = &v9fs_file_operations;
- break;
- case S_IFLNK:
- if (!v9fs_extended(v9ses)) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "extended modes used w/o 9P2000.u\n");
- return ERR_PTR(-EINVAL);
- }
- inode->i_op = &v9fs_symlink_inode_operations;
- break;
- case S_IFDIR:
- inc_nlink(inode);
- if (v9fs_extended(v9ses))
- inode->i_op = &v9fs_dir_inode_operations_ext;
- else
- inode->i_op = &v9fs_dir_inode_operations;
- inode->i_fop = &v9fs_dir_operations;
- break;
- default:
- P9_DPRINTK(P9_DEBUG_ERROR,
- "BAD mode 0x%x S_IFMT 0x%x\n",
- mode, mode & S_IFMT);
- return ERR_PTR(-EINVAL);
- }
- } else {
+ if (!inode) {
P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
return ERR_PTR(-ENOMEM);
}
+
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_blocks = 0;
+ inode->i_rdev = 0;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mapping->a_ops = &v9fs_addr_operations;
+
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFBLK:
+ case S_IFCHR:
+ case S_IFSOCK:
+ if (!v9fs_extended(v9ses)) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "special files without extended mode\n");
+ err = -EINVAL;
+ goto error;
+ }
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ break;
+ case S_IFREG:
+ inode->i_op = &v9fs_file_inode_operations;
+ inode->i_fop = &v9fs_file_operations;
+ break;
+ case S_IFLNK:
+ if (!v9fs_extended(v9ses)) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "extended modes used w/o 9P2000.u\n");
+ err = -EINVAL;
+ goto error;
+ }
+ inode->i_op = &v9fs_symlink_inode_operations;
+ break;
+ case S_IFDIR:
+ inc_nlink(inode);
+ if (v9fs_extended(v9ses))
+ inode->i_op = &v9fs_dir_inode_operations_ext;
+ else
+ inode->i_op = &v9fs_dir_inode_operations;
+ inode->i_fop = &v9fs_dir_operations;
+ break;
+ default:
+ P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+ mode, mode & S_IFMT);
+ err = -EINVAL;
+ goto error;
+ }
+
return inode;
+
+error:
+ iput(inode);
+ return ERR_PTR(err);
}
/*
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
ret = NULL;
st = p9_client_stat(fid);
- if (IS_ERR(st)) {
- err = PTR_ERR(st);
- st = NULL;
- goto error;
- }
+ if (IS_ERR(st))
+ return ERR_CAST(st);
umode = p9mode2unixmode(v9ses, st->mode);
ret = v9fs_get_inode(sb, umode);
if (IS_ERR(ret)) {
err = PTR_ERR(ret);
- ret = NULL;
goto error;
}
v9fs_stat2inode(st, ret, sb);
ret->i_ino = v9fs_qid2ino(&st->qid);
+ p9stat_free(st);
kfree(st);
return ret;
error:
+ p9stat_free(st);
kfree(st);
- if (ret)
- iput(ret);
-
return ERR_PTR(err);
}
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file)
* @v9ses: session information
* @dir: directory that dentry is being created in
* @dentry: dentry that is being created
+ * @extension: 9p2000.u extension string to support devices, etc.
* @perm: create permissions
* @mode: open mode
- * @extension: 9p2000.u extension string to support devices, etc.
*
*/
static struct p9_fid *
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
- v9fs_fid_add(dentry, fid);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
return ofid;
error:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 38d695d66a0b..8961f1a8f668 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
static void
v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
- int flags)
+ int flags, void *data)
{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
MS_NOATIME;
+
+ save_mount_options(sb, data);
}
/**
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
struct v9fs_session_info *v9ses = NULL;
struct p9_wstat *st = NULL;
int mode = S_IRWXUGO | S_ISVTX;
- uid_t uid = current_fsuid();
- gid_t gid = current_fsgid();
struct p9_fid *fid;
int retval = 0;
P9_DPRINTK(P9_DEBUG_VFS, " \n");
- st = NULL;
v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
if (!v9ses)
return -ENOMEM;
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(sb);
goto free_stat;
}
- v9fs_fill_super(sb, v9ses, flags);
+ v9fs_fill_super(sb, v9ses, flags, data);
inode = v9fs_get_inode(sb, S_IFDIR | mode);
if (IS_ERR(inode)) {
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto release_sb;
}
- inode->i_uid = uid;
- inode->i_gid = gid;
-
root = d_alloc_root(inode);
if (!root) {
iput(inode);
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
simple_set_mnt(mnt, sb);
return 0;
-release_sb:
- deactivate_locked_super(sb);
-
free_stat:
+ p9stat_free(st);
kfree(st);
clunk_fid:
@@ -185,7 +179,12 @@ clunk_fid:
close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
+ return retval;
+release_sb:
+ p9stat_free(st);
+ kfree(st);
+ deactivate_locked_super(sb);
return retval;
}
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s)
v9fs_session_close(v9ses);
kfree(v9ses);
+ s->s_fs_info = NULL;
P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
}
-/**
- * v9fs_show_options - Show mount options in /proc/mounts
- * @m: seq_file to write to
- * @mnt: mount descriptor
- *
- */
-
-static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
-{
- struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
-
- seq_printf(m, "%s", v9ses->options);
- return 0;
-}
-
static void
v9fs_umount_begin(struct super_block *sb)
{
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb)
static const struct super_operations v9fs_super_ops = {
.statfs = simple_statfs,
.clear_inode = v9fs_clear_inode,
- .show_options = v9fs_show_options,
+ .show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 0e7da7bb5d93..455aa207e67e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -43,6 +43,7 @@ source "fs/xfs/Kconfig"
source "fs/gfs2/Kconfig"
source "fs/ocfs2/Kconfig"
source "fs/btrfs/Kconfig"
+source "fs/nilfs2/Kconfig"
endif # BLOCK
@@ -186,7 +187,6 @@ source "fs/romfs/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
-source "fs/nilfs2/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0149dab365e7..681c2a7b013f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page)
inode = page->mapping->host;
- ASSERT(file != NULL);
- key = file->private_data;
- ASSERT(key != NULL);
+ if (file) {
+ key = file->private_data;
+ ASSERT(key != NULL);
+ } else {
+ key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_nokey;
+ }
+ }
_enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page)
unlock_page(page);
}
+ if (!file)
+ key_put(key);
_leave(" = 0");
return 0;
error:
SetPageError(page);
unlock_page(page);
+ if (!file)
+ key_put(key);
+error_nokey:
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index aa39ae83f019..3da18d453488 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -77,7 +77,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
}
/* Update the expiry counter if fs is busy */
- if (!may_umount_tree(mnt)) {
+ if (!may_umount_tree(path.mnt)) {
struct autofs_info *ino = autofs4_dentry_ino(top);
ino->last_used = jiffies;
goto done;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603cd4bd..7c1e65d54872 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
}
}
- /*
- * Now fill out the bss section. First pad the last page up
- * to the page boundary, and then perform a mmap to make sure
- * that there are zero-mapped pages up to and including the
- * last bss page.
- */
- if (padzero(elf_bss)) {
- error = -EFAULT;
- goto out_close;
- }
+ if (last_bss > elf_bss) {
+ /*
+ * Now fill out the bss section. First pad the last page up
+ * to the page boundary, and then perform a mmap to make sure
+ * that there are zero-mapped pages up to and including the
+ * last bss page.
+ */
+ if (padzero(elf_bss)) {
+ error = -EFAULT;
+ goto out_close;
+ }
- /* What we have mapped so far */
- elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
+ /* What we have mapped so far */
+ elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
- /* Map the last of the bss segment */
- if (last_bss > elf_bss) {
+ /* Map the last of the bss segment */
down_write(&current->mm->mmap_sem);
error = do_brk(elf_bss, last_bss - elf_bss);
up_write(&current->mm->mmap_sem);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 94dfda24c06e..3581a4e53942 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1405,6 +1405,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
}
/*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret;
+
+ BUG_ON(iocb->ki_pos != pos);
+
+ ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+ if (ret > 0 || ret == -EIOCBQUEUED) {
+ ssize_t err;
+
+ err = generic_write_sync(file, pos, ret);
+ if (err < 0 && ret > 0)
+ ret = err;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_write);
+
+/*
* Try to release a page associated with block device when the system
* is under memory pressure.
*/
@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write_nolock,
+ .aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.fsync = block_fsync,
.unlocked_ioctl = block_ioctl,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..15831d5c7367 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
int err;
+ bdi->name = "btrfs";
bdi->capabilities = BDI_CAP_MAP_COPY;
err = bdi_init(bdi);
if (err)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..535f85ba104f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1511,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
- blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+ blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
+ DISCARD_FL_BARRIER);
}
#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 272b9b2bea86..59cba180fe83 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_inode *entry;
- struct rb_node **p = &root->inode_tree.rb_node;
- struct rb_node *parent = NULL;
+ struct rb_node **p;
+ struct rb_node *parent;
+
+again:
+ p = &root->inode_tree.rb_node;
+ parent = NULL;
spin_lock(&root->inode_lock);
while (*p) {
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode)
entry = rb_entry(parent, struct btrfs_inode, rb_node);
if (inode->i_ino < entry->vfs_inode.i_ino)
- p = &(*p)->rb_left;
+ p = &parent->rb_left;
else if (inode->i_ino > entry->vfs_inode.i_ino)
- p = &(*p)->rb_right;
+ p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING | I_CLEAR)));
- break;
+ rb_erase(parent, &root->inode_tree);
+ RB_CLEAR_NODE(parent);
+ spin_unlock(&root->inode_lock);
+ goto again;
}
}
rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
- spin_lock(&root->inode_lock);
rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
- spin_unlock(&root->inode_lock);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
}
+ spin_unlock(&root->inode_lock);
}
static noinline void init_btrfs_i(struct inode *inode)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4af..5cf405b0828d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -260,7 +260,7 @@ loop_lock:
num_run++;
batch_run++;
- if (bio_sync(cur))
+ if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
num_sync_run++;
if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
- if (bio_sync(bio))
+ if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
pending_bios = &device->pending_sync_bios;
else
pending_bios = &device->pending_bios;
diff --git a/fs/buffer.c b/fs/buffer.c
index a3ef091a45bd..90a98865b0cc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -281,7 +281,7 @@ static void free_more_memory(void)
struct zone *zone;
int nid;
- wakeup_pdflush(1024);
+ wakeup_flusher_threads(1024);
yield();
for_each_online_node(nid) {
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
if (!test_set_buffer_dirty(bh)) {
struct page *page = bh->b_page;
- if (!TestSetPageDirty(page))
- __set_page_dirty(page, page_mapping(page), 0);
+ if (!TestSetPageDirty(page)) {
+ struct address_space *mapping = page_mapping(page);
+ if (mapping)
+ __set_page_dirty(page, mapping, 0);
+ }
}
}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a173551e19d7..3cbc57f932d2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -31,6 +31,7 @@
* - no readahead or I/O queue unplugging required
*/
struct backing_dev_info directly_mappable_cdev_bdi = {
+ .name = "char",
.capabilities = (
#ifdef CONFIG_MMU
/* permit private copies of the data to be taken */
@@ -237,8 +238,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
}
/**
- * register_chrdev() - Register a major number for character devices.
+ * __register_chrdev() - create and register a cdev occupying a range of minors
* @major: major device number or 0 for dynamic allocation
+ * @baseminor: first of the requested range of minor numbers
+ * @count: the number of minor numbers required
* @name: name of this range of devices
* @fops: file operations associated with this devices
*
@@ -254,19 +257,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
* /dev. It only helps to keep track of the different owners of devices. If
* your module name has only one type of devices it's ok to use e.g. the name
* of the module here.
- *
- * This function registers a range of 256 minor numbers. The first minor number
- * is 0.
*/
-int register_chrdev(unsigned int major, const char *name,
- const struct file_operations *fops)
+int __register_chrdev(unsigned int major, unsigned int baseminor,
+ unsigned int count, const char *name,
+ const struct file_operations *fops)
{
struct char_device_struct *cd;
struct cdev *cdev;
char *s;
int err = -ENOMEM;
- cd = __register_chrdev_region(major, 0, 256, name);
+ cd = __register_chrdev_region(major, baseminor, count, name);
if (IS_ERR(cd))
return PTR_ERR(cd);
@@ -280,7 +281,7 @@ int register_chrdev(unsigned int major, const char *name,
for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
*s = '!';
- err = cdev_add(cdev, MKDEV(cd->major, 0), 256);
+ err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
if (err)
goto out;
@@ -290,7 +291,7 @@ int register_chrdev(unsigned int major, const char *name,
out:
kobject_put(&cdev->kobj);
out2:
- kfree(__unregister_chrdev_region(cd->major, 0, 256));
+ kfree(__unregister_chrdev_region(cd->major, baseminor, count));
return err;
}
@@ -316,10 +317,23 @@ void unregister_chrdev_region(dev_t from, unsigned count)
}
}
-void unregister_chrdev(unsigned int major, const char *name)
+/**
+ * __unregister_chrdev - unregister and destroy a cdev
+ * @major: major device number
+ * @baseminor: first of the range of minor numbers
+ * @count: the number of minor numbers this cdev is occupying
+ * @name: name of this range of devices
+ *
+ * Unregister and destroy the cdev occupying the region described by
+ * @major, @baseminor and @count. This function undoes what
+ * __register_chrdev() did.
+ */
+void __unregister_chrdev(unsigned int major, unsigned int baseminor,
+ unsigned int count, const char *name)
{
struct char_device_struct *cd;
- cd = __unregister_chrdev_region(major, 0, 256);
+
+ cd = __unregister_chrdev_region(major, baseminor, count);
if (cd && cd->cdev)
cdev_del(cd->cdev);
kfree(cd);
@@ -568,6 +582,6 @@ EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
EXPORT_SYMBOL(cdev_index);
-EXPORT_SYMBOL(register_chrdev);
-EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(__register_chrdev);
+EXPORT_SYMBOL(__unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index e85b1e4389e0..145540a316ab 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,10 @@ Version 1.60
Fix memory leak in reconnect. Fix oops in DFS mount error path.
Set s_maxbytes to smaller (the max that vfs can handle) so that
sendfile will now work over cifs mounts again. Add noforcegid
-and noforceuid mount parameters.
+and noforceuid mount parameters. Fix small mem leak when using
+ntlmv2. Fix 2nd mount to same server but with different port to
+be allowed (rather than reusing the 1st port) - only when the
+user explicitly overrides the port on the 2nd mount.
Version 1.59
------------
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 051caecf7d67..8ec7736ce954 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
if (server->addr.sockAddr.sin_family == AF_INET)
sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
else if (server->addr.sockAddr.sin_family == AF_INET6)
- sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr);
+ sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr);
else
goto out;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6941c22398a6..7dfe0842a6f6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -607,7 +607,7 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
return get_cifs_acl_by_path(cifs_sb, path, pacllen);
pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
return pntsd;
}
@@ -665,7 +665,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
return rc;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7c9809523f42..7efe1745494d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -373,6 +373,7 @@ calc_exit_2:
compare with the NTLM example */
hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
+ kfree(pctxt);
return rc;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84b75253b05a..3610e9958b4c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -361,13 +361,10 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
static int
cifs_show_options(struct seq_file *s, struct vfsmount *m)
{
- struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *tcon;
-
- cifs_sb = CIFS_SB(m->mnt_sb);
- tcon = cifs_sb->tcon;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
+ struct cifsTconInfo *tcon = cifs_sb->tcon;
- seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
+ seq_printf(s, ",unc=%s", tcon->treeName);
if (tcon->ses->userName)
seq_printf(s, ",username=%s", tcon->ses->userName);
if (tcon->ses->domainName)
@@ -989,19 +986,19 @@ static int cifs_oplock_thread(void *dummyarg)
if (try_to_freeze())
continue;
- spin_lock(&GlobalMid_Lock);
- if (list_empty(&GlobalOplock_Q)) {
- spin_unlock(&GlobalMid_Lock);
+ spin_lock(&cifs_oplock_lock);
+ if (list_empty(&cifs_oplock_list)) {
+ spin_unlock(&cifs_oplock_lock);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(39*HZ);
} else {
- oplock_item = list_entry(GlobalOplock_Q.next,
+ oplock_item = list_entry(cifs_oplock_list.next,
struct oplock_q_entry, qhead);
cFYI(1, ("found oplock item to write out"));
pTcon = oplock_item->tcon;
inode = oplock_item->pinode;
netfid = oplock_item->netfid;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_oplock_lock);
DeleteOplockQEntry(oplock_item);
/* can not grab inode sem here since it would
deadlock when oplock received on delete
@@ -1058,7 +1055,7 @@ init_cifs(void)
int rc = 0;
cifs_proc_init();
INIT_LIST_HEAD(&cifs_tcp_ses_list);
- INIT_LIST_HEAD(&GlobalOplock_Q);
+ INIT_LIST_HEAD(&cifs_oplock_list);
#ifdef CONFIG_CIFS_EXPERIMENTAL
INIT_LIST_HEAD(&GlobalDnotifyReqList);
INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
@@ -1087,6 +1084,7 @@ init_cifs(void)
rwlock_init(&GlobalSMBSeslock);
rwlock_init(&cifs_tcp_ses_lock);
spin_lock_init(&GlobalMid_Lock);
+ spin_lock_init(&cifs_oplock_lock);
if (cifs_max_pending < 2) {
cifs_max_pending = 2;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6c170948300d..094325e3f714 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.60"
+#define CIFS_VERSION "1.61"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6084d6379c03..6cfc81a32703 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -351,11 +351,24 @@ struct cifsFileInfo {
bool closePend:1; /* file is marked to close */
bool invalidHandle:1; /* file closed via session abend */
bool messageMode:1; /* for pipes: message vs byte mode */
- atomic_t wrtPending; /* handle in use - defer close */
+ atomic_t count; /* reference count */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
};
+/* Take a reference on the file private data */
+static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
+{
+ atomic_inc(&cifs_file->count);
+}
+
+/* Release a reference on the file private data */
+static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
+{
+ if (atomic_dec_and_test(&cifs_file->count))
+ kfree(cifs_file);
+}
+
/*
* One of these for each file inode
*/
@@ -656,7 +669,11 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
*/
GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
-GLOBAL_EXTERN struct list_head GlobalOplock_Q;
+/* Global list of oplocks */
+GLOBAL_EXTERN struct list_head cifs_oplock_list;
+
+/* Protects the cifs_oplock_list */
+GLOBAL_EXTERN spinlock_t cifs_oplock_lock;
/* Outstanding dir notify requests */
GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1866bc2927d4..301e307e1279 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -100,110 +100,138 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
to this tcon */
}
-/* Allocate and return pointer to an SMB request buffer, and set basic
- SMB information in the SMB header. If the return code is zero, this
- function must have filled in request_buf pointer */
+/* reconnect the socket, tcon, and smb session if needed */
static int
-small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
- void **request_buf)
+cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
{
int rc = 0;
+ struct cifsSesInfo *ses;
+ struct TCP_Server_Info *server;
+ struct nls_table *nls_codepage;
- /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so
- check for tcp and smb session status done differently
- for those three - in the calling routine */
- if (tcon) {
- if (tcon->tidStatus == CifsExiting) {
- /* only tree disconnect, open, and write,
- (and ulogoff which does not have tcon)
- are allowed as we start force umount */
- if ((smb_command != SMB_COM_WRITE_ANDX) &&
- (smb_command != SMB_COM_OPEN_ANDX) &&
- (smb_command != SMB_COM_TREE_DISCONNECT)) {
- cFYI(1, ("can not send cmd %d while umounting",
- smb_command));
- return -ENODEV;
- }
+ /*
+ * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
+ * tcp and smb session status done differently for those three - in the
+ * calling routine
+ */
+ if (!tcon)
+ return 0;
+
+ ses = tcon->ses;
+ server = ses->server;
+
+ /*
+ * only tree disconnect, open, and write, (and ulogoff which does not
+ * have tcon) are allowed as we start force umount
+ */
+ if (tcon->tidStatus == CifsExiting) {
+ if (smb_command != SMB_COM_WRITE_ANDX &&
+ smb_command != SMB_COM_OPEN_ANDX &&
+ smb_command != SMB_COM_TREE_DISCONNECT) {
+ cFYI(1, ("can not send cmd %d while umounting",
+ smb_command));
+ return -ENODEV;
}
- if ((tcon->ses) && (tcon->ses->status != CifsExiting) &&
- (tcon->ses->server)) {
- struct nls_table *nls_codepage;
- /* Give Demultiplex thread up to 10 seconds to
- reconnect, should be greater than cifs socket
- timeout which is 7 seconds */
- while (tcon->ses->server->tcpStatus ==
- CifsNeedReconnect) {
- wait_event_interruptible_timeout(tcon->ses->server->response_q,
- (tcon->ses->server->tcpStatus ==
- CifsGood), 10 * HZ);
- if (tcon->ses->server->tcpStatus ==
- CifsNeedReconnect) {
- /* on "soft" mounts we wait once */
- if (!tcon->retry ||
- (tcon->ses->status == CifsExiting)) {
- cFYI(1, ("gave up waiting on "
- "reconnect in smb_init"));
- return -EHOSTDOWN;
- } /* else "hard" mount - keep retrying
- until process is killed or server
- comes back on-line */
- } else /* TCP session is reestablished now */
- break;
- }
+ }
- nls_codepage = load_nls_default();
- /* need to prevent multiple threads trying to
- simultaneously reconnect the same SMB session */
- down(&tcon->ses->sesSem);
- if (tcon->ses->need_reconnect)
- rc = cifs_setup_session(0, tcon->ses,
- nls_codepage);
- if (!rc && (tcon->need_reconnect)) {
- mark_open_files_invalid(tcon);
- rc = CIFSTCon(0, tcon->ses, tcon->treeName,
- tcon, nls_codepage);
- up(&tcon->ses->sesSem);
- /* BB FIXME add code to check if wsize needs
- update due to negotiated smb buffer size
- shrinking */
- if (rc == 0) {
- atomic_inc(&tconInfoReconnectCount);
- /* tell server Unix caps we support */
- if (tcon->ses->capabilities & CAP_UNIX)
- reset_cifs_unix_caps(
- 0 /* no xid */,
- tcon,
- NULL /* we do not know sb */,
- NULL /* no vol info */);
- }
+ if (ses->status == CifsExiting)
+ return -EIO;
- cFYI(1, ("reconnect tcon rc = %d", rc));
- /* Removed call to reopen open files here.
- It is safer (and faster) to reopen files
- one at a time as needed in read and write */
-
- /* Check if handle based operation so we
- know whether we can continue or not without
- returning to caller to reset file handle */
- switch (smb_command) {
- case SMB_COM_READ_ANDX:
- case SMB_COM_WRITE_ANDX:
- case SMB_COM_CLOSE:
- case SMB_COM_FIND_CLOSE2:
- case SMB_COM_LOCKING_ANDX: {
- unload_nls(nls_codepage);
- return -EAGAIN;
- }
- }
- } else {
- up(&tcon->ses->sesSem);
- }
- unload_nls(nls_codepage);
+ /*
+ * Give demultiplex thread up to 10 seconds to reconnect, should be
+ * greater than cifs socket timeout which is 7 seconds
+ */
+ while (server->tcpStatus == CifsNeedReconnect) {
+ wait_event_interruptible_timeout(server->response_q,
+ (server->tcpStatus == CifsGood), 10 * HZ);
- } else {
- return -EIO;
+ /* is TCP session is reestablished now ?*/
+ if (server->tcpStatus != CifsNeedReconnect)
+ break;
+
+ /*
+ * on "soft" mounts we wait once. Hard mounts keep
+ * retrying until process is killed or server comes
+ * back on-line
+ */
+ if (!tcon->retry || ses->status == CifsExiting) {
+ cFYI(1, ("gave up waiting on reconnect in smb_init"));
+ return -EHOSTDOWN;
}
}
+
+ if (!ses->need_reconnect && !tcon->need_reconnect)
+ return 0;
+
+ nls_codepage = load_nls_default();
+
+ /*
+ * need to prevent multiple threads trying to simultaneously
+ * reconnect the same SMB session
+ */
+ down(&ses->sesSem);
+ if (ses->need_reconnect)
+ rc = cifs_setup_session(0, ses, nls_codepage);
+
+ /* do we need to reconnect tcon? */
+ if (rc || !tcon->need_reconnect) {
+ up(&ses->sesSem);
+ goto out;
+ }
+
+ mark_open_files_invalid(tcon);
+ rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
+ up(&ses->sesSem);
+ cFYI(1, ("reconnect tcon rc = %d", rc));
+
+ if (rc)
+ goto out;
+
+ /*
+ * FIXME: check if wsize needs updated due to negotiated smb buffer
+ * size shrinking
+ */
+ atomic_inc(&tconInfoReconnectCount);
+
+ /* tell server Unix caps we support */
+ if (ses->capabilities & CAP_UNIX)
+ reset_cifs_unix_caps(0, tcon, NULL, NULL);
+
+ /*
+ * Removed call to reopen open files here. It is safer (and faster) to
+ * reopen files one at a time as needed in read and write.
+ *
+ * FIXME: what about file locks? don't we need to reclaim them ASAP?
+ */
+
+out:
+ /*
+ * Check if handle based operation so we know whether we can continue
+ * or not without returning to caller to reset file handle
+ */
+ switch (smb_command) {
+ case SMB_COM_READ_ANDX:
+ case SMB_COM_WRITE_ANDX:
+ case SMB_COM_CLOSE:
+ case SMB_COM_FIND_CLOSE2:
+ case SMB_COM_LOCKING_ANDX:
+ rc = -EAGAIN;
+ }
+
+ unload_nls(nls_codepage);
+ return rc;
+}
+
+/* Allocate and return pointer to an SMB request buffer, and set basic
+ SMB information in the SMB header. If the return code is zero, this
+ function must have filled in request_buf pointer */
+static int
+small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+ void **request_buf)
+{
+ int rc = 0;
+
+ rc = cifs_reconnect_tcon(tcon, smb_command);
if (rc)
return rc;
@@ -256,101 +284,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
{
int rc = 0;
- /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so
- check for tcp and smb session status done differently
- for those three - in the calling routine */
- if (tcon) {
- if (tcon->tidStatus == CifsExiting) {
- /* only tree disconnect, open, and write,
- (and ulogoff which does not have tcon)
- are allowed as we start force umount */
- if ((smb_command != SMB_COM_WRITE_ANDX) &&
- (smb_command != SMB_COM_OPEN_ANDX) &&
- (smb_command != SMB_COM_TREE_DISCONNECT)) {
- cFYI(1, ("can not send cmd %d while umounting",
- smb_command));
- return -ENODEV;
- }
- }
-
- if ((tcon->ses) && (tcon->ses->status != CifsExiting) &&
- (tcon->ses->server)) {
- struct nls_table *nls_codepage;
- /* Give Demultiplex thread up to 10 seconds to
- reconnect, should be greater than cifs socket
- timeout which is 7 seconds */
- while (tcon->ses->server->tcpStatus ==
- CifsNeedReconnect) {
- wait_event_interruptible_timeout(tcon->ses->server->response_q,
- (tcon->ses->server->tcpStatus ==
- CifsGood), 10 * HZ);
- if (tcon->ses->server->tcpStatus ==
- CifsNeedReconnect) {
- /* on "soft" mounts we wait once */
- if (!tcon->retry ||
- (tcon->ses->status == CifsExiting)) {
- cFYI(1, ("gave up waiting on "
- "reconnect in smb_init"));
- return -EHOSTDOWN;
- } /* else "hard" mount - keep retrying
- until process is killed or server
- comes on-line */
- } else /* TCP session is reestablished now */
- break;
- }
- nls_codepage = load_nls_default();
- /* need to prevent multiple threads trying to
- simultaneously reconnect the same SMB session */
- down(&tcon->ses->sesSem);
- if (tcon->ses->need_reconnect)
- rc = cifs_setup_session(0, tcon->ses,
- nls_codepage);
- if (!rc && (tcon->need_reconnect)) {
- mark_open_files_invalid(tcon);
- rc = CIFSTCon(0, tcon->ses, tcon->treeName,
- tcon, nls_codepage);
- up(&tcon->ses->sesSem);
- /* BB FIXME add code to check if wsize needs
- update due to negotiated smb buffer size
- shrinking */
- if (rc == 0) {
- atomic_inc(&tconInfoReconnectCount);
- /* tell server Unix caps we support */
- if (tcon->ses->capabilities & CAP_UNIX)
- reset_cifs_unix_caps(
- 0 /* no xid */,
- tcon,
- NULL /* do not know sb */,
- NULL /* no vol info */);
- }
-
- cFYI(1, ("reconnect tcon rc = %d", rc));
- /* Removed call to reopen open files here.
- It is safer (and faster) to reopen files
- one at a time as needed in read and write */
-
- /* Check if handle based operation so we
- know whether we can continue or not without
- returning to caller to reset file handle */
- switch (smb_command) {
- case SMB_COM_READ_ANDX:
- case SMB_COM_WRITE_ANDX:
- case SMB_COM_CLOSE:
- case SMB_COM_FIND_CLOSE2:
- case SMB_COM_LOCKING_ANDX: {
- unload_nls(nls_codepage);
- return -EAGAIN;
- }
- }
- } else {
- up(&tcon->ses->sesSem);
- }
- unload_nls(nls_codepage);
-
- } else {
- return -EIO;
- }
- }
+ rc = cifs_reconnect_tcon(tcon, smb_command);
if (rc)
return rc;
@@ -3961,6 +3895,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
if (is_unicode) {
__le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
GFP_KERNEL);
+ if (tmp == NULL) {
+ rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
cifsConvertToUCS((__le16 *) tmp, searchName,
PATH_MAX, nls_codepage, remap);
node->path_consumed = cifs_ucs2_bytes(tmp,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1f3345d7fa79..d49682433c20 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1377,7 +1377,7 @@ cifs_parse_mount_options(char *options, const char *devname,
}
static struct TCP_Server_Info *
-cifs_find_tcp_session(struct sockaddr_storage *addr)
+cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
{
struct list_head *tmp;
struct TCP_Server_Info *server;
@@ -1397,16 +1397,37 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
if (server->tcpStatus == CifsNew)
continue;
- if (addr->ss_family == AF_INET &&
- (addr4->sin_addr.s_addr !=
- server->addr.sockAddr.sin_addr.s_addr))
- continue;
- else if (addr->ss_family == AF_INET6 &&
- (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
- &addr6->sin6_addr) ||
- server->addr.sockAddr6.sin6_scope_id !=
- addr6->sin6_scope_id))
- continue;
+ switch (addr->ss_family) {
+ case AF_INET:
+ if (addr4->sin_addr.s_addr ==
+ server->addr.sockAddr.sin_addr.s_addr) {
+ addr4->sin_port = htons(port);
+ /* user overrode default port? */
+ if (addr4->sin_port) {
+ if (addr4->sin_port !=
+ server->addr.sockAddr.sin_port)
+ continue;
+ }
+ break;
+ } else
+ continue;
+
+ case AF_INET6:
+ if (ipv6_addr_equal(&addr6->sin6_addr,
+ &server->addr.sockAddr6.sin6_addr) &&
+ (addr6->sin6_scope_id ==
+ server->addr.sockAddr6.sin6_scope_id)) {
+ addr6->sin6_port = htons(port);
+ /* user overrode default port? */
+ if (addr6->sin6_port) {
+ if (addr6->sin6_port !=
+ server->addr.sockAddr6.sin6_port)
+ continue;
+ }
+ break;
+ } else
+ continue;
+ }
++server->srv_count;
write_unlock(&cifs_tcp_ses_lock);
@@ -1475,7 +1496,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
}
/* see if we already have a matching tcp_ses */
- tcp_ses = cifs_find_tcp_session(&addr);
+ tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
if (tcp_ses)
return tcp_ses;
@@ -2636,9 +2657,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
return -EIO;
smb_buffer = cifs_buf_get();
- if (smb_buffer == NULL) {
+ if (smb_buffer == NULL)
return -ENOMEM;
- }
+
smb_buffer_response = smb_buffer;
header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4326ffd90fa9..a6424cfc0121 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -153,7 +153,7 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
mutex_init(&pCifsFile->fh_mutex);
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
- atomic_set(&pCifsFile->wrtPending, 0);
+ atomic_set(&pCifsFile->count, 1);
/* set the following in open now
pCifsFile->pfile = file; */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c34b7f8a217b..fa7beac8b80e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -53,11 +53,9 @@ static inline struct cifsFileInfo *cifs_init_private(
private_data->pInode = inode;
private_data->invalidHandle = false;
private_data->closePend = false;
- /* we have to track num writers to the inode, since writepages
- does not tell us which handle the write is for so there can
- be a close (overlapping with write) of the filehandle that
- cifs_writepages chose to use */
- atomic_set(&private_data->wrtPending, 0);
+ /* Initialize reference count to one. The private data is
+ freed on the release of the last reference */
+ atomic_set(&private_data->count, 1);
return private_data;
}
@@ -643,7 +641,7 @@ int cifs_close(struct inode *inode, struct file *file)
if (!pTcon->need_reconnect) {
write_unlock(&GlobalSMBSeslock);
timeout = 2;
- while ((atomic_read(&pSMBFile->wrtPending) != 0)
+ while ((atomic_read(&pSMBFile->count) != 1)
&& (timeout <= 2048)) {
/* Give write a better chance to get to
server ahead of the close. We do not
@@ -657,8 +655,6 @@ int cifs_close(struct inode *inode, struct file *file)
msleep(timeout);
timeout *= 4;
}
- if (atomic_read(&pSMBFile->wrtPending))
- cERROR(1, ("close with pending write"));
if (!pTcon->need_reconnect &&
!pSMBFile->invalidHandle)
rc = CIFSSMBClose(xid, pTcon,
@@ -681,24 +677,7 @@ int cifs_close(struct inode *inode, struct file *file)
list_del(&pSMBFile->flist);
list_del(&pSMBFile->tlist);
write_unlock(&GlobalSMBSeslock);
- timeout = 10;
- /* We waited above to give the SMBWrite a chance to issue
- on the wire (so we do not get SMBWrite returning EBADF
- if writepages is racing with close. Note that writepages
- does not specify a file handle, so it is possible for a file
- to be opened twice, and the application close the "wrong"
- file handle - in these cases we delay long enough to allow
- the SMBWrite to get on the wire before the SMB Close.
- We allow total wait here over 45 seconds, more than
- oplock break time, and more than enough to allow any write
- to complete on the server, or to time out on the client */
- while ((atomic_read(&pSMBFile->wrtPending) != 0)
- && (timeout <= 50000)) {
- cERROR(1, ("writes pending, delay free of handle"));
- msleep(timeout);
- timeout *= 8;
- }
- kfree(file->private_data);
+ cifsFileInfo_put(file->private_data);
file->private_data = NULL;
} else
rc = -EBADF;
@@ -1236,7 +1215,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
if (!open_file->invalidHandle) {
/* found a good file */
/* lock it so it will not be closed on us */
- atomic_inc(&open_file->wrtPending);
+ cifsFileInfo_get(open_file);
read_unlock(&GlobalSMBSeslock);
return open_file;
} /* else might as well continue, and look for
@@ -1276,7 +1255,7 @@ refind_writable:
if (open_file->pfile &&
((open_file->pfile->f_flags & O_RDWR) ||
(open_file->pfile->f_flags & O_WRONLY))) {
- atomic_inc(&open_file->wrtPending);
+ cifsFileInfo_get(open_file);
if (!open_file->invalidHandle) {
/* found a good writable file */
@@ -1293,7 +1272,7 @@ refind_writable:
else { /* start over in case this was deleted */
/* since the list could be modified */
read_lock(&GlobalSMBSeslock);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
goto refind_writable;
}
}
@@ -1309,7 +1288,7 @@ refind_writable:
read_lock(&GlobalSMBSeslock);
/* can not use this handle, no write
pending on this one after all */
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
if (open_file->closePend) /* list could have changed */
goto refind_writable;
@@ -1373,7 +1352,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
if (open_file) {
bytes_written = cifs_write(open_file->pfile, write_data,
to-from, &offset);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
/* Does mm or vfs already set times? */
inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
if ((bytes_written > 0) && (offset))
@@ -1562,7 +1541,7 @@ retry:
bytes_to_write, offset,
&bytes_written, iov, n_iov,
long_op);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
cifs_update_eof(cifsi, offset, bytes_written);
if (rc || bytes_written < bytes_to_write) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 82d83839655e..1f09c7619319 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ set_via_filehandle:
if (open_file == NULL)
CIFSSMBClose(xid, pTcon, netfid);
else
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
out:
return rc;
}
@@ -1635,7 +1635,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
__u32 npid = open_file->pid;
rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
npid, false);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
cFYI(1, ("SetFSize for attrs rc = %d", rc));
if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
unsigned int bytes_written;
@@ -1790,7 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
u16 nfid = open_file->netfid;
u32 npid = open_file->pid;
rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
- atomic_dec(&open_file->wrtPending);
+ cifsFileInfo_put(open_file);
} else {
rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
cifs_sb->local_nls,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0ad3e2d116a6..1da4ab250eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,20 +119,19 @@ AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon)
temp->pinode = pinode;
temp->tcon = tcon;
temp->netfid = fid;
- spin_lock(&GlobalMid_Lock);
- list_add_tail(&temp->qhead, &GlobalOplock_Q);
- spin_unlock(&GlobalMid_Lock);
+ spin_lock(&cifs_oplock_lock);
+ list_add_tail(&temp->qhead, &cifs_oplock_list);
+ spin_unlock(&cifs_oplock_lock);
}
return temp;
-
}
void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
{
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_oplock_lock);
/* should we check if list empty first? */
list_del(&oplockEntry->qhead);
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_oplock_lock);
kmem_cache_free(cifs_oplock_cachep, oplockEntry);
}
@@ -144,14 +143,14 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
if (tcon == NULL)
return;
- spin_lock(&GlobalMid_Lock);
- list_for_each_entry(temp, &GlobalOplock_Q, qhead) {
+ spin_lock(&cifs_oplock_lock);
+ list_for_each_entry(temp, &cifs_oplock_list, qhead) {
if ((temp->tcon) && (temp->tcon == tcon)) {
list_del(&temp->qhead);
kmem_cache_free(cifs_oplock_cachep, temp);
}
}
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_oplock_lock);
}
static int
diff --git a/fs/compat.c b/fs/compat.c
index 94502dab972a..6d6f98fe64a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename,
if (!bprm)
goto out_files;
- retval = -ERESTARTNOINTR;
- if (mutex_lock_interruptible(&current->cred_guard_mutex))
+ retval = prepare_bprm_creds(bprm);
+ if (retval)
goto out_free;
- current->in_execve = 1;
-
- retval = -ENOMEM;
- bprm->cred = prepare_exec_creds();
- if (!bprm->cred)
- goto out_unlock;
retval = check_unsafe_exec(bprm);
if (retval < 0)
- goto out_unlock;
+ goto out_free;
clear_in_exec = retval;
+ current->in_execve = 1;
file = open_exec(filename);
retval = PTR_ERR(file);
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
- mutex_unlock(&current->cred_guard_mutex);
acct_update_integrals(current);
free_bprm(bprm);
if (displaced)
@@ -1567,10 +1561,7 @@ out_file:
out_unmark:
if (clear_in_exec)
current->fs->in_exec = 0;
-
-out_unlock:
current->in_execve = 0;
- mutex_unlock(&current->cred_guard_mutex);
out_free:
free_bprm(bprm);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4921e7426d95..a2f746066c5d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = {
};
static struct backing_dev_info configfs_backing_dev_info = {
+ .name = "configfs",
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
diff --git a/fs/dcache.c b/fs/dcache.c
index 9e5cd3c3a6ba..a100fa35a48f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
+#include <linux/hardirq.h>
#include "internal.h"
int sysctl_vfs_cache_pressure __read_mostly = 100;
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ccc9d62c462d..55ea369f43a9 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -63,7 +63,7 @@ static int send_data(struct sk_buff *skb)
return rv;
}
- return genlmsg_unicast(skb, listener_nlpid);
+ return genlmsg_unicast(&init_net, skb, listener_nlpid);
}
static int user_cmd(struct sk_buff *skb, struct genl_info *info)
diff --git a/fs/exec.c b/fs/exec.c
index 4a8849e45b21..172ceb6edde4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,8 +678,8 @@ exit:
}
EXPORT_SYMBOL(open_exec);
-int kernel_read(struct file *file, unsigned long offset,
- char *addr, unsigned long count)
+int kernel_read(struct file *file, loff_t offset,
+ char *addr, unsigned long count)
{
mm_segment_t old_fs;
loff_t pos = offset;
@@ -1016,6 +1016,35 @@ out:
EXPORT_SYMBOL(flush_old_exec);
/*
+ * Prepare credentials and lock ->cred_guard_mutex.
+ * install_exec_creds() commits the new creds and drops the lock.
+ * Or, if exec fails before, free_bprm() should release ->cred and
+ * and unlock.
+ */
+int prepare_bprm_creds(struct linux_binprm *bprm)
+{
+ if (mutex_lock_interruptible(&current->cred_guard_mutex))
+ return -ERESTARTNOINTR;
+
+ bprm->cred = prepare_exec_creds();
+ if (likely(bprm->cred))
+ return 0;
+
+ mutex_unlock(&current->cred_guard_mutex);
+ return -ENOMEM;
+}
+
+void free_bprm(struct linux_binprm *bprm)
+{
+ free_arg_pages(bprm);
+ if (bprm->cred) {
+ mutex_unlock(&current->cred_guard_mutex);
+ abort_creds(bprm->cred);
+ }
+ kfree(bprm);
+}
+
+/*
* install the new credentials for this executable
*/
void install_exec_creds(struct linux_binprm *bprm)
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm)
commit_creds(bprm->cred);
bprm->cred = NULL;
-
- /* cred_guard_mutex must be held at least to this point to prevent
+ /*
+ * cred_guard_mutex must be held at least to this point to prevent
* ptrace_attach() from altering our determination of the task's
- * credentials; any time after this it may be unlocked */
-
+ * credentials; any time after this it may be unlocked.
+ */
security_bprm_committed_creds(bprm);
+ mutex_unlock(&current->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
EXPORT_SYMBOL(search_binary_handler);
-void free_bprm(struct linux_binprm *bprm)
-{
- free_arg_pages(bprm);
- if (bprm->cred)
- abort_creds(bprm->cred);
- kfree(bprm);
-}
-
/*
* sys_execve() executes a new program.
*/
@@ -1277,20 +1299,15 @@ int do_execve(char * filename,
if (!bprm)
goto out_files;
- retval = -ERESTARTNOINTR;
- if (mutex_lock_interruptible(&current->cred_guard_mutex))
+ retval = prepare_bprm_creds(bprm);
+ if (retval)
goto out_free;
- current->in_execve = 1;
-
- retval = -ENOMEM;
- bprm->cred = prepare_exec_creds();
- if (!bprm->cred)
- goto out_unlock;
retval = check_unsafe_exec(bprm);
if (retval < 0)
- goto out_unlock;
+ goto out_free;
clear_in_exec = retval;
+ current->in_execve = 1;
file = open_exec(filename);
retval = PTR_ERR(file);
@@ -1340,7 +1357,6 @@ int do_execve(char * filename,
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
- mutex_unlock(&current->cred_guard_mutex);
acct_update_integrals(current);
free_bprm(bprm);
if (displaced)
@@ -1360,10 +1376,7 @@ out_file:
out_unmark:
if (clear_in_exec)
current->fs->in_exec = 0;
-
-out_unlock:
current->in_execve = 0;
- mutex_unlock(&current->cred_guard_mutex);
out_free:
free_bprm(bprm);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d636e1297cad..a63d44256a70 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
}
-static int
+int
ext2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int
-ext2_permission(struct inode *inode, int mask)
-{
- return generic_permission(inode, mask, ext2_check_acl);
-}
-
/*
* Initialize the ACLs of a new inode. Called from ext2_new_inode.
*
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index ecefe478898f..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size)
#ifdef CONFIG_EXT2_FS_POSIX_ACL
/* acl.c */
-extern int ext2_permission (struct inode *, int);
+extern int ext2_check_acl (struct inode *, int);
extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *);
#else
#include <linux/sched.h>
-#define ext2_permission NULL
+#define ext2_check_acl NULL
#define ext2_get_acl NULL
#define ext2_set_acl NULL
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b9e47dc9222..a2f3afd1a1c1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .permission = ext2_permission,
+ .check_acl = ext2_check_acl,
.fiemap = ext2_fiemap,
};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e27130341d4f..1c1638f873a4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
unlock_buffer(bh);
mark_buffer_dirty_inode(bh, inode);
/* We used to sync bh here if IS_SYNC(inode).
- * But we now rely upon generic_osync_inode()
+ * But we now rely upon generic_write_sync()
* and b_inode_buffers. But not for directories.
*/
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1dedb0f7873..23701f289e98 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
if (dir_de) {
if (old_dir != new_dir)
ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
+ else {
+ kunmap(dir_page);
+ page_cache_release(dir_page);
+ }
inode_dec_link_count(old_dir);
}
return 0;
@@ -396,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .permission = ext2_permission,
+ .check_acl = ext2_check_acl,
};
const struct inode_operations ext2_special_inode_operations = {
@@ -407,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .permission = ext2_permission,
+ .check_acl = ext2_check_acl,
};
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index fb3c1a21b135..522b15498f45 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -29,23 +29,25 @@ config EXT3_FS
module will be called ext3.
config EXT3_DEFAULTS_TO_ORDERED
- bool "Default to 'data=ordered' in ext3 (legacy option)"
+ bool "Default to 'data=ordered' in ext3"
depends on EXT3_FS
help
- If a filesystem does not explicitly specify a data ordering
- mode, and the journal capability allowed it, ext3 used to
- historically default to 'data=ordered'.
-
- That was a rather unfortunate choice, because it leads to all
- kinds of latency problems, and the 'data=writeback' mode is more
- appropriate these days.
-
- You should probably always answer 'n' here, and if you really
- want to use 'data=ordered' mode, set it in the filesystem itself
- with 'tune2fs -o journal_data_ordered'.
-
- But if you really want to enable the legacy default, you can do
- so by answering 'y' to this question.
+ The journal mode options for ext3 have different tradeoffs
+ between when data is guaranteed to be on disk and
+ performance. The use of "data=writeback" can cause
+ unwritten data to appear in files after an system crash or
+ power failure, which can be a security issue. However,
+ "data=ordered" mode can also result in major performance
+ problems, including seconds-long delays before an fsync()
+ call returns. For details, see:
+
+ http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs
+
+ If you have been historically happy with ext3's performance,
+ data=ordered mode will be a safe choice and you should
+ answer 'y' here. If you understand the reliability and data
+ privacy issues of data=writeback and are willing to make
+ that trade off, answer 'n'.
config EXT3_FS_XATTR
bool "Ext3 extended attributes"
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e167bae37ef0..c9b0df376b5f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
}
-static int
+int
ext3_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int
-ext3_permission(struct inode *inode, int mask)
-{
- return generic_permission(inode, mask, ext3_check_acl);
-}
-
/*
* Initialize the ACLs of a new inode. Called from ext3_new_inode.
*
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 07d15a3a5969..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_permission (struct inode *, int);
+extern int ext3_check_acl (struct inode *, int);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT3_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext3_permission NULL
+#define ext3_check_acl NULL
static inline int
ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5b49704b231b..388bbdfa0b4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
return 0;
}
-static ssize_t
-ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_path.dentry->d_inode;
- ssize_t ret;
- int err;
-
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-
- /*
- * Skip flushing if there was an error, or if nothing was written.
- */
- if (ret <= 0)
- return ret;
-
- /*
- * If the inode is IS_SYNC, or is O_SYNC and we are doing data
- * journalling then we need to make sure that we force the transaction
- * to disk to keep all metadata uptodate synchronously.
- */
- if (file->f_flags & O_SYNC) {
- /*
- * If we are non-data-journaled, then the dirty data has
- * already been flushed to backing store by generic_osync_inode,
- * and the inode has been flushed too if there have been any
- * modifications other than mere timestamp updates.
- *
- * Open question --- do we care about flushing timestamps too
- * if the inode is IS_SYNC?
- */
- if (!ext3_should_journal_data(inode))
- return ret;
-
- goto force_commit;
- }
-
- /*
- * So we know that there has been no forced data flush. If the inode
- * is marked IS_SYNC, we need to force one ourselves.
- */
- if (!IS_SYNC(inode))
- return ret;
-
- /*
- * Open question #2 --- should we force data to disk here too? If we
- * don't, the only impact is that data=writeback filesystems won't
- * flush data to disk automatically on IS_SYNC, only metadata (but
- * historically, that is what ext2 has done.)
- */
-
-force_commit:
- err = ext3_force_commit(inode->i_sb);
- if (err)
- return err;
- return ret;
-}
-
const struct file_operations ext3_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
- .aio_write = ext3_file_write,
+ .aio_write = generic_file_aio_write,
.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl,
@@ -137,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
.fiemap = ext3_fiemap,
};
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6ff7b9730234..aad6400c9b77 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
};
const struct inode_operations ext3_special_inode_operations = {
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext3_permission,
+ .check_acl = ext3_check_acl,
};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 524b349c6299..a8d80a7f1105 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
#endif
}
+static char *data_mode_string(unsigned long mode)
+{
+ switch (mode) {
+ case EXT3_MOUNT_JOURNAL_DATA:
+ return "journal";
+ case EXT3_MOUNT_ORDERED_DATA:
+ return "ordered";
+ case EXT3_MOUNT_WRITEBACK_DATA:
+ return "writeback";
+ }
+ return "unknown";
+}
+
/*
* Show an option if
* - it's set to a non-default value OR
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
- if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
- seq_puts(seq, ",data=journal");
- else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
- seq_puts(seq, ",data=ordered");
- else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
- seq_puts(seq, ",data=writeback");
-
+ seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt &
+ EXT3_MOUNT_DATA_FLAGS));
if (test_opt(sb, DATA_ERR_ABORT))
seq_puts(seq, ",data_err=abort");
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb,
datacheck:
if (is_remount) {
if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
- != data_opt) {
- printk(KERN_ERR
- "EXT3-fs: cannot change data "
- "mode on remount\n");
- return 0;
- }
+ == data_opt)
+ break;
+ printk(KERN_ERR
+ "EXT3-fs (device %s): Cannot change "
+ "data mode on remount. The filesystem "
+ "is mounted in data=%s mode and you "
+ "try to remount it in data=%s mode.\n",
+ sb->s_id,
+ data_mode_string(sbi->s_mount_opt &
+ EXT3_MOUNT_DATA_FLAGS),
+ data_mode_string(data_opt));
+ return 0;
} else {
sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
sbi->s_mount_opt |= data_opt;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index f6d8967149ca..0df88b2a69b0 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
}
-static int
+int
ext4_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int
-ext4_permission(struct inode *inode, int mask)
-{
- return generic_permission(inode, mask, ext4_check_acl);
-}
-
/*
* Initialize the ACLs of a new inode. Called from ext4_new_inode.
*
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 949789d2bba6..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
/* acl.c */
-extern int ext4_permission(struct inode *, int);
+extern int ext4_check_acl(struct inode *, int);
extern int ext4_acl_chmod(struct inode *);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT4_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext4_permission NULL
+#define ext4_check_acl NULL
static inline int
ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3f1873fef1c6..5ca3eca70a1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -58,10 +58,7 @@ static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_path.dentry->d_inode;
- ssize_t ret;
- int err;
+ struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
/*
* If we have encountered a bitmap-format file, the size limit
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
}
}
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
- /*
- * Skip flushing if there was an error, or if nothing was written.
- */
- if (ret <= 0)
- return ret;
-
- /*
- * If the inode is IS_SYNC, or is O_SYNC and we are doing data
- * journalling then we need to make sure that we force the transaction
- * to disk to keep all metadata uptodate synchronously.
- */
- if (file->f_flags & O_SYNC) {
- /*
- * If we are non-data-journaled, then the dirty data has
- * already been flushed to backing store by generic_osync_inode,
- * and the inode has been flushed too if there have been any
- * modifications other than mere timestamp updates.
- *
- * Open question --- do we care about flushing timestamps too
- * if the inode is IS_SYNC?
- */
- if (!ext4_should_journal_data(inode))
- return ret;
-
- goto force_commit;
- }
-
- /*
- * So we know that there has been no forced data flush. If the inode
- * is marked IS_SYNC, we need to force one ourselves.
- */
- if (!IS_SYNC(inode))
- return ret;
-
- /*
- * Open question #2 --- should we force data to disk here too? If we
- * don't, the only impact is that data=writeback filesystems won't
- * flush data to disk automatically on IS_SYNC, only metadata (but
- * historically, that is what ext2 has done.)
- */
-
-force_commit:
- err = ext4_force_commit(inode->i_sb);
- if (err)
- return err;
- return ret;
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
static struct vm_operations_struct ext4_file_vm_ops = {
@@ -207,7 +158,7 @@ const struct inode_operations ext4_file_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext4_permission,
+ .check_acl = ext4_check_acl,
.fallocate = ext4_fallocate,
.fiemap = ext4_fiemap,
};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index de04013d16ff..114abe5d2c1d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2536,7 +2536,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext4_permission,
+ .check_acl = ext4_check_acl,
.fiemap = ext4_fiemap,
};
@@ -2548,5 +2548,5 @@ const struct inode_operations ext4_special_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .permission = ext4_permission,
+ .check_acl = ext4_check_acl,
};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f042b965c95c..e8c159de236b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- if (IS_SYNC(inode))
- err = sync_page_range_nolock(inode, mapping, start, count);
+ if (IS_SYNC(inode)) {
+ int err2;
+
+ /*
+ * Opencode syncing since we don't have a file open to use
+ * standard fsync path.
+ */
+ err = filemap_fdatawrite_range(mapping, start,
+ start + count - 1);
+ err2 = sync_mapping_buffers(mapping);
+ if (!err)
+ err = err2;
+ err2 = write_inode_now(inode, 1);
+ if (!err)
+ err = err2;
+ if (!err) {
+ err = filemap_fdatawait_range(mapping, start,
+ start + count - 1);
+ }
+ }
out:
return err;
}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index a6c20473dfd7..4e35be873e09 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
MSDOS_I(inode)->i_start = new_dclus;
MSDOS_I(inode)->i_logstart = new_dclus;
/*
- * Since generic_osync_inode() synchronize later if
- * this is not directory, we don't here.
+ * Since generic_write_sync() synchronizes regular files later,
+ * we sync here only directories.
*/
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
ret = fat_sync_inode(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c54226be5294..628235cf44b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -19,171 +19,223 @@
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include "internal.h"
+#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
-/**
- * writeback_acquire - attempt to get exclusive writeback access to a device
- * @bdi: the device's backing_dev_info structure
- *
- * It is a waste of resources to have more than one pdflush thread blocked on
- * a single request queue. Exclusion at the request_queue level is obtained
- * via a flag in the request_queue's backing_dev_info.state.
- *
- * Non-request_queue-backed address_spaces will share default_backing_dev_info,
- * unless they implement their own. Which is somewhat inefficient, as this
- * may prevent concurrent writeback against multiple devices.
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
*/
-static int writeback_acquire(struct backing_dev_info *bdi)
+int nr_pdflush_threads;
+
+/*
+ * Work items for the bdi_writeback threads
+ */
+struct bdi_work {
+ struct list_head list;
+ struct list_head wait_list;
+ struct rcu_head rcu_head;
+
+ unsigned long seen;
+ atomic_t pending;
+
+ struct super_block *sb;
+ unsigned long nr_pages;
+ enum writeback_sync_modes sync_mode;
+
+ unsigned long state;
+};
+
+enum {
+ WS_USED_B = 0,
+ WS_ONSTACK_B,
+};
+
+#define WS_USED (1 << WS_USED_B)
+#define WS_ONSTACK (1 << WS_ONSTACK_B)
+
+static inline bool bdi_work_on_stack(struct bdi_work *work)
+{
+ return test_bit(WS_ONSTACK_B, &work->state);
+}
+
+static inline void bdi_work_init(struct bdi_work *work,
+ struct writeback_control *wbc)
+{
+ INIT_RCU_HEAD(&work->rcu_head);
+ work->sb = wbc->sb;
+ work->nr_pages = wbc->nr_to_write;
+ work->sync_mode = wbc->sync_mode;
+ work->state = WS_USED;
+}
+
+static inline void bdi_work_init_on_stack(struct bdi_work *work,
+ struct writeback_control *wbc)
{
- return !test_and_set_bit(BDI_pdflush, &bdi->state);
+ bdi_work_init(work, wbc);
+ work->state |= WS_ONSTACK;
}
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
*
- * Determine whether there is writeback in progress against a backing device.
+ * Determine whether there is writeback waiting to be handled against a
+ * backing device.
*/
int writeback_in_progress(struct backing_dev_info *bdi)
{
- return test_bit(BDI_pdflush, &bdi->state);
+ return !list_empty(&bdi->work_list);
}
-/**
- * writeback_release - relinquish exclusive writeback access against a device.
- * @bdi: the device's backing_dev_info structure
- */
-static void writeback_release(struct backing_dev_info *bdi)
+static void bdi_work_clear(struct bdi_work *work)
{
- BUG_ON(!writeback_in_progress(bdi));
- clear_bit(BDI_pdflush, &bdi->state);
+ clear_bit(WS_USED_B, &work->state);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&work->state, WS_USED_B);
}
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+static void bdi_work_free(struct rcu_head *head)
{
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
- struct dentry *dentry;
- const char *name = "?";
+ struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
- dentry = d_find_alias(inode);
- if (dentry) {
- spin_lock(&dentry->d_lock);
- name = (const char *) dentry->d_name.name;
- }
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- if (dentry) {
- spin_unlock(&dentry->d_lock);
- dput(dentry);
- }
- }
+ if (!bdi_work_on_stack(work))
+ kfree(work);
+ else
+ bdi_work_clear(work);
}
-/**
- * __mark_inode_dirty - internal function
- * @inode: inode to mark
- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
- * Mark an inode as dirty. Callers should use mark_inode_dirty or
- * mark_inode_dirty_sync.
- *
- * Put the inode on the super block's dirty list.
- *
- * CAREFUL! We mark it dirty unconditionally, but move it onto the
- * dirty list only if it is hashed or if it refers to a blockdev.
- * If it was not hashed, it will never be added to the dirty list
- * even if it is later hashed, as it will have been marked dirty already.
- *
- * In short, make sure you hash any inodes _before_ you start marking
- * them dirty.
- *
- * This function *must* be atomic for the I_DIRTY_PAGES case -
- * set_page_dirty() is called under spinlock in several places.
- *
- * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
- * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
- * the kernel-internal blockdev inode represents the dirtying time of the
- * blockdev's pages. This is why for I_DIRTY_PAGES we always use
- * page->mapping->host, so the page-dirtying time is recorded in the internal
- * blockdev inode.
- */
-void __mark_inode_dirty(struct inode *inode, int flags)
+static void wb_work_complete(struct bdi_work *work)
{
- struct super_block *sb = inode->i_sb;
+ const enum writeback_sync_modes sync_mode = work->sync_mode;
/*
- * Don't do this for I_DIRTY_PAGES - that doesn't actually
- * dirty the inode itself
+ * For allocated work, we can clear the done/seen bit right here.
+ * For on-stack work, we need to postpone both the clear and free
+ * to after the RCU grace period, since the stack could be invalidated
+ * as soon as bdi_work_clear() has done the wakeup.
*/
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
- if (sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode);
- }
+ if (!bdi_work_on_stack(work))
+ bdi_work_clear(work);
+ if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
+ call_rcu(&work->rcu_head, bdi_work_free);
+}
+static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
+{
/*
- * make sure that changes are seen by all cpus before we test i_state
- * -- mikulas
+ * The caller has retrieved the work arguments from this work,
+ * drop our reference. If this is the last ref, delete and free it
*/
- smp_mb();
-
- /* avoid the locking if we can */
- if ((inode->i_state & flags) == flags)
- return;
+ if (atomic_dec_and_test(&work->pending)) {
+ struct backing_dev_info *bdi = wb->bdi;
- if (unlikely(block_dump))
- block_dump___mark_inode_dirty(inode);
+ spin_lock(&bdi->wb_lock);
+ list_del_rcu(&work->list);
+ spin_unlock(&bdi->wb_lock);
- spin_lock(&inode_lock);
- if ((inode->i_state & flags) != flags) {
- const int was_dirty = inode->i_state & I_DIRTY;
+ wb_work_complete(work);
+ }
+}
- inode->i_state |= flags;
+static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+{
+ if (work) {
+ work->seen = bdi->wb_mask;
+ BUG_ON(!work->seen);
+ atomic_set(&work->pending, bdi->wb_cnt);
+ BUG_ON(!bdi->wb_cnt);
/*
- * If the inode is being synced, just update its dirty state.
- * The unlocker will place the inode on the appropriate
- * superblock list, based upon its state.
+ * Make sure stores are seen before it appears on the list
*/
- if (inode->i_state & I_SYNC)
- goto out;
+ smp_mb();
- /*
- * Only add valid (hashed) inodes to the superblock's
- * dirty list. Add blockdev inodes as well.
- */
- if (!S_ISBLK(inode->i_mode)) {
- if (hlist_unhashed(&inode->i_hash))
- goto out;
- }
- if (inode->i_state & (I_FREEING|I_CLEAR))
- goto out;
+ spin_lock(&bdi->wb_lock);
+ list_add_tail_rcu(&work->list, &bdi->work_list);
+ spin_unlock(&bdi->wb_lock);
+ }
+
+ /*
+ * If the default thread isn't there, make sure we add it. When
+ * it gets created and wakes up, we'll run this work.
+ */
+ if (unlikely(list_empty_careful(&bdi->wb_list)))
+ wake_up_process(default_backing_dev_info.wb.task);
+ else {
+ struct bdi_writeback *wb = &bdi->wb;
/*
- * If the inode was already on s_dirty/s_io/s_more_io, don't
- * reposition it (that would break s_dirty time-ordering).
+ * If we failed allocating the bdi work item, wake up the wb
+ * thread always. As a safety precaution, it'll flush out
+ * everything
*/
- if (!was_dirty) {
- inode->dirtied_when = jiffies;
- list_move(&inode->i_list, &sb->s_dirty);
- }
+ if (!wb_has_dirty_io(wb)) {
+ if (work)
+ wb_clear_pending(wb, work);
+ } else if (wb->task)
+ wake_up_process(wb->task);
}
-out:
- spin_unlock(&inode_lock);
}
-EXPORT_SYMBOL(__mark_inode_dirty);
+/*
+ * Used for on-stack allocated work items. The caller needs to wait until
+ * the wb threads have acked the work before it's safe to continue.
+ */
+static void bdi_wait_on_work_clear(struct bdi_work *work)
+{
+ wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+ TASK_UNINTERRUPTIBLE);
+}
-static int write_inode(struct inode *inode, int sync)
+static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
{
- if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
- return inode->i_sb->s_op->write_inode(inode, sync);
- return 0;
+ struct bdi_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work)
+ bdi_work_init(work, wbc);
+
+ return work;
+}
+
+void bdi_start_writeback(struct writeback_control *wbc)
+{
+ const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
+ struct bdi_work work_stack, *work = NULL;
+
+ if (!must_wait)
+ work = bdi_alloc_work(wbc);
+
+ if (!work) {
+ work = &work_stack;
+ bdi_work_init_on_stack(work, wbc);
+ }
+
+ bdi_queue_work(wbc->bdi, work);
+
+ /*
+ * If the sync mode is WB_SYNC_ALL, block waiting for the work to
+ * complete. If not, we only need to wait for the work to be started,
+ * if we allocated it on-stack. We use the same mechanism, if the
+ * wait bit is set in the bdi_work struct, then threads will not
+ * clear pending until after they are done.
+ *
+ * Note that work == &work_stack if must_wait is true, so we don't
+ * need to do call_rcu() here ever, since the completion path will
+ * have done that for us.
+ */
+ if (must_wait || work == &work_stack) {
+ bdi_wait_on_work_clear(work);
+ if (work != &work_stack)
+ call_rcu(&work->rcu_head, bdi_work_free);
+ }
}
/*
@@ -191,31 +243,32 @@ static int write_inode(struct inode *inode, int sync)
* furthest end of its superblock's dirty-inode list.
*
* Before stamping the inode's ->dirtied_when, we check to see whether it is
- * already the most-recently-dirtied inode on the s_dirty list. If that is
+ * already the most-recently-dirtied inode on the b_dirty list. If that is
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
static void redirty_tail(struct inode *inode)
{
- struct super_block *sb = inode->i_sb;
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- if (!list_empty(&sb->s_dirty)) {
- struct inode *tail_inode;
+ if (!list_empty(&wb->b_dirty)) {
+ struct inode *tail;
- tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
- if (time_before(inode->dirtied_when,
- tail_inode->dirtied_when))
+ tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+ if (time_before(inode->dirtied_when, tail->dirtied_when))
inode->dirtied_when = jiffies;
}
- list_move(&inode->i_list, &sb->s_dirty);
+ list_move(&inode->i_list, &wb->b_dirty);
}
/*
- * requeue inode for re-scanning after sb->s_io list is exhausted.
+ * requeue inode for re-scanning after bdi->b_io list is exhausted.
*/
static void requeue_io(struct inode *inode)
{
- list_move(&inode->i_list, &inode->i_sb->s_more_io);
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+
+ list_move(&inode->i_list, &wb->b_more_io);
}
static void inode_sync_complete(struct inode *inode)
@@ -262,20 +315,18 @@ static void move_expired_inodes(struct list_head *delaying_queue,
/*
* Queue all expired dirty inodes for io, eldest first.
*/
-static void queue_io(struct super_block *sb,
- unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
{
- list_splice_init(&sb->s_more_io, sb->s_io.prev);
- move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
+ list_splice_init(&wb->b_more_io, wb->b_io.prev);
+ move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
}
-int sb_has_dirty_inodes(struct super_block *sb)
+static int write_inode(struct inode *inode, int sync)
{
- return !list_empty(&sb->s_dirty) ||
- !list_empty(&sb->s_io) ||
- !list_empty(&sb->s_more_io);
+ if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
+ return inode->i_sb->s_op->write_inode(inode, sync);
+ return 0;
}
-EXPORT_SYMBOL(sb_has_dirty_inodes);
/*
* Wait for writeback on an inode to complete.
@@ -322,11 +373,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
if (inode->i_state & I_SYNC) {
/*
* If this inode is locked for writeback and we are not doing
- * writeback-for-data-integrity, move it to s_more_io so that
+ * writeback-for-data-integrity, move it to b_more_io so that
* writeback can proceed with the other inodes on s_io.
*
* We'll have another go at writing back this inode when we
- * completed a full scan of s_io.
+ * completed a full scan of b_io.
*/
if (!wait) {
requeue_io(inode);
@@ -371,11 +422,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
/*
* We didn't write back all the pages. nfs_writepages()
* sometimes bales out without doing anything. Redirty
- * the inode; Move it from s_io onto s_more_io/s_dirty.
+ * the inode; Move it from b_io onto b_more_io/b_dirty.
*/
/*
* akpm: if the caller was the kupdate function we put
- * this inode at the head of s_dirty so it gets first
+ * this inode at the head of b_dirty so it gets first
* consideration. Otherwise, move it to the tail, for
* the reasons described there. I'm not really sure
* how much sense this makes. Presumably I had a good
@@ -385,7 +436,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->for_kupdate) {
/*
* For the kupdate function we move the inode
- * to s_more_io so it will get more writeout as
+ * to b_more_io so it will get more writeout as
* soon as the queue becomes uncongested.
*/
inode->i_state |= I_DIRTY_PAGES;
@@ -434,50 +485,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Write out a superblock's list of dirty inodes. A wait will be performed
- * upon no inodes, all inodes or the final one, depending upon sync_mode.
- *
- * If older_than_this is non-NULL, then only write out inodes which
- * had their first dirtying at a time earlier than *older_than_this.
- *
- * If we're a pdflush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched. For other superblocks,
- * assume that all inodes are backed by the same queue.
- *
- * FIXME: this linear search could get expensive with many fileystems. But
- * how to fix? We need to go from an address_space to all inodes which share
- * a queue with that address_space. (Easy: have a global "dirty superblocks"
- * list).
+ * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * before calling writeback. So make sure that we do pin it, so it doesn't
+ * go away while we are writing inodes from it.
*
- * The inodes to be written are parked on sb->s_io. They are moved back onto
- * sb->s_dirty as they are selected for writing. This way, none can be missed
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on inode_sync_wait.
+ * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
+ * 1 if we failed.
*/
-void generic_sync_sb_inodes(struct super_block *sb,
+static int pin_sb_for_writeback(struct writeback_control *wbc,
+ struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ /*
+ * Caller must already hold the ref for this
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ return 0;
+ }
+
+ spin_lock(&sb_lock);
+ sb->s_count++;
+ if (down_read_trylock(&sb->s_umount)) {
+ if (sb->s_root) {
+ spin_unlock(&sb_lock);
+ return 0;
+ }
+ /*
+ * umounted, drop rwsem again and fall through to failure
+ */
+ up_read(&sb->s_umount);
+ }
+
+ sb->s_count--;
+ spin_unlock(&sb_lock);
+ return 1;
+}
+
+static void unpin_sb_for_writeback(struct writeback_control *wbc,
+ struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ return;
+
+ up_read(&sb->s_umount);
+ put_super(sb);
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
struct writeback_control *wbc)
{
+ struct super_block *sb = wbc->sb;
+ const int is_blkdev_sb = sb_is_blkdev_sb(sb);
const unsigned long start = jiffies; /* livelock avoidance */
- int sync = wbc->sync_mode == WB_SYNC_ALL;
spin_lock(&inode_lock);
- if (!wbc->for_kupdate || list_empty(&sb->s_io))
- queue_io(sb, wbc->older_than_this);
- while (!list_empty(&sb->s_io)) {
- struct inode *inode = list_entry(sb->s_io.prev,
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+
+ while (!list_empty(&wb->b_io)) {
+ struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- struct address_space *mapping = inode->i_mapping;
- struct backing_dev_info *bdi = mapping->backing_dev_info;
long pages_skipped;
- if (!bdi_cap_writeback_dirty(bdi)) {
+ /*
+ * super block given and doesn't match, skip this inode
+ */
+ if (sb && sb != inode->i_sb) {
+ redirty_tail(inode);
+ continue;
+ }
+
+ if (!bdi_cap_writeback_dirty(wb->bdi)) {
redirty_tail(inode);
- if (sb_is_blkdev_sb(sb)) {
+ if (is_blkdev_sb) {
/*
* Dirty memory-backed blockdev: the ramdisk
* driver does this. Skip just this inode
@@ -497,21 +582,14 @@ void generic_sync_sb_inodes(struct super_block *sb,
continue;
}
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
wbc->encountered_congestion = 1;
- if (!sb_is_blkdev_sb(sb))
+ if (!is_blkdev_sb)
break; /* Skip a congested fs */
requeue_io(inode);
continue; /* Skip a congested blockdev */
}
- if (wbc->bdi && bdi != wbc->bdi) {
- if (!sb_is_blkdev_sb(sb))
- break; /* fs has the wrong queue */
- requeue_io(inode);
- continue; /* blockdev has wrong queue */
- }
-
/*
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
@@ -519,16 +597,16 @@ void generic_sync_sb_inodes(struct super_block *sb,
if (inode_dirtied_after(inode, start))
break;
- /* Is another pdflush already flushing this queue? */
- if (current_is_pdflush() && !writeback_acquire(bdi))
- break;
+ if (pin_sb_for_writeback(wbc, inode)) {
+ requeue_io(inode);
+ continue;
+ }
BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
__iget(inode);
pages_skipped = wbc->pages_skipped;
writeback_single_inode(inode, wbc);
- if (current_is_pdflush())
- writeback_release(bdi);
+ unpin_sb_for_writeback(wbc, inode);
if (wbc->pages_skipped != pages_skipped) {
/*
* writeback is not making progress due to locked
@@ -544,144 +622,571 @@ void generic_sync_sb_inodes(struct super_block *sb,
wbc->more_io = 1;
break;
}
- if (!list_empty(&sb->s_more_io))
+ if (!list_empty(&wb->b_more_io))
wbc->more_io = 1;
}
- if (sync) {
- struct inode *inode, *old_inode = NULL;
+ spin_unlock(&inode_lock);
+ /* Leave any unwritten inodes on b_io */
+}
+
+void writeback_inodes_wbc(struct writeback_control *wbc)
+{
+ struct backing_dev_info *bdi = wbc->bdi;
+
+ writeback_inodes_wb(&bdi->wb, wbc);
+}
+
+/*
+ * The maximum number of pages to writeout in a single bdi flush/kupdate
+ * operation. We do this so we don't hold I_SYNC against an inode for
+ * enormous amounts of time, which would block a userspace task which has
+ * been forced to throttle against that inode. Also, the code reevaluates
+ * the dirty each time it has written this many pages.
+ */
+#define MAX_WRITEBACK_PAGES 1024
+
+static inline bool over_bground_thresh(void)
+{
+ unsigned long background_thresh, dirty_thresh;
+
+ get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
+
+ return (global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
+}
+
+/*
+ * Explicit flushing or periodic writeback of "old" data.
+ *
+ * Define "old": the first time one of an inode's pages is dirtied, we mark the
+ * dirtying-time in the inode's address_space. So this periodic writeback code
+ * just walks the superblock inode list, writing back any inodes which are
+ * older than a specific point in time.
+ *
+ * Try to run once per dirty_writeback_interval. But if a writeback event
+ * takes longer than a dirty_writeback_interval interval, then leave a
+ * one-second gap.
+ *
+ * older_than_this takes precedence over nr_to_write. So we'll only write back
+ * all dirty pages if they are all attached to "old" mappings.
+ */
+static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
+ struct super_block *sb,
+ enum writeback_sync_modes sync_mode, int for_kupdate)
+{
+ struct writeback_control wbc = {
+ .bdi = wb->bdi,
+ .sb = sb,
+ .sync_mode = sync_mode,
+ .older_than_this = NULL,
+ .for_kupdate = for_kupdate,
+ .range_cyclic = 1,
+ };
+ unsigned long oldest_jif;
+ long wrote = 0;
+
+ if (wbc.for_kupdate) {
+ wbc.older_than_this = &oldest_jif;
+ oldest_jif = jiffies -
+ msecs_to_jiffies(dirty_expire_interval * 10);
+ }
+
+ for (;;) {
+ /*
+ * Don't flush anything for non-integrity writeback where
+ * no nr_pages was given
+ */
+ if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
+ break;
/*
- * Data integrity sync. Must wait for all pages under writeback,
- * because there may have been pages dirtied before our sync
- * call, but which had writeout started before we write it out.
- * In which case, the inode may not be on the dirty list, but
- * we still have to wait for that writeout.
+ * If no specific pages were given and this is just a
+ * periodic background writeout and we are below the
+ * background dirty threshold, don't do anything
*/
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- struct address_space *mapping;
+ if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
+ break;
- if (inode->i_state &
- (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
- continue;
- mapping = inode->i_mapping;
- if (mapping->nrpages == 0)
+ wbc.more_io = 0;
+ wbc.encountered_congestion = 0;
+ wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+ wbc.pages_skipped = 0;
+ writeback_inodes_wb(wb, &wbc);
+ nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+
+ /*
+ * If we ran out of stuff to write, bail unless more_io got set
+ */
+ if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
+ if (wbc.more_io && !wbc.for_kupdate)
continue;
- __iget(inode);
- spin_unlock(&inode_lock);
+ break;
+ }
+ }
+
+ return wrote;
+}
+
+/*
+ * Return the next bdi_work struct that hasn't been processed by this
+ * wb thread yet
+ */
+static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
+ struct bdi_writeback *wb)
+{
+ struct bdi_work *work, *ret = NULL;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(work, &bdi->work_list, list) {
+ if (!test_and_clear_bit(wb->nr, &work->seen))
+ continue;
+
+ ret = work;
+ break;
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static long wb_check_old_data_flush(struct bdi_writeback *wb)
+{
+ unsigned long expired;
+ long nr_pages;
+
+ expired = wb->last_old_flush +
+ msecs_to_jiffies(dirty_writeback_interval * 10);
+ if (time_before(jiffies, expired))
+ return 0;
+
+ wb->last_old_flush = jiffies;
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ if (nr_pages)
+ return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
+
+ return 0;
+}
+
+/*
+ * Retrieve work items and do the writeback they describe
+ */
+long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
+{
+ struct backing_dev_info *bdi = wb->bdi;
+ struct bdi_work *work;
+ long nr_pages, wrote = 0;
+
+ while ((work = get_next_work_item(bdi, wb)) != NULL) {
+ enum writeback_sync_modes sync_mode;
+
+ nr_pages = work->nr_pages;
+
+ /*
+ * Override sync mode, in case we must wait for completion
+ */
+ if (force_wait)
+ work->sync_mode = sync_mode = WB_SYNC_ALL;
+ else
+ sync_mode = work->sync_mode;
+
+ /*
+ * If this isn't a data integrity operation, just notify
+ * that we have seen this work and we are now starting it.
+ */
+ if (sync_mode == WB_SYNC_NONE)
+ wb_clear_pending(wb, work);
+
+ wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
+
+ /*
+ * This is a data integrity writeback, so only do the
+ * notification when we have completed the work.
+ */
+ if (sync_mode == WB_SYNC_ALL)
+ wb_clear_pending(wb, work);
+ }
+
+ /*
+ * Check for periodic writeback, kupdated() style
+ */
+ wrote += wb_check_old_data_flush(wb);
+
+ return wrote;
+}
+
+/*
+ * Handle writeback of dirty data for the device backed by this bdi. Also
+ * wakes up periodically and does kupdated style flushing.
+ */
+int bdi_writeback_task(struct bdi_writeback *wb)
+{
+ unsigned long last_active = jiffies;
+ unsigned long wait_jiffies = -1UL;
+ long pages_written;
+
+ while (!kthread_should_stop()) {
+ pages_written = wb_do_writeback(wb, 0);
+
+ if (pages_written)
+ last_active = jiffies;
+ else if (wait_jiffies != -1UL) {
+ unsigned long max_idle;
+
/*
- * We hold a reference to 'inode' so it couldn't have
- * been removed from s_inodes list while we dropped the
- * inode_lock. We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it
- * under inode_lock. So we keep the reference and iput
- * it later.
+ * Longest period of inactivity that we tolerate. If we
+ * see dirty data again later, the task will get
+ * recreated automatically.
*/
- iput(old_inode);
- old_inode = inode;
+ max_idle = max(5UL * 60 * HZ, wait_jiffies);
+ if (time_after(jiffies, max_idle + last_active))
+ break;
+ }
+
+ wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(wait_jiffies);
+ try_to_freeze();
+ }
+
+ return 0;
+}
+
+/*
+ * Schedule writeback for all backing devices. Expensive! If this is a data
+ * integrity operation, writeback will be complete when this returns. If
+ * we are simply called for WB_SYNC_NONE, then writeback will merely be
+ * scheduled to run.
+ */
+static void bdi_writeback_all(struct writeback_control *wbc)
+{
+ const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
+ struct backing_dev_info *bdi;
+ struct bdi_work *work;
+ LIST_HEAD(list);
+
+restart:
+ spin_lock(&bdi_lock);
+
+ list_for_each_entry(bdi, &bdi_list, bdi_list) {
+ struct bdi_work *work;
+
+ if (!bdi_has_dirty_io(bdi))
+ continue;
- filemap_fdatawait(mapping);
+ /*
+ * If work allocation fails, do the writes inline. We drop
+ * the lock and restart the list writeout. This should be OK,
+ * since this happens rarely and because the writeout should
+ * eventually make more free memory available.
+ */
+ work = bdi_alloc_work(wbc);
+ if (!work) {
+ struct writeback_control __wbc;
- cond_resched();
+ /*
+ * Not a data integrity writeout, just continue
+ */
+ if (!must_wait)
+ continue;
- spin_lock(&inode_lock);
+ spin_unlock(&bdi_lock);
+ __wbc = *wbc;
+ __wbc.bdi = bdi;
+ writeback_inodes_wbc(&__wbc);
+ goto restart;
}
- spin_unlock(&inode_lock);
- iput(old_inode);
- } else
- spin_unlock(&inode_lock);
+ if (must_wait)
+ list_add_tail(&work->wait_list, &list);
+
+ bdi_queue_work(bdi, work);
+ }
+
+ spin_unlock(&bdi_lock);
- return; /* Leave any unwritten inodes on s_io */
+ /*
+ * If this is for WB_SYNC_ALL, wait for pending work to complete
+ * before returning.
+ */
+ while (!list_empty(&list)) {
+ work = list_entry(list.next, struct bdi_work, wait_list);
+ list_del(&work->wait_list);
+ bdi_wait_on_work_clear(work);
+ call_rcu(&work->rcu_head, bdi_work_free);
+ }
}
-EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
-static void sync_sb_inodes(struct super_block *sb,
- struct writeback_control *wbc)
+/*
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
+ */
+void wakeup_flusher_threads(long nr_pages)
{
- generic_sync_sb_inodes(sb, wbc);
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .older_than_this = NULL,
+ .range_cyclic = 1,
+ };
+
+ if (nr_pages == 0)
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ wbc.nr_to_write = nr_pages;
+ bdi_writeback_all(&wbc);
}
-/*
- * Start writeback of dirty pagecache data against all unlocked inodes.
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+ if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+ struct dentry *dentry;
+ const char *name = "?";
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ spin_lock(&dentry->d_lock);
+ name = (const char *) dentry->d_name.name;
+ }
+ printk(KERN_DEBUG
+ "%s(%d): dirtied inode %lu (%s) on %s\n",
+ current->comm, task_pid_nr(current), inode->i_ino,
+ name, inode->i_sb->s_id);
+ if (dentry) {
+ spin_unlock(&dentry->d_lock);
+ dput(dentry);
+ }
+ }
+}
+
+/**
+ * __mark_inode_dirty - internal function
+ * @inode: inode to mark
+ * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
+ * Mark an inode as dirty. Callers should use mark_inode_dirty or
+ * mark_inode_dirty_sync.
+ *
+ * Put the inode on the super block's dirty list.
+ *
+ * CAREFUL! We mark it dirty unconditionally, but move it onto the
+ * dirty list only if it is hashed or if it refers to a blockdev.
+ * If it was not hashed, it will never be added to the dirty list
+ * even if it is later hashed, as it will have been marked dirty already.
*
- * Note:
- * We don't need to grab a reference to superblock here. If it has non-empty
- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
- * empty. Since __sync_single_inode() regains inode_lock before it finally moves
- * inode from superblock lists we are OK.
+ * In short, make sure you hash any inodes _before_ you start marking
+ * them dirty.
*
- * If `older_than_this' is non-zero then only flush inodes which have a
- * flushtime older than *older_than_this.
+ * This function *must* be atomic for the I_DIRTY_PAGES case -
+ * set_page_dirty() is called under spinlock in several places.
*
- * If `bdi' is non-zero then we will scan the first inode against each
- * superblock until we find the matching ones. One group will be the dirty
- * inodes against a filesystem. Then when we hit the dummy blockdev superblock,
- * sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not
- * super-efficient but we're about to do a ton of I/O...
+ * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
+ * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
+ * the kernel-internal blockdev inode represents the dirtying time of the
+ * blockdev's pages. This is why for I_DIRTY_PAGES we always use
+ * page->mapping->host, so the page-dirtying time is recorded in the internal
+ * blockdev inode.
*/
-void
-writeback_inodes(struct writeback_control *wbc)
+void __mark_inode_dirty(struct inode *inode, int flags)
{
- struct super_block *sb;
+ struct super_block *sb = inode->i_sb;
- might_sleep();
- spin_lock(&sb_lock);
-restart:
- list_for_each_entry_reverse(sb, &super_blocks, s_list) {
- if (sb_has_dirty_inodes(sb)) {
- /* we're making our own get_super here */
- sb->s_count++;
- spin_unlock(&sb_lock);
- /*
- * If we can't get the readlock, there's no sense in
- * waiting around, most of the time the FS is going to
- * be unmounted by the time it is released.
- */
- if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root)
- sync_sb_inodes(sb, wbc);
- up_read(&sb->s_umount);
+ /*
+ * Don't do this for I_DIRTY_PAGES - that doesn't actually
+ * dirty the inode itself
+ */
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+ if (sb->s_op->dirty_inode)
+ sb->s_op->dirty_inode(inode);
+ }
+
+ /*
+ * make sure that changes are seen by all cpus before we test i_state
+ * -- mikulas
+ */
+ smp_mb();
+
+ /* avoid the locking if we can */
+ if ((inode->i_state & flags) == flags)
+ return;
+
+ if (unlikely(block_dump))
+ block_dump___mark_inode_dirty(inode);
+
+ spin_lock(&inode_lock);
+ if ((inode->i_state & flags) != flags) {
+ const int was_dirty = inode->i_state & I_DIRTY;
+
+ inode->i_state |= flags;
+
+ /*
+ * If the inode is being synced, just update its dirty state.
+ * The unlocker will place the inode on the appropriate
+ * superblock list, based upon its state.
+ */
+ if (inode->i_state & I_SYNC)
+ goto out;
+
+ /*
+ * Only add valid (hashed) inodes to the superblock's
+ * dirty list. Add blockdev inodes as well.
+ */
+ if (!S_ISBLK(inode->i_mode)) {
+ if (hlist_unhashed(&inode->i_hash))
+ goto out;
+ }
+ if (inode->i_state & (I_FREEING|I_CLEAR))
+ goto out;
+
+ /*
+ * If the inode was already on b_dirty/b_io/b_more_io, don't
+ * reposition it (that would break b_dirty time-ordering).
+ */
+ if (!was_dirty) {
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+ struct backing_dev_info *bdi = wb->bdi;
+
+ if (bdi_cap_writeback_dirty(bdi) &&
+ !test_bit(BDI_registered, &bdi->state)) {
+ WARN_ON(1);
+ printk(KERN_ERR "bdi-%s not registered\n",
+ bdi->name);
}
- spin_lock(&sb_lock);
- if (__put_super_and_need_restart(sb))
- goto restart;
+
+ inode->dirtied_when = jiffies;
+ list_move(&inode->i_list, &wb->b_dirty);
}
- if (wbc->nr_to_write <= 0)
- break;
}
- spin_unlock(&sb_lock);
+out:
+ spin_unlock(&inode_lock);
}
+EXPORT_SYMBOL(__mark_inode_dirty);
/*
- * writeback and wait upon the filesystem's dirty inodes. The caller will
- * do this in two passes - one to write, and one to wait.
+ * Write out a superblock's list of dirty inodes. A wait will be performed
+ * upon no inodes, all inodes or the final one, depending upon sync_mode.
+ *
+ * If older_than_this is non-NULL, then only write out inodes which
+ * had their first dirtying at a time earlier than *older_than_this.
+ *
+ * If we're a pdlfush thread, then implement pdflush collision avoidance
+ * against the entire list.
*
- * A finite limit is set on the number of pages which will be written.
- * To prevent infinite livelock of sys_sync().
+ * If `bdi' is non-zero then we're being asked to writeback a specific queue.
+ * This function assumes that the blockdev superblock's inodes are backed by
+ * a variety of queues, so all inodes are searched. For other superblocks,
+ * assume that all inodes are backed by the same queue.
*
- * We add in the number of potentially dirty inodes, because each inode write
- * can dirty pagecache in the underlying blockdev.
+ * The inodes to be written are parked on bdi->b_io. They are moved back onto
+ * bdi->b_dirty as they are selected for writing. This way, none can be missed
+ * on the writer throttling path, and we get decent balancing between many
+ * throttled threads: we don't want them all piling up on inode_sync_wait.
+ */
+static void wait_sb_inodes(struct writeback_control *wbc)
+{
+ struct inode *inode, *old_inode = NULL;
+
+ /*
+ * We need to be protected against the filesystem going from
+ * r/o to r/w or vice versa.
+ */
+ WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
+
+ spin_lock(&inode_lock);
+
+ /*
+ * Data integrity sync. Must wait for all pages under writeback,
+ * because there may have been pages dirtied before our sync
+ * call, but which had writeout started before we write it out.
+ * In which case, the inode may not be on the dirty list, but
+ * we still have to wait for that writeout.
+ */
+ list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
+ struct address_space *mapping;
+
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ continue;
+ mapping = inode->i_mapping;
+ if (mapping->nrpages == 0)
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ /*
+ * We hold a reference to 'inode' so it couldn't have
+ * been removed from s_inodes list while we dropped the
+ * inode_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it
+ * under inode_lock. So we keep the reference and iput
+ * it later.
+ */
+ iput(old_inode);
+ old_inode = inode;
+
+ filemap_fdatawait(mapping);
+
+ cond_resched();
+
+ spin_lock(&inode_lock);
+ }
+ spin_unlock(&inode_lock);
+ iput(old_inode);
+}
+
+/**
+ * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO. The number of pages submitted is
+ * returned.
*/
-void sync_inodes_sb(struct super_block *sb, int wait)
+long writeback_inodes_sb(struct super_block *sb)
{
struct writeback_control wbc = {
- .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
.range_start = 0,
.range_end = LLONG_MAX,
};
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ long nr_to_write;
- if (!wait) {
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-
- wbc.nr_to_write = nr_dirty + nr_unstable +
+ nr_to_write = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
- } else
- wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
- sync_sb_inodes(sb, &wbc);
+ wbc.nr_to_write = nr_to_write;
+ bdi_writeback_all(&wbc);
+ return nr_to_write - wbc.nr_to_write;
}
+EXPORT_SYMBOL(writeback_inodes_sb);
+
+/**
+ * sync_inodes_sb - sync sb inode pages
+ * @sb: the superblock
+ *
+ * This function writes and waits on any dirty inode belonging to this
+ * super_block. The number of pages synced is returned.
+ */
+long sync_inodes_sb(struct super_block *sb)
+{
+ struct writeback_control wbc = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ };
+ long nr_to_write = LONG_MAX; /* doesn't actually matter */
+
+ wbc.nr_to_write = nr_to_write;
+ bdi_writeback_all(&wbc);
+ wait_sb_inodes(&wbc);
+ return nr_to_write - wbc.nr_to_write;
+}
+EXPORT_SYMBOL(sync_inodes_sb);
/**
* write_inode_now - write an inode to disk
@@ -737,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
return ret;
}
EXPORT_SYMBOL(sync_inode);
-
-/**
- * generic_osync_inode - flush all dirty data for a given inode to disk
- * @inode: inode to write
- * @mapping: the address_space that should be flushed
- * @what: what to write and wait upon
- *
- * This can be called by file_write functions for files which have the
- * O_SYNC flag set, to flush dirty writes to disk.
- *
- * @what is a bitmask, specifying which part of the inode's data should be
- * written and waited upon.
- *
- * OSYNC_DATA: i_mapping's dirty data
- * OSYNC_METADATA: the buffers at i_mapping->private_list
- * OSYNC_INODE: the inode itself
- */
-
-int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
-{
- int err = 0;
- int need_write_inode_now = 0;
- int err2;
-
- if (what & OSYNC_DATA)
- err = filemap_fdatawrite(mapping);
- if (what & (OSYNC_METADATA|OSYNC_DATA)) {
- err2 = sync_mapping_buffers(mapping);
- if (!err)
- err = err2;
- }
- if (what & OSYNC_DATA) {
- err2 = filemap_fdatawait(mapping);
- if (!err)
- err = err2;
- }
-
- spin_lock(&inode_lock);
- if ((inode->i_state & I_DIRTY) &&
- ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
- need_write_inode_now = 1;
- spin_unlock(&inode_lock);
-
- if (need_write_inode_now) {
- err2 = write_inode_now(inode, 1);
- if (!err)
- err = err2;
- }
- else
- inode_sync_wait(inode);
-
- return err;
-}
-EXPORT_SYMBOL(generic_osync_inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f91ccc4a189d..4567db6f9430 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -801,6 +801,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
int err;
+ fc->bdi.name = "fuse";
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
fc->bdi.unplug_io_fn = default_unplug_io_fn;
/* fuse does it's own writeback accounting */
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 3da2f1f4f738..21f7e46da4c0 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,6 +1,6 @@
EXTRA_CFLAGS := -I$(src)
obj-$(CONFIG_GFS2_FS) += gfs2.o
-gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
+gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
glops.o inode.o log.o lops.o main.o meta_io.o \
aops.o dentry.o export.o file.o \
ops_fstype.o ops_inode.o quota.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index fa881bdc3d85..3fc4e3ac7d84 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -19,8 +19,7 @@
#include "gfs2.h"
#include "incore.h"
#include "acl.h"
-#include "eaops.h"
-#include "eattr.h"
+#include "xattr.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
@@ -31,8 +30,7 @@
#define ACL_DEFAULT 0
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
- struct gfs2_ea_request *er,
- int *remove, mode_t *mode)
+ struct gfs2_ea_request *er, int *remove, mode_t *mode)
{
struct posix_acl *acl;
int error;
@@ -83,30 +81,20 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
return 0;
}
-static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
- struct gfs2_ea_location *el, char **data, unsigned int *len)
+static int acl_get(struct gfs2_inode *ip, const char *name,
+ struct posix_acl **acl, struct gfs2_ea_location *el,
+ char **datap, unsigned int *lenp)
{
- struct gfs2_ea_request er;
- struct gfs2_ea_location el_this;
+ char *data;
+ unsigned int len;
int error;
+ el->el_bh = NULL;
+
if (!ip->i_eattr)
return 0;
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- if (access) {
- er.er_name = GFS2_POSIX_ACL_ACCESS;
- er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
- } else {
- er.er_name = GFS2_POSIX_ACL_DEFAULT;
- er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
- }
- er.er_type = GFS2_EATYPE_SYS;
-
- if (!el)
- el = &el_this;
-
- error = gfs2_ea_find(ip, &er, el);
+ error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el);
if (error)
return error;
if (!el->el_ea)
@@ -114,32 +102,31 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
if (!GFS2_EA_DATA_LEN(el->el_ea))
goto out;
- er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
- er.er_data = kmalloc(er.er_data_len, GFP_NOFS);
+ len = GFS2_EA_DATA_LEN(el->el_ea);
+ data = kmalloc(len, GFP_NOFS);
error = -ENOMEM;
- if (!er.er_data)
+ if (!data)
goto out;
- error = gfs2_ea_get_copy(ip, el, er.er_data);
- if (error)
+ error = gfs2_ea_get_copy(ip, el, data, len);
+ if (error < 0)
goto out_kfree;
+ error = 0;
if (acl) {
- *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
+ *acl = posix_acl_from_xattr(data, len);
if (IS_ERR(*acl))
error = PTR_ERR(*acl);
}
out_kfree:
- if (error || !data)
- kfree(er.er_data);
- else {
- *data = er.er_data;
- *len = er.er_data_len;
+ if (error || !datap) {
+ kfree(data);
+ } else {
+ *datap = data;
+ *lenp = len;
}
out:
- if (error || el == &el_this)
- brelse(el->el_bh);
return error;
}
@@ -153,10 +140,12 @@ out:
int gfs2_check_acl(struct inode *inode, int mask)
{
+ struct gfs2_ea_location el;
struct posix_acl *acl = NULL;
int error;
- error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
+ error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL);
+ brelse(el.el_bh);
if (error)
return error;
@@ -196,10 +185,12 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
{
+ struct gfs2_ea_location el;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct posix_acl *acl = NULL, *clone;
- struct gfs2_ea_request er;
mode_t mode = ip->i_inode.i_mode;
+ char *data = NULL;
+ unsigned int len;
int error;
if (!sdp->sd_args.ar_posix_acl)
@@ -207,11 +198,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
if (S_ISLNK(ip->i_inode.i_mode))
return 0;
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- er.er_type = GFS2_EATYPE_SYS;
-
- error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
- &er.er_data, &er.er_data_len);
+ error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len);
+ brelse(el.el_bh);
if (error)
return error;
if (!acl) {
@@ -229,9 +217,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
acl = clone;
if (S_ISDIR(ip->i_inode.i_mode)) {
- er.er_name = GFS2_POSIX_ACL_DEFAULT;
- er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
- error = gfs2_system_eaops.eo_set(ip, &er);
+ error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
+ GFS2_POSIX_ACL_DEFAULT, data, len, 0);
if (error)
goto out;
}
@@ -239,21 +226,19 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
error = posix_acl_create_masq(acl, &mode);
if (error < 0)
goto out;
- if (error > 0) {
- er.er_name = GFS2_POSIX_ACL_ACCESS;
- er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
- posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
- er.er_mode = mode;
- er.er_flags = GFS2_ERF_MODE;
- error = gfs2_system_eaops.eo_set(ip, &er);
- if (error)
- goto out;
- } else
- munge_mode(ip, mode);
+ if (error == 0)
+ goto munge;
+ posix_acl_to_xattr(acl, data, len);
+ error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
+ GFS2_POSIX_ACL_ACCESS, data, len, 0);
+ if (error)
+ goto out;
+munge:
+ error = munge_mode(ip, mode);
out:
posix_acl_release(acl);
- kfree(er.er_data);
+ kfree(data);
return error;
}
@@ -265,9 +250,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
unsigned int len;
int error;
- error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
+ error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len);
if (error)
- return error;
+ goto out_brelse;
if (!acl)
return gfs2_setattr_simple(ip, attr);
@@ -286,8 +271,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
out:
posix_acl_release(acl);
- brelse(el.el_bh);
kfree(data);
+out_brelse:
+ brelse(el.el_bh);
return error;
}
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 022c66cd5606..91beddadd388 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -107,8 +107,26 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
return 0;
}
+static int gfs2_dentry_delete(struct dentry *dentry)
+{
+ struct gfs2_inode *ginode;
+
+ if (!dentry->d_inode)
+ return 0;
+
+ ginode = GFS2_I(dentry->d_inode);
+ if (!ginode->i_iopen_gh.gh_gl)
+ return 0;
+
+ if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
+ return 1;
+
+ return 0;
+}
+
const struct dentry_operations gfs2_dops = {
.d_revalidate = gfs2_drevalidate,
.d_hash = gfs2_dhash,
+ .d_delete = gfs2_dentry_delete,
};
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
deleted file mode 100644
index dee9b03e5b37..000000000000
--- a/fs/gfs2/eaops.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/gfs2_ondisk.h>
-#include <asm/uaccess.h>
-
-#include "gfs2.h"
-#include "incore.h"
-#include "acl.h"
-#include "eaops.h"
-#include "eattr.h"
-#include "util.h"
-
-/**
- * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
- * @namep: ea name, possibly with type appended
- *
- * Returns: GFS2_EATYPE_XXX
- */
-
-unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
-{
- unsigned int type;
-
- if (strncmp(name, "system.", 7) == 0) {
- type = GFS2_EATYPE_SYS;
- if (truncated_name)
- *truncated_name = name + sizeof("system.") - 1;
- } else if (strncmp(name, "user.", 5) == 0) {
- type = GFS2_EATYPE_USR;
- if (truncated_name)
- *truncated_name = name + sizeof("user.") - 1;
- } else if (strncmp(name, "security.", 9) == 0) {
- type = GFS2_EATYPE_SECURITY;
- if (truncated_name)
- *truncated_name = name + sizeof("security.") - 1;
- } else {
- type = GFS2_EATYPE_UNUSED;
- if (truncated_name)
- *truncated_name = NULL;
- }
-
- return type;
-}
-
-static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
- !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 &&
- (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
- GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
- return -EOPNOTSUPP;
-
- return gfs2_ea_get_i(ip, er);
-}
-
-static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- int remove = 0;
- int error;
-
- if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
- if (!(er->er_flags & GFS2_ERF_MODE)) {
- er->er_mode = ip->i_inode.i_mode;
- er->er_flags |= GFS2_ERF_MODE;
- }
- error = gfs2_acl_validate_set(ip, 1, er,
- &remove, &er->er_mode);
- if (error)
- return error;
- error = gfs2_ea_set_i(ip, er);
- if (error)
- return error;
- if (remove)
- gfs2_ea_remove_i(ip, er);
- return 0;
-
- } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
- error = gfs2_acl_validate_set(ip, 0, er,
- &remove, NULL);
- if (error)
- return error;
- if (!remove)
- error = gfs2_ea_set_i(ip, er);
- else {
- error = gfs2_ea_remove_i(ip, er);
- if (error == -ENODATA)
- error = 0;
- }
- return error;
- }
-
- return -EPERM;
-}
-
-static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
- int error = gfs2_acl_validate_remove(ip, 1);
- if (error)
- return error;
-
- } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
- int error = gfs2_acl_validate_remove(ip, 0);
- if (error)
- return error;
-
- } else
- return -EPERM;
-
- return gfs2_ea_remove_i(ip, er);
-}
-
-static const struct gfs2_eattr_operations gfs2_user_eaops = {
- .eo_get = gfs2_ea_get_i,
- .eo_set = gfs2_ea_set_i,
- .eo_remove = gfs2_ea_remove_i,
- .eo_name = "user",
-};
-
-const struct gfs2_eattr_operations gfs2_system_eaops = {
- .eo_get = system_eo_get,
- .eo_set = system_eo_set,
- .eo_remove = system_eo_remove,
- .eo_name = "system",
-};
-
-static const struct gfs2_eattr_operations gfs2_security_eaops = {
- .eo_get = gfs2_ea_get_i,
- .eo_set = gfs2_ea_set_i,
- .eo_remove = gfs2_ea_remove_i,
- .eo_name = "security",
-};
-
-const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
- NULL,
- &gfs2_user_eaops,
- &gfs2_system_eaops,
- &gfs2_security_eaops,
-};
-
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
deleted file mode 100644
index da2f7fbbb40d..000000000000
--- a/fs/gfs2/eaops.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __EAOPS_DOT_H__
-#define __EAOPS_DOT_H__
-
-struct gfs2_ea_request;
-struct gfs2_inode;
-
-struct gfs2_eattr_operations {
- int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
- int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
- int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
- char *eo_name;
-};
-
-unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
-
-extern const struct gfs2_eattr_operations gfs2_system_eaops;
-
-extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
-
-#endif /* __EAOPS_DOT_H__ */
-
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9200ef221716..d15876e9aa26 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -143,17 +143,14 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
}
static struct dentry *gfs2_get_dentry(struct super_block *sb,
- struct gfs2_inum_host *inum)
+ struct gfs2_inum_host *inum)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_holder i_gh, ri_gh, rgd_gh;
- struct gfs2_rgrpd *rgd;
+ struct gfs2_holder i_gh;
struct inode *inode;
struct dentry *dentry;
int error;
- /* System files? */
-
inode = gfs2_ilookup(sb, inum->no_addr);
if (inode) {
if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
@@ -168,29 +165,11 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
if (error)
return ERR_PTR(error);
- error = gfs2_rindex_hold(sdp, &ri_gh);
+ error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
if (error)
goto fail;
- error = -EINVAL;
- rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
- if (!rgd)
- goto fail_rindex;
-
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
- if (error)
- goto fail_rindex;
-
- error = -ESTALE;
- if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
- goto fail_rgd;
-
- gfs2_glock_dq_uninit(&rgd_gh);
- gfs2_glock_dq_uninit(&ri_gh);
-
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
- inum->no_addr,
- 0, 0);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0);
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
goto fail;
@@ -224,13 +203,6 @@ out_inode:
if (!IS_ERR(dentry))
dentry->d_op = &gfs2_dops;
return dentry;
-
-fail_rgd:
- gfs2_glock_dq_uninit(&rgd_gh);
-
-fail_rindex:
- gfs2_glock_dq_uninit(&ri_gh);
-
fail:
gfs2_glock_dq_uninit(&i_gh);
return ERR_PTR(error);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 73318a3ce6f1..166f38fbd246 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -38,7 +38,6 @@
#include "rgrp.h"
#include "trans.h"
#include "util.h"
-#include "eaops.h"
/**
* gfs2_llseek - seek to a location in a file
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 61801ada36f0..6edb423f90b3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -406,6 +406,12 @@ struct gfs2_statfs_change_host {
#define GFS2_DATA_WRITEBACK 1
#define GFS2_DATA_ORDERED 2
+#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
+#define GFS2_ERRORS_WITHDRAW 0
+#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
+#define GFS2_ERRORS_RO 2 /* place holder for future feature */
+#define GFS2_ERRORS_PANIC 3
+
struct gfs2_args {
char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
@@ -422,6 +428,7 @@ struct gfs2_args {
unsigned int ar_data:2; /* ordered/writeback */
unsigned int ar_meta:1; /* mount metafs */
unsigned int ar_discard:1; /* discard requests */
+ unsigned int ar_errors:2; /* errors=withdraw | panic */
int ar_commit; /* Commit interval */
};
@@ -489,7 +496,6 @@ struct gfs2_sb_host {
*/
struct lm_lockstruct {
- u32 ls_id;
unsigned int ls_jid;
unsigned int ls_first;
unsigned int ls_first_done;
@@ -541,18 +547,12 @@ struct gfs2_sbd {
struct dentry *sd_root_dir;
struct inode *sd_jindex;
- struct inode *sd_inum_inode;
struct inode *sd_statfs_inode;
- struct inode *sd_ir_inode;
struct inode *sd_sc_inode;
struct inode *sd_qc_inode;
struct inode *sd_rindex;
struct inode *sd_quota_inode;
- /* Inum stuff */
-
- struct mutex sd_inum_mutex;
-
/* StatFS stuff */
spinlock_t sd_statfs_spin;
@@ -580,7 +580,6 @@ struct gfs2_sbd {
struct gfs2_holder sd_journal_gh;
struct gfs2_holder sd_jinode_gh;
- struct gfs2_holder sd_ir_gh;
struct gfs2_holder sd_sc_gh;
struct gfs2_holder sd_qc_gh;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2f94bd723698..fb15d3b1f409 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -24,7 +24,7 @@
#include "acl.h"
#include "bmap.h"
#include "dir.h"
-#include "eattr.h"
+#include "xattr.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
@@ -519,139 +519,6 @@ out:
return inode ? inode : ERR_PTR(error);
}
-static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
- const struct gfs2_inum_range *str = buf;
-
- ir->ir_start = be64_to_cpu(str->ir_start);
- ir->ir_length = be64_to_cpu(str->ir_length);
-}
-
-static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
- struct gfs2_inum_range *str = buf;
-
- str->ir_start = cpu_to_be64(ir->ir_start);
- str->ir_length = cpu_to_be64(ir->ir_length);
-}
-
-static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
-{
- struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
- struct buffer_head *bh;
- struct gfs2_inum_range_host ir;
- int error;
-
- error = gfs2_trans_begin(sdp, RES_DINODE, 0);
- if (error)
- return error;
- mutex_lock(&sdp->sd_inum_mutex);
-
- error = gfs2_meta_inode_buffer(ip, &bh);
- if (error) {
- mutex_unlock(&sdp->sd_inum_mutex);
- gfs2_trans_end(sdp);
- return error;
- }
-
- gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
-
- if (ir.ir_length) {
- *formal_ino = ir.ir_start++;
- ir.ir_length--;
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_inum_range_out(&ir,
- bh->b_data + sizeof(struct gfs2_dinode));
- brelse(bh);
- mutex_unlock(&sdp->sd_inum_mutex);
- gfs2_trans_end(sdp);
- return 0;
- }
-
- brelse(bh);
-
- mutex_unlock(&sdp->sd_inum_mutex);
- gfs2_trans_end(sdp);
-
- return 1;
-}
-
-static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
-{
- struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
- struct gfs2_holder gh;
- struct buffer_head *bh;
- struct gfs2_inum_range_host ir;
- int error;
-
- error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- if (error)
- return error;
-
- error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
- if (error)
- goto out;
- mutex_lock(&sdp->sd_inum_mutex);
-
- error = gfs2_meta_inode_buffer(ip, &bh);
- if (error)
- goto out_end_trans;
-
- gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
-
- if (!ir.ir_length) {
- struct buffer_head *m_bh;
- u64 x, y;
- __be64 z;
-
- error = gfs2_meta_inode_buffer(m_ip, &m_bh);
- if (error)
- goto out_brelse;
-
- z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
- x = y = be64_to_cpu(z);
- ir.ir_start = x;
- ir.ir_length = GFS2_INUM_QUANTUM;
- x += GFS2_INUM_QUANTUM;
- if (x < y)
- gfs2_consist_inode(m_ip);
- z = cpu_to_be64(x);
- gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
- *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
-
- brelse(m_bh);
- }
-
- *formal_ino = ir.ir_start++;
- ir.ir_length--;
-
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
-
-out_brelse:
- brelse(bh);
-out_end_trans:
- mutex_unlock(&sdp->sd_inum_mutex);
- gfs2_trans_end(sdp);
-out:
- gfs2_glock_dq_uninit(&gh);
- return error;
-}
-
-static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum)
-{
- int error;
-
- error = pick_formal_ino_1(sdp, inum);
- if (error <= 0)
- return error;
-
- error = pick_formal_ino_2(sdp, inum);
-
- return error;
-}
-
/**
* create_ok - OK to create a new on-disk inode here?
* @dip: Directory in which dinode is to be created
@@ -731,7 +598,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
if (error)
goto out_ipreserv;
- *no_addr = gfs2_alloc_di(dip, generation);
+ error = gfs2_alloc_di(dip, no_addr, generation);
gfs2_trans_end(sdp);
@@ -924,7 +791,6 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
size_t len;
void *value;
char *name;
- struct gfs2_ea_request er;
err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
&name, &value, &len);
@@ -935,16 +801,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
return err;
}
- memset(&er, 0, sizeof(struct gfs2_ea_request));
-
- er.er_type = GFS2_EATYPE_SECURITY;
- er.er_name = name;
- er.er_data = value;
- er.er_name_len = strlen(name);
- er.er_data_len = len;
-
- err = gfs2_ea_set_i(ip, &er);
-
+ err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
kfree(value);
kfree(name);
@@ -991,13 +848,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock;
- error = pick_formal_ino(sdp, &inum.no_formal_ino);
- if (error)
- goto fail_gunlock;
-
error = alloc_dinode(dip, &inum.no_addr, &generation);
if (error)
goto fail_gunlock;
+ inum.no_formal_ino = generation;
error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
@@ -1008,9 +862,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock2;
- inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
- inum.no_addr,
- inum.no_formal_ino, 0);
+ inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
+ inum.no_formal_ino, 0);
if (IS_ERR(inode))
goto fail_gunlock2;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7bc3c45cd676..52fb6c048981 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -84,7 +84,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
gfs2_tune_init(&sdp->sd_tune);
- mutex_init(&sdp->sd_inum_mutex);
spin_lock_init(&sdp->sd_statfs_spin);
spin_lock_init(&sdp->sd_rindex_spin);
@@ -833,21 +832,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
if (error)
goto fail;
- /* Read in the master inode number inode */
- sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
- if (IS_ERR(sdp->sd_inum_inode)) {
- error = PTR_ERR(sdp->sd_inum_inode);
- fs_err(sdp, "can't read in inum inode: %d\n", error);
- goto fail_journal;
- }
-
-
/* Read in the master statfs inode */
sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
- goto fail_inum;
+ goto fail_journal;
}
/* Read in the resource index inode */
@@ -876,8 +866,6 @@ fail_rindex:
iput(sdp->sd_rindex);
fail_statfs:
iput(sdp->sd_statfs_inode);
-fail_inum:
- iput(sdp->sd_inum_inode);
fail_journal:
init_journal(sdp, UNDO);
fail:
@@ -905,20 +893,12 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
return error;
}
- sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
- sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
- if (IS_ERR(sdp->sd_ir_inode)) {
- error = PTR_ERR(sdp->sd_ir_inode);
- fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
- goto fail;
- }
-
sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_sc_inode)) {
error = PTR_ERR(sdp->sd_sc_inode);
fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
- goto fail_ir_i;
+ goto fail;
}
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
@@ -932,27 +912,16 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
iput(pn);
pn = NULL;
- ip = GFS2_I(sdp->sd_ir_inode);
- error = gfs2_glock_nq_init(ip->i_gl,
- LM_ST_EXCLUSIVE, 0,
- &sdp->sd_ir_gh);
- if (error) {
- fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
- goto fail_qc_i;
- }
-
ip = GFS2_I(sdp->sd_sc_inode);
- error = gfs2_glock_nq_init(ip->i_gl,
- LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
&sdp->sd_sc_gh);
if (error) {
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
- goto fail_ir_gh;
+ goto fail_qc_i;
}
ip = GFS2_I(sdp->sd_qc_inode);
- error = gfs2_glock_nq_init(ip->i_gl,
- LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
@@ -965,14 +934,10 @@ fail_qc_gh:
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
fail_ut_gh:
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
-fail_ir_gh:
- gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
fail_qc_i:
iput(sdp->sd_qc_inode);
fail_ut_i:
iput(sdp->sd_sc_inode);
-fail_ir_i:
- iput(sdp->sd_ir_inode);
fail:
if (pn)
iput(pn);
@@ -1063,7 +1028,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
ls->ls_ops = lm;
ls->ls_first = 1;
- ls->ls_id = 0;
for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
substring_t tmp[MAX_OPT_ARGS];
@@ -1081,10 +1045,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
ls->ls_jid = option;
break;
case Opt_id:
- ret = match_int(&tmp[0], &option);
- if (ret)
- goto hostdata_error;
- ls->ls_id = option;
+ /* Obsolete, but left for backward compat purposes */
break;
case Opt_first:
ret = match_int(&tmp[0], &option);
@@ -1133,6 +1094,17 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
lm->lm_unmount(sdp);
}
+void gfs2_online_uevent(struct gfs2_sbd *sdp)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ char ro[20];
+ char spectator[20];
+ char *envp[] = { ro, spectator, NULL };
+ sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
+ sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
+}
+
/**
* fill_super - Read in superblock
* @sb: The VFS superblock
@@ -1157,6 +1129,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
sdp->sd_args.ar_commit = 60;
+ sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT;
error = gfs2_mount_args(sdp, &sdp->sd_args, data);
if (error) {
@@ -1174,6 +1147,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
+ sb->s_xattr = gfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -1236,7 +1210,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
}
gfs2_glock_dq_uninit(&mount_gh);
-
+ gfs2_online_uevent(sdp);
return 0;
fail_threads:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f8bd20baf99c..c3ac18054057 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -26,8 +26,7 @@
#include "acl.h"
#include "bmap.h"
#include "dir.h"
-#include "eaops.h"
-#include "eattr.h"
+#include "xattr.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
@@ -349,7 +348,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
- goto out_rgrp;
+ goto out_gunlock;
error = gfs2_dir_del(dip, &dentry->d_name);
if (error)
@@ -1302,60 +1301,53 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
const void *data, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
- struct gfs2_ea_request er;
-
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- er.er_type = gfs2_ea_name2type(name, &er.er_name);
- if (er.er_type == GFS2_EATYPE_UNUSED)
- return -EOPNOTSUPP;
- er.er_data = (char *)data;
- er.er_name_len = strlen(er.er_name);
- er.er_data_len = size;
- er.er_flags = flags;
-
- gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE));
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
- return gfs2_ea_set(GFS2_I(inode), &er);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
+ if (ret == 0) {
+ ret = generic_setxattr(dentry, name, data, size, flags);
+ gfs2_glock_dq(&gh);
+ }
+ gfs2_holder_uninit(&gh);
+ return ret;
}
static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
void *data, size_t size)
{
- struct gfs2_ea_request er;
-
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- er.er_type = gfs2_ea_name2type(name, &er.er_name);
- if (er.er_type == GFS2_EATYPE_UNUSED)
- return -EOPNOTSUPP;
- er.er_data = data;
- er.er_name_len = strlen(er.er_name);
- er.er_data_len = size;
-
- return gfs2_ea_get(GFS2_I(dentry->d_inode), &er);
-}
-
-static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct gfs2_ea_request er;
-
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- er.er_data = (size) ? buffer : NULL;
- er.er_data_len = size;
+ struct inode *inode = dentry->d_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
- return gfs2_ea_list(GFS2_I(dentry->d_inode), &er);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+ ret = gfs2_glock_nq(&gh);
+ if (ret == 0) {
+ ret = generic_getxattr(dentry, name, data, size);
+ gfs2_glock_dq(&gh);
+ }
+ gfs2_holder_uninit(&gh);
+ return ret;
}
static int gfs2_removexattr(struct dentry *dentry, const char *name)
{
- struct gfs2_ea_request er;
-
- memset(&er, 0, sizeof(struct gfs2_ea_request));
- er.er_type = gfs2_ea_name2type(name, &er.er_name);
- if (er.er_type == GFS2_EATYPE_UNUSED)
- return -EOPNOTSUPP;
- er.er_name_len = strlen(er.er_name);
+ struct inode *inode = dentry->d_inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
- return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
+ if (ret == 0) {
+ ret = generic_removexattr(dentry, name);
+ gfs2_glock_dq(&gh);
+ }
+ gfs2_holder_uninit(&gh);
+ return ret;
}
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fba795798d3a..28c590b7c9da 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -857,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
goto start_new_extent;
if ((start + nr_sects) != blk) {
rv = blkdev_issue_discard(bdev, start,
- nr_sects, GFP_NOFS);
+ nr_sects, GFP_NOFS,
+ DISCARD_FL_BARRIER);
if (rv)
goto fail;
nr_sects = 0;
@@ -871,7 +872,8 @@ start_new_extent:
}
}
if (nr_sects) {
- rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
+ rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
+ DISCARD_FL_BARRIER);
if (rv)
goto fail;
}
@@ -1256,7 +1258,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
* Returns: The block type (GFS2_BLKST_*)
*/
-unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
+static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
{
struct gfs2_bitmap *bi = NULL;
u32 length, rgrp_block, buf_block;
@@ -1459,6 +1461,16 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
return 0;
}
+static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
+ fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
+ (unsigned long long)rgd->rd_addr);
+ fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
+ gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ rgd->rd_flags |= GFS2_RDF_ERROR;
+}
+
/**
* gfs2_alloc_block - Allocate one or more blocks
* @ip: the inode to allocate the block for
@@ -1520,22 +1532,20 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
return 0;
rgrp_error:
- fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
- (unsigned long long)rgd->rd_addr);
- fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
- rgd->rd_flags |= GFS2_RDF_ERROR;
+ gfs2_rgrp_error(rgd);
return -EIO;
}
/**
* gfs2_alloc_di - Allocate a dinode
* @dip: the directory that the inode is going in
+ * @bn: the block number which is allocated
+ * @generation: the generation number of the inode
*
- * Returns: the block allocated
+ * Returns: 0 on success or error
*/
-u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
+int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_alloc *al = dip->i_alloc;
@@ -1546,16 +1556,21 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
blk = rgblk_search(rgd, rgd->rd_last_alloc,
GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
- BUG_ON(blk == BFITNOENT);
- rgd->rd_last_alloc = blk;
+ /* Since all blocks are reserved in advance, this shouldn't happen */
+ if (blk == BFITNOENT)
+ goto rgrp_error;
+ rgd->rd_last_alloc = blk;
block = rgd->rd_data0 + blk;
+ if (rgd->rd_free == 0)
+ goto rgrp_error;
- gfs2_assert_withdraw(sdp, rgd->rd_free);
rgd->rd_free--;
rgd->rd_dinodes++;
*generation = rgd->rd_igeneration++;
+ if (*generation == 0)
+ *generation = rgd->rd_igeneration++;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
@@ -1568,7 +1583,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
rgd->rd_free_clone--;
spin_unlock(&sdp->sd_rindex_spin);
trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
- return block;
+ *bn = block;
+ return 0;
+
+rgrp_error:
+ gfs2_rgrp_error(rgd);
+ return -EIO;
}
/**
@@ -1676,6 +1696,46 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
}
/**
+ * gfs2_check_blk_type - Check the type of a block
+ * @sdp: The superblock
+ * @no_addr: The block number to check
+ * @type: The block type we are looking for
+ *
+ * Returns: 0 if the block type matches the expected type
+ * -ESTALE if it doesn't match
+ * or -ve errno if something went wrong while checking
+ */
+
+int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
+{
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh, rgd_gh;
+ int error;
+
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ goto fail;
+
+ error = -EINVAL;
+ rgd = gfs2_blk2rgrpd(sdp, no_addr);
+ if (!rgd)
+ goto fail_rindex;
+
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
+ if (error)
+ goto fail_rindex;
+
+ if (gfs2_get_block_type(rgd, no_addr) != type)
+ error = -ESTALE;
+
+ gfs2_glock_dq_uninit(&rgd_gh);
+fail_rindex:
+ gfs2_glock_dq_uninit(&ri_gh);
+fail:
+ return error;
+}
+
+/**
* gfs2_rlist_add - add a RG to a list of RGs
* @sdp: the filesystem
* @rlist: the list of resource groups
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 1e76ff0f3e00..b4106ddaaa98 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -44,15 +44,15 @@ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
extern void gfs2_inplace_release(struct gfs2_inode *ip);
-extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
-
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
-extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
+extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
+extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
+ unsigned int type);
struct gfs2_rgrp_list {
unsigned int rl_rgrps;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f522bb017973..0ec3ec672de1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -38,7 +38,7 @@
#include "trans.h"
#include "util.h"
#include "sys.h"
-#include "eattr.h"
+#include "xattr.h"
#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
@@ -68,6 +68,8 @@ enum {
Opt_discard,
Opt_nodiscard,
Opt_commit,
+ Opt_err_withdraw,
+ Opt_err_panic,
Opt_error,
};
@@ -97,6 +99,8 @@ static const match_table_t tokens = {
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_commit, "commit=%d"},
+ {Opt_err_withdraw, "errors=withdraw"},
+ {Opt_err_panic, "errors=panic"},
{Opt_error, NULL}
};
@@ -152,6 +156,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
args->ar_localcaching = 1;
break;
case Opt_debug:
+ if (args->ar_errors == GFS2_ERRORS_PANIC) {
+ fs_info(sdp, "-o debug and -o errors=panic "
+ "are mutually exclusive.\n");
+ return -EINVAL;
+ }
args->ar_debug = 1;
break;
case Opt_nodebug:
@@ -205,6 +214,17 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
return rv ? rv : -EINVAL;
}
break;
+ case Opt_err_withdraw:
+ args->ar_errors = GFS2_ERRORS_WITHDRAW;
+ break;
+ case Opt_err_panic:
+ if (args->ar_debug) {
+ fs_info(sdp, "-o debug and -o errors=panic "
+ "are mutually exclusive.\n");
+ return -EINVAL;
+ }
+ args->ar_errors = GFS2_ERRORS_PANIC;
+ break;
case Opt_error:
default:
fs_info(sdp, "invalid mount option: %s\n", o);
@@ -768,7 +788,6 @@ restart:
/* Release stuff */
iput(sdp->sd_jindex);
- iput(sdp->sd_inum_inode);
iput(sdp->sd_statfs_inode);
iput(sdp->sd_rindex);
iput(sdp->sd_quota_inode);
@@ -779,10 +798,8 @@ restart:
if (!sdp->sd_args.ar_spectator) {
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
- gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
- iput(sdp->sd_ir_inode);
iput(sdp->sd_sc_inode);
iput(sdp->sd_qc_inode);
}
@@ -1084,6 +1101,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
gt->gt_log_flush_secs = args.ar_commit;
spin_unlock(&gt->gt_spin);
+ gfs2_online_uevent(sdp);
return 0;
}
@@ -1225,6 +1243,22 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
lfsecs = sdp->sd_tune.gt_log_flush_secs;
if (lfsecs != 60)
seq_printf(s, ",commit=%d", lfsecs);
+ if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
+ const char *state;
+
+ switch (args->ar_errors) {
+ case GFS2_ERRORS_WITHDRAW:
+ state = "withdraw";
+ break;
+ case GFS2_ERRORS_PANIC:
+ state = "panic";
+ break;
+ default:
+ state = "unknown";
+ break;
+ }
+ seq_printf(s, ",errors=%s", state);
+ }
return 0;
}
@@ -1252,6 +1286,10 @@ static void gfs2_delete_inode(struct inode *inode)
goto out;
}
+ error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+ if (error)
+ goto out_truncate;
+
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
error = gfs2_glock_nq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 22e0417ed996..235db3682885 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,7 +25,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
return x;
}
-void gfs2_jindex_free(struct gfs2_sbd *sdp);
+extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
@@ -36,7 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-
+extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
@@ -54,6 +54,7 @@ extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
extern const struct super_operations gfs2_super_ops;
extern const struct dentry_operations gfs2_dops;
+extern struct xattr_handler *gfs2_xattr_handlers[];
#endif /* __SUPER_DOT_H__ */
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..446329728d52 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -16,6 +16,7 @@
#include <linux/kobject.h>
#include <asm/uaccess.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/genhd.h>
#include "gfs2.h"
#include "incore.h"
@@ -319,12 +320,6 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
return ret;
}
-static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
-{
- struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- return sprintf(buf, "%u\n", ls->ls_id);
-}
-
static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -386,22 +381,20 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
-GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
-GDLM_ATTR(block, 0644, block_show, block_store);
-GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
-GDLM_ATTR(id, 0444, lkid_show, NULL);
-GDLM_ATTR(jid, 0444, jid_show, NULL);
-GDLM_ATTR(first, 0444, lkfirst_show, NULL);
-GDLM_ATTR(first_done, 0444, first_done_show, NULL);
-GDLM_ATTR(recover, 0200, NULL, recover_store);
-GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
-GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
+GDLM_ATTR(block, 0644, block_show, block_store);
+GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
+GDLM_ATTR(jid, 0444, jid_show, NULL);
+GDLM_ATTR(first, 0444, lkfirst_show, NULL);
+GDLM_ATTR(first_done, 0444, first_done_show, NULL);
+GDLM_ATTR(recover, 0600, NULL, recover_store);
+GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
+GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
static struct attribute *lock_module_attrs[] = {
&gdlm_attr_proto_name.attr,
&gdlm_attr_block.attr,
&gdlm_attr_withdraw.attr,
- &gdlm_attr_id.attr,
&gdlm_attr_jid.attr,
&gdlm_attr_first.attr,
&gdlm_attr_first_done.attr,
@@ -519,7 +512,14 @@ static struct attribute_group lock_module_group = {
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
+ struct super_block *sb = sdp->sd_vfs;
int error;
+ char ro[20];
+ char spectator[20];
+ char *envp[] = { ro, spectator, NULL };
+
+ sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
+ sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
@@ -535,9 +535,17 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
if (error)
goto fail_tune;
- kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
+ error = sysfs_create_link(&sdp->sd_kobj,
+ &disk_to_dev(sb->s_bdev->bd_disk)->kobj,
+ "device");
+ if (error)
+ goto fail_lock_module;
+
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp);
return 0;
+fail_lock_module:
+ sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
@@ -549,12 +557,12 @@ fail:
void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
{
+ sysfs_remove_link(&sdp->sd_kobj, "device");
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
}
-
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
@@ -563,6 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
+ if (!sdp->sd_args.ar_spectator)
+ add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
if (gfs2_uuid_valid(uuid)) {
add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
"%02X%02X-%02X%02X%02X%02X%02X%02X",
@@ -578,7 +588,6 @@ static struct kset_uevent_ops gfs2_uevent_ops = {
.uevent = gfs2_uevent,
};
-
int gfs2_sys_init(void)
{
gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 9d12b1118ba0..f6a7efa34eb9 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -38,24 +38,30 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
const struct lm_lockops *lm = ls->ls_ops;
va_list args;
- if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
+ test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return 0;
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
- fs_err(sdp, "about to withdraw this file system\n");
- BUG_ON(sdp->sd_args.ar_debug);
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+ fs_err(sdp, "about to withdraw this file system\n");
+ BUG_ON(sdp->sd_args.ar_debug);
- kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+ kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
- if (lm->lm_unmount) {
- fs_err(sdp, "telling LM to unmount\n");
- lm->lm_unmount(sdp);
+ if (lm->lm_unmount) {
+ fs_err(sdp, "telling LM to unmount\n");
+ lm->lm_unmount(sdp);
+ }
+ fs_err(sdp, "withdrawn\n");
+ dump_stack();
}
- fs_err(sdp, "withdrawn\n");
- dump_stack();
+
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
+ panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname);
return -1;
}
@@ -93,17 +99,24 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
gfs2_tune_get(sdp, gt_complain_secs) * HZ))
return -2;
- printk(KERN_WARNING
- "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
- "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
- sdp->sd_fsname, assertion,
- sdp->sd_fsname, function, file, line);
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
+ printk(KERN_WARNING
+ "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
+ "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
+ sdp->sd_fsname, assertion,
+ sdp->sd_fsname, function, file, line);
if (sdp->sd_args.ar_debug)
BUG();
else
dump_stack();
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
+ panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
+ "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
+ sdp->sd_fsname, assertion,
+ sdp->sd_fsname, function, file, line);
+
sdp->sd_last_warning = jiffies;
return -1;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/xattr.c
index 07ea9529adda..8a0f8ef6ee27 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/xattr.c
@@ -18,8 +18,7 @@
#include "gfs2.h"
#include "incore.h"
#include "acl.h"
-#include "eaops.h"
-#include "eattr.h"
+#include "xattr.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
@@ -38,26 +37,32 @@
* Returns: 1 if the EA should be stuffed
*/
-static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
+static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize,
unsigned int *size)
{
- *size = GFS2_EAREQ_SIZE_STUFFED(er);
- if (*size <= sdp->sd_jbsize)
+ unsigned int jbsize = sdp->sd_jbsize;
+
+ /* Stuffed */
+ *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8);
+
+ if (*size <= jbsize)
return 1;
- *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
+ /* Unstuffed */
+ *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize +
+ (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8);
return 0;
}
-static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
+static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
{
unsigned int size;
- if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
+ if (dsize > GFS2_EA_MAX_DATA_LEN)
return -ERANGE;
- ea_calc_size(sdp, er, &size);
+ ea_calc_size(sdp, nsize, dsize, &size);
/* This can only happen with 512 byte blocks */
if (size > sdp->sd_jbsize)
@@ -151,7 +156,9 @@ out:
}
struct ea_find {
- struct gfs2_ea_request *ef_er;
+ int type;
+ const char *name;
+ size_t namel;
struct gfs2_ea_location *ef_el;
};
@@ -160,14 +167,13 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
void *private)
{
struct ea_find *ef = private;
- struct gfs2_ea_request *er = ef->ef_er;
if (ea->ea_type == GFS2_EATYPE_UNUSED)
return 0;
- if (ea->ea_type == er->er_type) {
- if (ea->ea_name_len == er->er_name_len &&
- !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
+ if (ea->ea_type == ef->type) {
+ if (ea->ea_name_len == ef->namel &&
+ !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) {
struct gfs2_ea_location *el = ef->ef_el;
get_bh(bh);
el->el_bh = bh;
@@ -180,13 +186,15 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
return 0;
}
-int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
+int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
struct gfs2_ea_location *el)
{
struct ea_find ef;
int error;
- ef.ef_er = er;
+ ef.type = type;
+ ef.name = name;
+ ef.namel = strlen(name);
ef.ef_el = el;
memset(el, 0, sizeof(struct gfs2_ea_location));
@@ -344,6 +352,20 @@ struct ea_list {
unsigned int ei_size;
};
+static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
+{
+ switch (ea->ea_type) {
+ case GFS2_EATYPE_USR:
+ return 5 + ea->ea_name_len + 1;
+ case GFS2_EATYPE_SYS:
+ return 7 + ea->ea_name_len + 1;
+ case GFS2_EATYPE_SECURITY:
+ return 9 + ea->ea_name_len + 1;
+ default:
+ return 0;
+ }
+}
+
static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
void *private)
@@ -392,21 +414,25 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
}
/**
- * gfs2_ea_list -
- * @ip:
- * @er:
+ * gfs2_listxattr - List gfs2 extended attributes
+ * @dentry: The dentry whose inode we are interested in
+ * @buffer: The buffer to write the results
+ * @size: The size of the buffer
*
* Returns: actual size of data on success, -errno on error
*/
-int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
+ struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+ struct gfs2_ea_request er;
struct gfs2_holder i_gh;
int error;
- if (!er->er_data || !er->er_data_len) {
- er->er_data = NULL;
- er->er_data_len = 0;
+ memset(&er, 0, sizeof(struct gfs2_ea_request));
+ if (size) {
+ er.er_data = buffer;
+ er.er_data_len = size;
}
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
@@ -414,7 +440,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
return error;
if (ip->i_eattr) {
- struct ea_list ei = { .ei_er = er, .ei_size = 0 };
+ struct ea_list ei = { .ei_er = &er, .ei_size = 0 };
error = ea_foreach(ip, ea_list_i, &ei);
if (!error)
@@ -491,84 +517,61 @@ out:
}
int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
- char *data)
+ char *data, size_t size)
{
+ int ret;
+ size_t len = GFS2_EA_DATA_LEN(el->el_ea);
+ if (len > size)
+ return -ERANGE;
+
if (GFS2_EA_IS_STUFFED(el->el_ea)) {
- memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea));
- return 0;
- } else
- return ea_get_unstuffed(ip, el->el_ea, data);
+ memcpy(data, GFS2_EA2DATA(el->el_ea), len);
+ return len;
+ }
+ ret = ea_get_unstuffed(ip, el->el_ea, data);
+ if (ret < 0)
+ return ret;
+ return len;
}
/**
- * gfs2_ea_get_i -
- * @ip: The GFS2 inode
- * @er: The request structure
+ * gfs2_xattr_get - Get a GFS2 extended attribute
+ * @inode: The inode
+ * @type: The type of extended attribute
+ * @name: The name of the extended attribute
+ * @buffer: The buffer to write the result into
+ * @size: The size of the buffer
*
* Returns: actual size of data on success, -errno on error
*/
-int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+int gfs2_xattr_get(struct inode *inode, int type, const char *name,
+ void *buffer, size_t size)
{
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_ea_location el;
int error;
if (!ip->i_eattr)
return -ENODATA;
+ if (strlen(name) > GFS2_EA_MAX_NAME_LEN)
+ return -EINVAL;
- error = gfs2_ea_find(ip, er, &el);
+ error = gfs2_ea_find(ip, type, name, &el);
if (error)
return error;
if (!el.el_ea)
return -ENODATA;
-
- if (er->er_data_len) {
- if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
- error = -ERANGE;
- else
- error = gfs2_ea_get_copy(ip, &el, er->er_data);
- }
- if (!error)
+ if (size)
+ error = gfs2_ea_get_copy(ip, &el, buffer, size);
+ else
error = GFS2_EA_DATA_LEN(el.el_ea);
-
brelse(el.el_bh);
return error;
}
/**
- * gfs2_ea_get -
- * @ip: The GFS2 inode
- * @er: The request structure
- *
- * Returns: actual size of data on success, -errno on error
- */
-
-int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- struct gfs2_holder i_gh;
- int error;
-
- if (!er->er_name_len ||
- er->er_name_len > GFS2_EA_MAX_NAME_LEN)
- return -EINVAL;
- if (!er->er_data || !er->er_data_len) {
- er->er_data = NULL;
- er->er_data_len = 0;
- }
-
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (error)
- return error;
-
- error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
-
- gfs2_glock_dq_uninit(&i_gh);
-
- return error;
-}
-
-/**
* ea_alloc_blk - allocates a new block for extended attributes.
* @ip: A pointer to the inode that's getting extended attributes
* @bhp: Pointer to pointer to a struct buffer_head
@@ -713,12 +716,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- if (er->er_flags & GFS2_ERF_MODE) {
- gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
- (ip->i_inode.i_mode & S_IFMT) ==
- (er->er_mode & S_IFMT));
- ip->i_inode.i_mode = er->er_mode;
- }
ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -762,15 +759,23 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
* Returns: errno
*/
-static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+static int ea_init(struct gfs2_inode *ip, int type, const char *name,
+ const void *data, size_t size)
{
+ struct gfs2_ea_request er;
unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
unsigned int blks = 1;
- if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
- blks += DIV_ROUND_UP(er->er_data_len, jbsize);
+ er.er_type = type;
+ er.er_name = name;
+ er.er_name_len = strlen(name);
+ er.er_data = (void *)data;
+ er.er_data_len = size;
+
+ if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize)
+ blks += DIV_ROUND_UP(er.er_data_len, jbsize);
- return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
+ return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL);
}
static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
@@ -848,12 +853,6 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out;
-
- if (er->er_flags & GFS2_ERF_MODE) {
- gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
- (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
- ip->i_inode.i_mode = er->er_mode;
- }
ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -894,7 +893,8 @@ static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
int stuffed;
int error;
- stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size);
+ stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len,
+ es->es_er->er_data_len, &size);
if (ea->ea_type == GFS2_EATYPE_UNUSED) {
if (GFS2_EA_REC_LEN(ea) < size)
@@ -1005,15 +1005,22 @@ out:
return error;
}
-static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
- struct gfs2_ea_location *el)
+static int ea_set_i(struct gfs2_inode *ip, int type, const char *name,
+ const void *value, size_t size, struct gfs2_ea_location *el)
{
+ struct gfs2_ea_request er;
struct ea_set es;
unsigned int blks = 2;
int error;
+ er.er_type = type;
+ er.er_name = name;
+ er.er_data = (void *)value;
+ er.er_name_len = strlen(name);
+ er.er_data_len = size;
+
memset(&es, 0, sizeof(struct ea_set));
- es.es_er = er;
+ es.es_er = &er;
es.es_el = el;
error = ea_foreach(ip, ea_set_simple, &es);
@@ -1024,10 +1031,10 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
blks++;
- if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
- blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
+ if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
+ blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
- return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
+ return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el);
}
static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
@@ -1039,75 +1046,7 @@ static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
GFS2_EA2NEXT(el->el_prev) == el->el_ea);
}
- return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
-}
-
-int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- struct gfs2_ea_location el;
- int error;
-
- if (!ip->i_eattr) {
- if (er->er_flags & XATTR_REPLACE)
- return -ENODATA;
- return ea_init(ip, er);
- }
-
- error = gfs2_ea_find(ip, er, &el);
- if (error)
- return error;
-
- if (el.el_ea) {
- if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
- brelse(el.el_bh);
- return -EPERM;
- }
-
- error = -EEXIST;
- if (!(er->er_flags & XATTR_CREATE)) {
- int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
- error = ea_set_i(ip, er, &el);
- if (!error && unstuffed)
- ea_set_remove_unstuffed(ip, &el);
- }
-
- brelse(el.el_bh);
- } else {
- error = -ENODATA;
- if (!(er->er_flags & XATTR_REPLACE))
- error = ea_set_i(ip, er, NULL);
- }
-
- return error;
-}
-
-int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
- struct gfs2_holder i_gh;
- int error;
-
- if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
- return -EINVAL;
- if (!er->er_data || !er->er_data_len) {
- er->er_data = NULL;
- er->er_data_len = 0;
- }
- error = ea_check_size(GFS2_SB(&ip->i_inode), er);
- if (error)
- return error;
-
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
- if (error)
- return error;
-
- if (IS_IMMUTABLE(&ip->i_inode))
- error = -EPERM;
- else
- error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
-
- gfs2_glock_dq_uninit(&i_gh);
-
- return error;
+ return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0);
}
static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
@@ -1131,8 +1070,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
if (GFS2_EA_IS_LAST(ea))
prev->ea_flags |= GFS2_EAFLAG_LAST;
- } else
+ } else {
ea->ea_type = GFS2_EATYPE_UNUSED;
+ }
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
@@ -1147,15 +1087,29 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
return error;
}
-int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+/**
+ * gfs2_xattr_remove - Remove a GFS2 extended attribute
+ * @inode: The inode
+ * @type: The type of the extended attribute
+ * @name: The name of the extended attribute
+ *
+ * This is not called directly by the VFS since we use the (common)
+ * scheme of making a "set with NULL data" mean a remove request. Note
+ * that this is different from a set with zero length data.
+ *
+ * Returns: 0, or errno on failure
+ */
+
+static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
{
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_ea_location el;
int error;
if (!ip->i_eattr)
return -ENODATA;
- error = gfs2_ea_find(ip, er, &el);
+ error = gfs2_ea_find(ip, type, name, &el);
if (error)
return error;
if (!el.el_ea)
@@ -1164,8 +1118,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
if (GFS2_EA_IS_STUFFED(el.el_ea))
error = ea_remove_stuffed(ip, &el);
else
- error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
- 0);
+ error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0);
brelse(el.el_bh);
@@ -1173,31 +1126,70 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
}
/**
- * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
- * @ip: pointer to the inode of the target file
- * @er: request information
+ * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
+ * @inode: The inode
+ * @type: The type of the extended attribute
+ * @name: The name of the extended attribute
+ * @value: The value of the extended attribute (NULL for remove)
+ * @size: The size of the @value argument
+ * @flags: Create or Replace
*
- * Returns: errno
+ * See gfs2_xattr_remove() for details of the removal of xattrs.
+ *
+ * Returns: 0 or errno on failure
*/
-int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
+int gfs2_xattr_set(struct inode *inode, int type, const char *name,
+ const void *value, size_t size, int flags)
{
- struct gfs2_holder i_gh;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_ea_location el;
+ unsigned int namel = strlen(name);
int error;
- if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
- return -EINVAL;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EPERM;
+ if (namel > GFS2_EA_MAX_NAME_LEN)
+ return -ERANGE;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+ if (value == NULL)
+ return gfs2_xattr_remove(inode, type, name);
+
+ if (ea_check_size(sdp, namel, size))
+ return -ERANGE;
+
+ if (!ip->i_eattr) {
+ if (flags & XATTR_REPLACE)
+ return -ENODATA;
+ return ea_init(ip, type, name, value, size);
+ }
+
+ error = gfs2_ea_find(ip, type, name, &el);
if (error)
return error;
- if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
- error = -EPERM;
- else
- error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
+ if (el.el_ea) {
+ if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
+ brelse(el.el_bh);
+ return -EPERM;
+ }
- gfs2_glock_dq_uninit(&i_gh);
+ error = -EEXIST;
+ if (!(flags & XATTR_CREATE)) {
+ int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
+ error = ea_set_i(ip, type, name, value, size, &el);
+ if (!error && unstuffed)
+ ea_set_remove_unstuffed(ip, &el);
+ }
+
+ brelse(el.el_bh);
+ return error;
+ }
+
+ error = -ENODATA;
+ if (!(flags & XATTR_REPLACE))
+ error = ea_set_i(ip, type, name, value, size, NULL);
return error;
}
@@ -1503,3 +1495,64 @@ out_alloc:
return error;
}
+static int gfs2_xattr_user_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
+}
+
+static int gfs2_xattr_user_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
+}
+
+static int gfs2_xattr_system_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size);
+}
+
+static int gfs2_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags);
+}
+
+static int gfs2_xattr_security_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
+}
+
+static int gfs2_xattr_security_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
+}
+
+static struct xattr_handler gfs2_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = gfs2_xattr_user_get,
+ .set = gfs2_xattr_user_set,
+};
+
+static struct xattr_handler gfs2_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = gfs2_xattr_security_get,
+ .set = gfs2_xattr_security_set,
+};
+
+static struct xattr_handler gfs2_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = gfs2_xattr_system_get,
+ .set = gfs2_xattr_system_set,
+};
+
+struct xattr_handler *gfs2_xattr_handlers[] = {
+ &gfs2_xattr_user_handler,
+ &gfs2_xattr_security_handler,
+ &gfs2_xattr_system_handler,
+ NULL,
+};
+
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/xattr.h
index c82dbe01d713..cbdfd7743733 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/xattr.h
@@ -19,7 +19,7 @@ struct iattr;
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
- (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
+ (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -27,10 +27,6 @@ ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
-#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
-ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
- sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
-
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
@@ -43,16 +39,12 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
-#define GFS2_ERF_MODE 0x80000000
-
struct gfs2_ea_request {
const char *er_name;
char *er_data;
unsigned int er_name_len;
unsigned int er_data_len;
unsigned int er_type; /* GFS2_EATYPE_... */
- int er_flags;
- mode_t er_mode;
};
struct gfs2_ea_location {
@@ -61,40 +53,20 @@ struct gfs2_ea_location {
struct gfs2_ea_header *el_prev;
};
-int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-
-int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
-
-int gfs2_ea_dealloc(struct gfs2_inode *ip);
+extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
+ void *buffer, size_t size);
+extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
+ const void *value, size_t size, int flags);
+extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
+extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
-int gfs2_ea_find(struct gfs2_inode *ip,
- struct gfs2_ea_request *er,
- struct gfs2_ea_location *el);
-int gfs2_ea_get_copy(struct gfs2_inode *ip,
- struct gfs2_ea_location *el,
- char *data);
-int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
- struct iattr *attr, char *data);
-
-static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
-{
- switch (ea->ea_type) {
- case GFS2_EATYPE_USR:
- return 5 + ea->ea_name_len + 1;
- case GFS2_EATYPE_SYS:
- return 7 + ea->ea_name_len + 1;
- case GFS2_EATYPE_SECURITY:
- return 9 + ea->ea_name_len + 1;
- default:
- return 0;
- }
-}
+extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
+ struct gfs2_ea_location *el);
+extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
+ char *data, size_t size);
+extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
+ struct iattr *attr, char *data);
#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 941c8425c10b..a93b885311d8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
static struct backing_dev_info hugetlbfs_backing_dev_info = {
+ .name = "hugetlbfs",
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
@@ -935,26 +936,28 @@ static int can_do_hugetlb_shm(void)
return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
}
-struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
+struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
+ struct user_struct **user)
{
int error = -ENOMEM;
- int unlock_shm = 0;
struct file *file;
struct inode *inode;
struct dentry *dentry, *root;
struct qstr quick_string;
- struct user_struct *user = current_user();
+ *user = NULL;
if (!hugetlbfs_vfsmount)
return ERR_PTR(-ENOENT);
if (!can_do_hugetlb_shm()) {
- if (user_shm_lock(size, user)) {
- unlock_shm = 1;
+ *user = current_user();
+ if (user_shm_lock(size, *user)) {
WARN_ONCE(1,
"Using mlock ulimits for SHM_HUGETLB deprecated\n");
- } else
+ } else {
+ *user = NULL;
return ERR_PTR(-EPERM);
+ }
}
root = hugetlbfs_vfsmount->mnt_root;
@@ -996,8 +999,10 @@ out_inode:
out_dentry:
dput(dentry);
out_shm_unlock:
- if (unlock_shm)
- user_shm_unlock(size, user);
+ if (*user) {
+ user_shm_unlock(size, *user);
+ *user = NULL;
+ }
return ERR_PTR(error);
}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8fcb6239218e..7edb62e97419 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return rc;
}
-static int jffs2_check_acl(struct inode *inode, int mask)
+int jffs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
int rc;
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int jffs2_permission(struct inode *inode, int mask)
-{
- return generic_permission(inode, mask, jffs2_check_acl);
-}
-
int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
{
struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index fc929f2a14f6..f0ba63e3c36b 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-extern int jffs2_permission(struct inode *, int);
+extern int jffs2_check_acl(struct inode *, int);
extern int jffs2_acl_chmod(struct inode *);
extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
extern int jffs2_init_acl_post(struct inode *);
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
#else
-#define jffs2_permission (NULL)
+#define jffs2_check_acl (NULL)
#define jffs2_acl_chmod(inode) (0)
#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
#define jffs2_init_acl_post(inode) (0)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6f60cc910f4c..7aa4417e085f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations =
.rmdir = jffs2_rmdir,
.mknod = jffs2_mknod,
.rename = jffs2_rename,
- .permission = jffs2_permission,
+ .check_acl = jffs2_check_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 23c947539864..b7b74e299142 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations =
const struct inode_operations jffs2_file_inode_operations =
{
- .permission = jffs2_permission,
+ .check_acl = jffs2_check_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b7339c3b6ad9..4ec11e8bda8c 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
.follow_link = jffs2_follow_link,
- .permission = jffs2_permission,
+ .check_acl = jffs2_check_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index d9a721e6db70..5ef7bac265e5 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
if (!c->wbuf)
return -ENOMEM;
+#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
+ c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
+ if (!c->wbuf_verify) {
+ kfree(c->wbuf);
+ return -ENOMEM;
+ }
+#endif
return 0;
}
void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) {
+#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
+ kfree(c->wbuf_verify);
+#endif
kfree(c->wbuf);
}
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a29c7c3e3fb8..d66477c34306 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,7 +114,7 @@ out:
return rc;
}
-static int jfs_check_acl(struct inode *inode, int mask)
+int jfs_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
@@ -129,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask)
return -EAGAIN;
}
-int jfs_permission(struct inode *inode, int mask)
-{
- return generic_permission(inode, mask, jfs_check_acl);
-}
-
int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 7f6063acaa3b..2b70fa78e4a7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = {
.removexattr = jfs_removexattr,
#ifdef CONFIG_JFS_POSIX_ACL
.setattr = jfs_setattr,
- .permission = jfs_permission,
+ .check_acl = jfs_check_acl,
#endif
};
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 88475f10a389..b07bd417ef85 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
#ifdef CONFIG_JFS_POSIX_ACL
-int jfs_permission(struct inode *, int);
+int jfs_check_acl(struct inode *, int);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 514ee2edb92a..c79a4270f083 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = {
.removexattr = jfs_removexattr,
#ifdef CONFIG_JFS_POSIX_ACL
.setattr = jfs_setattr,
- .permission = jfs_permission,
+ .check_acl = jfs_check_acl,
#endif
};
diff --git a/fs/libfs.c b/fs/libfs.c
index ddfa89948c3f..dcec3d3ea64f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
return PTR_ERR(s);
s->s_flags = MS_NOUSER;
- s->s_maxbytes = ~0ULL;
+ s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_blocksize = PAGE_SIZE;
s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 99d737bd4325..7cb076ac6b45 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
return hash & (NLM_HOST_NRHASH - 1);
}
-static void nlm_clear_port(struct sockaddr *sap)
-{
- switch (sap->sa_family) {
- case AF_INET:
- ((struct sockaddr_in *)sap)->sin_port = 0;
- break;
- case AF_INET6:
- ((struct sockaddr_in6 *)sap)->sin6_port = 0;
- break;
- }
-}
-
/*
* Common host lookup routine for server & client
*/
@@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
host->h_addrbuf = nsm->sm_addrbuf;
memcpy(nlm_addr(host), ni->sap, ni->salen);
host->h_addrlen = ni->salen;
- nlm_clear_port(nlm_addr(host));
+ rpc_set_port(nlm_addr(host), 0);
memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
host->h_version = ni->version;
host->h_proto = ni->protocol;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7fce1b525849..30c933188dd7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
return (struct sockaddr *)&nsm->sm_addr;
}
-static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
- const size_t len)
-{
- const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
- snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
-}
-
-static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf,
- const size_t len)
-{
- const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
- else if (sin6->sin6_scope_id != 0)
- snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
- sin6->sin6_scope_id);
- else
- snprintf(buf, len, "%pI6", &sin6->sin6_addr);
-}
-
-static void nsm_display_address(const struct sockaddr *sap,
- char *buf, const size_t len)
-{
- switch (sap->sa_family) {
- case AF_INET:
- nsm_display_ipv4_address(sap, buf, len);
- break;
- case AF_INET6:
- nsm_display_ipv6_address(sap, buf, len);
- break;
- default:
- snprintf(buf, len, "unsupported address family");
- break;
- }
-}
-
static struct rpc_clnt *nsm_create(void)
{
struct sockaddr_in sin = {
@@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
memcpy(nsm_addr(new), sap, salen);
new->sm_addrlen = salen;
nsm_init_private(new);
- nsm_display_address((const struct sockaddr *)&new->sm_addr,
- new->sm_addrbuf, sizeof(new->sm_addrbuf));
+
+ if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
+ sizeof(new->sm_addrbuf)) == 0)
+ (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf),
+ "unsupported address family");
memcpy(new->sm_name, hostname, hostname_len);
new->sm_name[hostname_len] = '\0';
diff --git a/fs/locks.c b/fs/locks.c
index b6440f52178f..19ee18a6829b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
* give it the opportunity to lock the file.
*/
if (found)
- cond_resched_bkl();
+ cond_resched();
find_conflict:
for_each_lock(inode, before) {
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
if (can_sleep)
lock->fl_flags |= FL_SLEEP;
- error = security_file_lock(filp, cmd);
+ error = security_file_lock(filp, lock->fl_type);
if (error)
goto out_free;
diff --git a/fs/namei.c b/fs/namei.c
index f3c5b278895a..d11f404667e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,19 +169,10 @@ void putname(const char *name)
EXPORT_SYMBOL(putname);
#endif
-
-/**
- * generic_permission - check for access rights on a Posix-like filesystem
- * @inode: inode to check access rights for
- * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- * @check_acl: optional callback to check for Posix ACLs
- *
- * Used to check for read/write/execute permissions on a file.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things..
+/*
+ * This does basic POSIX ACL permission checking
*/
-int generic_permission(struct inode *inode, int mask,
+static int acl_permission_check(struct inode *inode, int mask,
int (*check_acl)(struct inode *inode, int mask))
{
umode_t mode = inode->i_mode;
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask,
else {
if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
int error = check_acl(inode, mask);
- if (error == -EACCES)
- goto check_capabilities;
- else if (error != -EAGAIN)
+ if (error != -EAGAIN)
return error;
}
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask,
*/
if ((mask & ~mode) == 0)
return 0;
+ return -EACCES;
+}
+
+/**
+ * generic_permission - check for access rights on a Posix-like filesystem
+ * @inode: inode to check access rights for
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @check_acl: optional callback to check for Posix ACLs
+ *
+ * Used to check for read/write/execute permissions on a file.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things..
+ */
+int generic_permission(struct inode *inode, int mask,
+ int (*check_acl)(struct inode *inode, int mask))
+{
+ int ret;
+
+ /*
+ * Do the basic POSIX ACL permission checks.
+ */
+ ret = acl_permission_check(inode, mask, check_acl);
+ if (ret != -EACCES)
+ return ret;
- check_capabilities:
/*
* Read/write DACs are always overridable.
* Executable DACs are overridable if at least one exec bit is set.
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask)
if (inode->i_op->permission)
retval = inode->i_op->permission(inode, mask);
else
- retval = generic_permission(inode, mask, NULL);
+ retval = generic_permission(inode, mask, inode->i_op->check_acl);
if (retval)
return retval;
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
*/
static int exec_permission_lite(struct inode *inode)
{
- umode_t mode = inode->i_mode;
+ int ret;
- if (inode->i_op->permission)
- return -EAGAIN;
-
- if (current_fsuid() == inode->i_uid)
- mode >>= 6;
- else if (in_group_p(inode->i_gid))
- mode >>= 3;
-
- if (mode & MAY_EXEC)
- goto ok;
-
- if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
- goto ok;
-
- if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
+ if (inode->i_op->permission) {
+ ret = inode->i_op->permission(inode, MAY_EXEC);
+ if (!ret)
+ goto ok;
+ return ret;
+ }
+ ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
+ if (!ret)
goto ok;
- if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
+ if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
goto ok;
- return -EACCES;
+ return ret;
ok:
return security_inode_permission(inode, MAY_EXEC);
}
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
nd->flags |= LOOKUP_CONTINUE;
err = exec_permission_lite(inode);
- if (err == -EAGAIN)
- err = inode_permission(nd->path.dentry->d_inode,
- MAY_EXEC);
- if (!err)
- err = ima_path_check(&nd->path, MAY_EXEC,
- IMA_COUNT_UPDATE);
if (err)
break;
@@ -1533,37 +1533,42 @@ int may_open(struct path *path, int acc_mode, int flag)
if (error)
return error;
- error = ima_path_check(path,
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+ error = ima_path_check(path, acc_mode ?
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+ ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
IMA_COUNT_UPDATE);
+
if (error)
return error;
/*
* An append-only file must be opened in append mode for writing.
*/
if (IS_APPEND(inode)) {
+ error = -EPERM;
if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
- return -EPERM;
+ goto err_out;
if (flag & O_TRUNC)
- return -EPERM;
+ goto err_out;
}
/* O_NOATIME can only be set by the owner or superuser */
if (flag & O_NOATIME)
- if (!is_owner_or_cap(inode))
- return -EPERM;
+ if (!is_owner_or_cap(inode)) {
+ error = -EPERM;
+ goto err_out;
+ }
/*
* Ensure there are no outstanding leases on the file.
*/
error = break_lease(inode, flag);
if (error)
- return error;
+ goto err_out;
if (flag & O_TRUNC) {
error = get_write_access(inode);
if (error)
- return error;
+ goto err_out;
/*
* Refuse to truncate files with mandatory locks held on them.
@@ -1581,12 +1586,17 @@ int may_open(struct path *path, int acc_mode, int flag)
}
put_write_access(inode);
if (error)
- return error;
+ goto err_out;
} else
if (flag & FMODE_WRITE)
vfs_dq_init(inode);
return 0;
+err_out:
+ ima_counts_put(path, acc_mode ?
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+ ACC_MODE(flag) & (MAY_READ | MAY_WRITE));
+ return error;
}
/*
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 845159814de2..da7fda639eac 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
direct.o pagelist.o proc.o read.o symlink.o unlink.o \
- write.o namespace.o mount_clnt.o
+ write.o namespace.o mount_clnt.o \
+ dns_resolve.o cache_lib.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
new file mode 100644
index 000000000000..b4ffd0146ea6
--- /dev/null
+++ b/fs/nfs/cache_lib.c
@@ -0,0 +1,140 @@
+/*
+ * linux/fs/nfs/cache_lib.c
+ *
+ * Helper routines for the NFS client caches
+ *
+ * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include "cache_lib.h"
+
+#define NFS_CACHE_UPCALL_PATHLEN 256
+#define NFS_CACHE_UPCALL_TIMEOUT 15
+
+static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] =
+ "/sbin/nfs_cache_getent";
+static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT;
+
+module_param_string(cache_getent, nfs_cache_getent_prog,
+ sizeof(nfs_cache_getent_prog), 0600);
+MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program");
+module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600);
+MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which "
+ "the cache upcall is assumed to have failed");
+
+int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
+{
+ static char *envp[] = { "HOME=/",
+ "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL
+ };
+ char *argv[] = {
+ nfs_cache_getent_prog,
+ cd->name,
+ entry_name,
+ NULL
+ };
+ int ret = -EACCES;
+
+ if (nfs_cache_getent_prog[0] == '\0')
+ goto out;
+ ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+ /*
+ * Disable the upcall mechanism if we're getting an ENOENT or
+ * EACCES error. The admin can re-enable it on the fly by using
+ * sysfs to set the 'cache_getent' parameter once the problem
+ * has been fixed.
+ */
+ if (ret == -ENOENT || ret == -EACCES)
+ nfs_cache_getent_prog[0] = '\0';
+out:
+ return ret > 0 ? 0 : ret;
+}
+
+/*
+ * Deferred request handling
+ */
+void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
+{
+ if (atomic_dec_and_test(&dreq->count))
+ kfree(dreq);
+}
+
+static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany)
+{
+ struct nfs_cache_defer_req *dreq;
+
+ dreq = container_of(d, struct nfs_cache_defer_req, deferred_req);
+
+ complete_all(&dreq->completion);
+ nfs_cache_defer_req_put(dreq);
+}
+
+static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
+{
+ struct nfs_cache_defer_req *dreq;
+
+ dreq = container_of(req, struct nfs_cache_defer_req, req);
+ dreq->deferred_req.revisit = nfs_dns_cache_revisit;
+ atomic_inc(&dreq->count);
+
+ return &dreq->deferred_req;
+}
+
+struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
+{
+ struct nfs_cache_defer_req *dreq;
+
+ dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
+ if (dreq) {
+ init_completion(&dreq->completion);
+ atomic_set(&dreq->count, 1);
+ dreq->req.defer = nfs_dns_cache_defer;
+ }
+ return dreq;
+}
+
+int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
+{
+ if (wait_for_completion_timeout(&dreq->completion,
+ nfs_cache_getent_timeout * HZ) == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+
+int nfs_cache_register(struct cache_detail *cd)
+{
+ struct nameidata nd;
+ struct vfsmount *mnt;
+ int ret;
+
+ mnt = rpc_get_mount();
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+ ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
+ if (ret)
+ goto err;
+ ret = sunrpc_cache_register_pipefs(nd.path.dentry,
+ cd->name, 0600, cd);
+ path_put(&nd.path);
+ if (!ret)
+ return ret;
+err:
+ rpc_put_mount();
+ return ret;
+}
+
+void nfs_cache_unregister(struct cache_detail *cd)
+{
+ sunrpc_cache_unregister_pipefs(cd);
+ rpc_put_mount();
+}
+
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
new file mode 100644
index 000000000000..76f856e284e4
--- /dev/null
+++ b/fs/nfs/cache_lib.h
@@ -0,0 +1,27 @@
+/*
+ * Helper routines for the NFS client caches
+ *
+ * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+
+#include <linux/completion.h>
+#include <linux/sunrpc/cache.h>
+#include <asm/atomic.h>
+
+/*
+ * Deferred request handling
+ */
+struct nfs_cache_defer_req {
+ struct cache_req req;
+ struct cache_deferred_req deferred_req;
+ struct completion completion;
+ atomic_t count;
+};
+
+extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
+extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
+extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
+extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
+
+extern int nfs_cache_register(struct cache_detail *cd);
+extern void nfs_cache_unregister(struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7f604c7941fb..293fa0528a6e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program;
unsigned int nfs_callback_set_tcpport;
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
+#define NFS_CALLBACK_MAXPORTNR (65535U)
-static int param_set_port(const char *val, struct kernel_param *kp)
+static int param_set_portnr(const char *val, struct kernel_param *kp)
{
- char *endp;
- int num = simple_strtol(val, &endp, 0);
- if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+ unsigned long num;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+ ret = strict_strtoul(val, 0, &num);
+ if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
return -EINVAL;
- *((int *)kp->arg) = num;
+ *((unsigned int *)kp->arg) = num;
return 0;
}
-module_param_call(callback_tcpport, param_set_port, param_get_int,
- &nfs_callback_set_tcpport, 0644);
+static int param_get_portnr(char *buffer, struct kernel_param *kp)
+{
+ return param_get_uint(buffer, kp);
+}
+#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
+
+module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
/*
* This is the NFSv4 callback kernel thread.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d25ccb2d51d..e350bd6a2334 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
server->options = data->options;
+ server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
+ NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
+ NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -879,6 +882,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
server->rsize = NFS_MAX_FILE_IO_SIZE;
server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ server->backing_dev_info.name = "nfs";
server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
if (server->wsize > max_rpc_payload)
@@ -1074,10 +1078,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
spin_lock(&nfs_client_lock);
list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1274,7 +1274,7 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
- server->caps |= NFS_CAP_ATOMIC_OPEN;
+ server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
server->options = data->options;
/* Get a client record */
@@ -1359,10 +1359,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
spin_lock(&nfs_client_lock);
list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1400,7 +1396,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
- server->caps |= NFS_CAP_ATOMIC_OPEN;
+ server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
/* Get a client representation.
* Note: NFSv4 always uses TCP, */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e4e089a8f294..6c3210099d51 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -934,9 +934,6 @@ out:
* back into its cache. We let the server do generic write
* parameter checking and report problems.
*
- * We also avoid an unnecessary invocation of generic_osync_inode(),
- * as it is fairly meaningless to sync the metadata of an NFS file.
- *
* We eliminate local atime updates, see direct read above.
*
* We avoid unnecessary page cache invalidations for normal cached
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
new file mode 100644
index 000000000000..f4d54ba97cc6
--- /dev/null
+++ b/fs/nfs/dns_resolve.c
@@ -0,0 +1,335 @@
+/*
+ * linux/fs/nfs/dns_resolve.c
+ *
+ * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * Resolves DNS hostnames into valid ip addresses
+ */
+
+#include <linux/hash.h>
+#include <linux/string.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/seq_file.h>
+#include <linux/inet.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/sunrpc/svcauth.h>
+
+#include "dns_resolve.h"
+#include "cache_lib.h"
+
+#define NFS_DNS_HASHBITS 4
+#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
+
+static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
+
+struct nfs_dns_ent {
+ struct cache_head h;
+
+ char *hostname;
+ size_t namelen;
+
+ struct sockaddr_storage addr;
+ size_t addrlen;
+};
+
+
+static void nfs_dns_ent_init(struct cache_head *cnew,
+ struct cache_head *ckey)
+{
+ struct nfs_dns_ent *new;
+ struct nfs_dns_ent *key;
+
+ new = container_of(cnew, struct nfs_dns_ent, h);
+ key = container_of(ckey, struct nfs_dns_ent, h);
+
+ kfree(new->hostname);
+ new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
+ if (new->hostname) {
+ new->namelen = key->namelen;
+ memcpy(&new->addr, &key->addr, key->addrlen);
+ new->addrlen = key->addrlen;
+ } else {
+ new->namelen = 0;
+ new->addrlen = 0;
+ }
+}
+
+static void nfs_dns_ent_put(struct kref *ref)
+{
+ struct nfs_dns_ent *item;
+
+ item = container_of(ref, struct nfs_dns_ent, h.ref);
+ kfree(item->hostname);
+ kfree(item);
+}
+
+static struct cache_head *nfs_dns_ent_alloc(void)
+{
+ struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL);
+
+ if (item != NULL) {
+ item->hostname = NULL;
+ item->namelen = 0;
+ item->addrlen = 0;
+ return &item->h;
+ }
+ return NULL;
+};
+
+static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key)
+{
+ return hash_str(key->hostname, NFS_DNS_HASHBITS);
+}
+
+static void nfs_dns_request(struct cache_detail *cd,
+ struct cache_head *ch,
+ char **bpp, int *blen)
+{
+ struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
+
+ qword_add(bpp, blen, key->hostname);
+ (*bpp)[-1] = '\n';
+}
+
+static int nfs_dns_upcall(struct cache_detail *cd,
+ struct cache_head *ch)
+{
+ struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
+ int ret;
+
+ ret = nfs_cache_upcall(cd, key->hostname);
+ if (ret)
+ ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
+ return ret;
+}
+
+static int nfs_dns_match(struct cache_head *ca,
+ struct cache_head *cb)
+{
+ struct nfs_dns_ent *a;
+ struct nfs_dns_ent *b;
+
+ a = container_of(ca, struct nfs_dns_ent, h);
+ b = container_of(cb, struct nfs_dns_ent, h);
+
+ if (a->namelen == 0 || a->namelen != b->namelen)
+ return 0;
+ return memcmp(a->hostname, b->hostname, a->namelen) == 0;
+}
+
+static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
+ struct cache_head *h)
+{
+ struct nfs_dns_ent *item;
+ long ttl;
+
+ if (h == NULL) {
+ seq_puts(m, "# ip address hostname ttl\n");
+ return 0;
+ }
+ item = container_of(h, struct nfs_dns_ent, h);
+ ttl = (long)item->h.expiry_time - (long)get_seconds();
+ if (ttl < 0)
+ ttl = 0;
+
+ if (!test_bit(CACHE_NEGATIVE, &h->flags)) {
+ char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1];
+
+ rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf));
+ seq_printf(m, "%15s ", buf);
+ } else
+ seq_puts(m, "<none> ");
+ seq_printf(m, "%15s %ld\n", item->hostname, ttl);
+ return 0;
+}
+
+struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
+ struct nfs_dns_ent *key)
+{
+ struct cache_head *ch;
+
+ ch = sunrpc_cache_lookup(cd,
+ &key->h,
+ nfs_dns_hash(key));
+ if (!ch)
+ return NULL;
+ return container_of(ch, struct nfs_dns_ent, h);
+}
+
+struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
+ struct nfs_dns_ent *new,
+ struct nfs_dns_ent *key)
+{
+ struct cache_head *ch;
+
+ ch = sunrpc_cache_update(cd,
+ &new->h, &key->h,
+ nfs_dns_hash(key));
+ if (!ch)
+ return NULL;
+ return container_of(ch, struct nfs_dns_ent, h);
+}
+
+static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+ char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
+ struct nfs_dns_ent key, *item;
+ unsigned long ttl;
+ ssize_t len;
+ int ret = -EINVAL;
+
+ if (buf[buflen-1] != '\n')
+ goto out;
+ buf[buflen-1] = '\0';
+
+ len = qword_get(&buf, buf1, sizeof(buf1));
+ if (len <= 0)
+ goto out;
+ key.addrlen = rpc_pton(buf1, len,
+ (struct sockaddr *)&key.addr,
+ sizeof(key.addr));
+
+ len = qword_get(&buf, buf1, sizeof(buf1));
+ if (len <= 0)
+ goto out;
+
+ key.hostname = buf1;
+ key.namelen = len;
+ memset(&key.h, 0, sizeof(key.h));
+
+ ttl = get_expiry(&buf);
+ if (ttl == 0)
+ goto out;
+ key.h.expiry_time = ttl + get_seconds();
+
+ ret = -ENOMEM;
+ item = nfs_dns_lookup(cd, &key);
+ if (item == NULL)
+ goto out;
+
+ if (key.addrlen == 0)
+ set_bit(CACHE_NEGATIVE, &key.h.flags);
+
+ item = nfs_dns_update(cd, &key, item);
+ if (item == NULL)
+ goto out;
+
+ ret = 0;
+ cache_put(&item->h, cd);
+out:
+ return ret;
+}
+
+static struct cache_detail nfs_dns_resolve = {
+ .owner = THIS_MODULE,
+ .hash_size = NFS_DNS_HASHTBL_SIZE,
+ .hash_table = nfs_dns_table,
+ .name = "dns_resolve",
+ .cache_put = nfs_dns_ent_put,
+ .cache_upcall = nfs_dns_upcall,
+ .cache_parse = nfs_dns_parse,
+ .cache_show = nfs_dns_show,
+ .match = nfs_dns_match,
+ .init = nfs_dns_ent_init,
+ .update = nfs_dns_ent_init,
+ .alloc = nfs_dns_ent_alloc,
+};
+
+static int do_cache_lookup(struct cache_detail *cd,
+ struct nfs_dns_ent *key,
+ struct nfs_dns_ent **item,
+ struct nfs_cache_defer_req *dreq)
+{
+ int ret = -ENOMEM;
+
+ *item = nfs_dns_lookup(cd, key);
+ if (*item) {
+ ret = cache_check(cd, &(*item)->h, &dreq->req);
+ if (ret)
+ *item = NULL;
+ }
+ return ret;
+}
+
+static int do_cache_lookup_nowait(struct cache_detail *cd,
+ struct nfs_dns_ent *key,
+ struct nfs_dns_ent **item)
+{
+ int ret = -ENOMEM;
+
+ *item = nfs_dns_lookup(cd, key);
+ if (!*item)
+ goto out_err;
+ ret = -ETIMEDOUT;
+ if (!test_bit(CACHE_VALID, &(*item)->h.flags)
+ || (*item)->h.expiry_time < get_seconds()
+ || cd->flush_time > (*item)->h.last_refresh)
+ goto out_put;
+ ret = -ENOENT;
+ if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
+ goto out_put;
+ return 0;
+out_put:
+ cache_put(&(*item)->h, cd);
+out_err:
+ *item = NULL;
+ return ret;
+}
+
+static int do_cache_lookup_wait(struct cache_detail *cd,
+ struct nfs_dns_ent *key,
+ struct nfs_dns_ent **item)
+{
+ struct nfs_cache_defer_req *dreq;
+ int ret = -ENOMEM;
+
+ dreq = nfs_cache_defer_req_alloc();
+ if (!dreq)
+ goto out;
+ ret = do_cache_lookup(cd, key, item, dreq);
+ if (ret == -EAGAIN) {
+ ret = nfs_cache_wait_for_upcall(dreq);
+ if (!ret)
+ ret = do_cache_lookup_nowait(cd, key, item);
+ }
+ nfs_cache_defer_req_put(dreq);
+out:
+ return ret;
+}
+
+ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
+ struct sockaddr *sa, size_t salen)
+{
+ struct nfs_dns_ent key = {
+ .hostname = name,
+ .namelen = namelen,
+ };
+ struct nfs_dns_ent *item = NULL;
+ ssize_t ret;
+
+ ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
+ if (ret == 0) {
+ if (salen >= item->addrlen) {
+ memcpy(sa, &item->addr, item->addrlen);
+ ret = item->addrlen;
+ } else
+ ret = -EOVERFLOW;
+ cache_put(&item->h, &nfs_dns_resolve);
+ } else if (ret == -ENOENT)
+ ret = -ESRCH;
+ return ret;
+}
+
+int nfs_dns_resolver_init(void)
+{
+ return nfs_cache_register(&nfs_dns_resolve);
+}
+
+void nfs_dns_resolver_destroy(void)
+{
+ nfs_cache_unregister(&nfs_dns_resolve);
+}
+
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
new file mode 100644
index 000000000000..a3f0938babf7
--- /dev/null
+++ b/fs/nfs/dns_resolve.h
@@ -0,0 +1,14 @@
+/*
+ * Resolve DNS hostnames into valid ip addresses
+ */
+#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H
+#define __LINUX_FS_NFS_DNS_RESOLVE_H
+
+#define NFS_DNS_HOSTNAME_MAXLEN (128)
+
+extern int nfs_dns_resolver_init(void);
+extern void nfs_dns_resolver_destroy(void);
+extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
+ struct sockaddr *sa, size_t salen);
+
+#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05062329b678..5021b75d2d1e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -328,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
}
/*
+ * Decide whether a read/modify/write cycle may be more efficient
+ * then a modify/write/read cycle when writing to a page in the
+ * page cache.
+ *
+ * The modify/write/read cycle may occur if a page is read before
+ * being completely filled by the writer. In this situation, the
+ * page must be completely written to stable storage on the server
+ * before it can be refilled by reading in the page from the server.
+ * This can lead to expensive, small, FILE_SYNC mode writes being
+ * done.
+ *
+ * It may be more efficient to read the page first if the file is
+ * open for reading in addition to writing, the page is not marked
+ * as Uptodate, it is not dirty or waiting to be committed,
+ * indicating that it was previously allocated and then modified,
+ * that there were valid bytes of data in that range of the file,
+ * and that the new data won't completely replace the old data in
+ * that range of the file.
+ */
+static int nfs_want_read_modify_write(struct file *file, struct page *page,
+ loff_t pos, unsigned len)
+{
+ unsigned int pglen = nfs_page_length(page);
+ unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned int end = offset + len;
+
+ if ((file->f_mode & FMODE_READ) && /* open for read? */
+ !PageUptodate(page) && /* Uptodate? */
+ !PagePrivate(page) && /* i/o request already? */
+ pglen && /* valid bytes of file? */
+ (end < pglen || offset)) /* replace all valid bytes? */
+ return 1;
+ return 0;
+}
+
+/*
* This does the "real" work of the write. We must allocate and lock the
* page to be sent back to the generic routine, which then copies the
* data from user space.
@@ -340,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
int ret;
- pgoff_t index;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
- index = pos >> PAGE_CACHE_SHIFT;
+ int once_thru = 0;
dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
+start:
/*
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
@@ -367,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
if (ret) {
unlock_page(page);
page_cache_release(page);
+ } else if (!once_thru &&
+ nfs_want_read_modify_write(file, page, pos, len)) {
+ once_thru = 1;
+ ret = nfs_readpage(file, page);
+ page_cache_release(page);
+ if (!ret)
+ goto start;
}
return ret;
}
@@ -479,6 +523,7 @@ const struct address_space_operations nfs_file_aops = {
.invalidatepage = nfs_invalidate_page,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
+ .migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
};
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 86147b0ab2cf..21a84d45916f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static unsigned int fnvhash32(const void *, size_t);
-static struct rpc_pipe_ops idmap_upcall_ops = {
+static const struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = idmap_pipe_upcall,
.downcall = idmap_pipe_downcall,
.destroy_msg = idmap_pipe_destroy_msg,
@@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp)
if (idmap == NULL)
return -ENOMEM;
- idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
- idmap, &idmap_upcall_ops, 0);
+ idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
+ "idmap", idmap, &idmap_upcall_ops, 0);
if (IS_ERR(idmap->idmap_dentry)) {
error = PTR_ERR(idmap->idmap_dentry);
kfree(idmap);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd7938eda6a8..060022b4651c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -46,6 +46,7 @@
#include "iostat.h"
#include "internal.h"
#include "fscache.h"
+#include "dns_resolve.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -286,6 +287,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
/* We can't support update_atime(), since the server will reset it */
inode->i_flags |= S_NOATIME|S_NOCMTIME;
inode->i_mode = fattr->mode;
+ if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
+ && nfs_server_capable(inode, NFS_CAP_MODE))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL;
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -330,20 +336,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
+ else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
+ else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
+ else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL;
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
nfsi->change_attr = fattr->change_attr;
+ else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA;
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
+ else
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA
+ | NFS_INO_REVAL_PAGECACHE;
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
inode->i_nlink = fattr->nlink;
+ else if (nfs_server_capable(inode, NFS_CAP_NLINK))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
+ else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL;
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
+ else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL;
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1145,6 +1177,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
loff_t cur_isize, new_isize;
unsigned long invalid = 0;
unsigned long now = jiffies;
+ unsigned long save_cache_validity;
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
@@ -1171,10 +1204,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
- if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
- nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ATIME
- | NFS_INO_REVAL_PAGECACHE);
+ save_cache_validity = nfsi->cache_validity;
+ nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ATIME
+ | NFS_INO_REVAL_FORCED
+ | NFS_INO_REVAL_PAGECACHE);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
@@ -1189,7 +1223,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_force_lookup_revalidate(inode);
nfsi->change_attr = fattr->change_attr;
}
- }
+ } else if (server->caps & NFS_CAP_CHANGE_ATTR)
+ invalid |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
/* NFSv2/v3: Check if the mtime agrees */
@@ -1201,7 +1236,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_force_lookup_revalidate(inode);
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
}
- }
+ } else if (server->caps & NFS_CAP_MTIME)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_REVAL_FORCED);
+
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
@@ -1215,7 +1255,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
}
- }
+ } else if (server->caps & NFS_CAP_CTIME)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1231,30 +1275,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
dprintk("NFS: isize change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
}
- }
+ } else
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+ else if (server->caps & NFS_CAP_ATIME)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ | NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_mode = fattr->mode;
}
- }
+ } else if (server->caps & NFS_CAP_MODE)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
+
if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
if (inode->i_uid != fattr->uid) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_uid = fattr->uid;
}
- }
+ } else if (server->caps & NFS_CAP_OWNER)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
+
if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
if (inode->i_gid != fattr->gid) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_gid = fattr->gid;
}
- }
+ } else if (server->caps & NFS_CAP_OWNER_GROUP)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
if (inode->i_nlink != fattr->nlink) {
@@ -1263,7 +1327,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_DATA;
inode->i_nlink = fattr->nlink;
}
- }
+ } else if (server->caps & NFS_CAP_NLINK)
+ invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
@@ -1293,9 +1359,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|| S_ISLNK(inode->i_mode)))
invalid &= ~NFS_INO_INVALID_DATA;
if (!nfs_have_delegation(inode, FMODE_READ) ||
- (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
+ (save_cache_validity & NFS_INO_REVAL_FORCED))
nfsi->cache_validity |= invalid;
- nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
return 0;
out_changed:
@@ -1442,6 +1507,10 @@ static int __init init_nfs_fs(void)
{
int err;
+ err = nfs_dns_resolver_init();
+ if (err < 0)
+ goto out8;
+
err = nfs_fscache_register();
if (err < 0)
goto out7;
@@ -1500,6 +1569,8 @@ out5:
out6:
nfs_fscache_unregister();
out7:
+ nfs_dns_resolver_destroy();
+out8:
return err;
}
@@ -1511,6 +1582,7 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
+ nfs_dns_resolver_destroy();
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7dd90a6769d0..e21b1bb9972f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -49,6 +49,11 @@ struct nfs_clone_mount {
#define NFS_MAX_SECFLAVORS (12)
/*
+ * Value used if the user did not specify a port value.
+ */
+#define NFS_UNSPEC_PORT (-1)
+
+/*
* In-kernel mount arguments
*/
struct nfs_parsed_mount_data {
@@ -63,6 +68,7 @@ struct nfs_parsed_mount_data {
unsigned int auth_flavor_len;
rpc_authflavor_t auth_flavors[1];
char *client_address;
+ unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
@@ -71,7 +77,7 @@ struct nfs_parsed_mount_data {
size_t addrlen;
char *hostname;
u32 version;
- unsigned short port;
+ int port;
unsigned short protocol;
} mount_server;
@@ -80,7 +86,7 @@ struct nfs_parsed_mount_data {
size_t addrlen;
char *hostname;
char *export_path;
- unsigned short port;
+ int port;
unsigned short protocol;
} nfs_server;
@@ -102,6 +108,7 @@ struct nfs_mount_request {
};
extern int nfs_mount(struct nfs_mount_request *info);
+extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern struct rpc_program nfs_program;
@@ -213,7 +220,6 @@ void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(void *word);
/* super.c */
-void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
extern struct file_system_type nfs_xdev_fs_type;
#ifdef CONFIG_NFS_V4
extern struct file_system_type nfs4_xdev_fs_type;
@@ -248,6 +254,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
/* write.c */
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
+#ifdef CONFIG_MIGRATION
+extern int nfs_migrate_page(struct address_space *,
+ struct page *, struct page *);
+#else
+#define nfs_migrate_page NULL
+#endif
/* nfs4proc.c */
extern int _nfs4_call_sync(struct nfs_server *server,
@@ -368,24 +380,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
return ((unsigned long)len + (unsigned long)base +
PAGE_SIZE - 1) >> PAGE_SHIFT;
}
-
-#define IPV6_SCOPE_DELIMITER '%'
-
-/*
- * Set the port number in an address. Be agnostic about the address
- * family.
- */
-static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
-{
- struct sockaddr_in *ap = (struct sockaddr_in *)sap;
- struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
-
- switch (sap->sa_family) {
- case AF_INET:
- ap->sin_port = htons(port);
- break;
- case AF_INET6:
- ap6->sin6_port = htons(port);
- break;
- }
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 38ef9eaec407..0adefc40cc89 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -209,6 +209,71 @@ out_mnt_err:
goto out;
}
+/**
+ * nfs_umount - Notify a server that we have unmounted this export
+ * @info: pointer to umount request arguments
+ *
+ * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
+ * use UDP.
+ */
+void nfs_umount(const struct nfs_mount_request *info)
+{
+ static const struct rpc_timeout nfs_umnt_timeout = {
+ .to_initval = 1 * HZ,
+ .to_maxval = 3 * HZ,
+ .to_retries = 2,
+ };
+ struct rpc_create_args args = {
+ .protocol = IPPROTO_UDP,
+ .address = info->sap,
+ .addrsize = info->salen,
+ .timeout = &nfs_umnt_timeout,
+ .servername = info->hostname,
+ .program = &mnt_program,
+ .version = info->version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = RPC_CLNT_CREATE_NOPING,
+ };
+ struct mountres result;
+ struct rpc_message msg = {
+ .rpc_argp = info->dirpath,
+ .rpc_resp = &result,
+ };
+ struct rpc_clnt *clnt;
+ int status;
+
+ if (info->noresvport)
+ args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+
+ clnt = rpc_create(&args);
+ if (unlikely(IS_ERR(clnt)))
+ goto out_clnt_err;
+
+ dprintk("NFS: sending UMNT request for %s:%s\n",
+ (info->hostname ? info->hostname : "server"), info->dirpath);
+
+ if (info->version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
+ else
+ msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
+
+ status = rpc_call_sync(clnt, &msg, 0);
+ rpc_shutdown_client(clnt);
+
+ if (unlikely(status < 0))
+ goto out_call_err;
+
+ return;
+
+out_clnt_err:
+ dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
+ PTR_ERR(clnt));
+ return;
+
+out_call_err:
+ dprintk("NFS: UMNT request failed, status=%d\n", status);
+}
+
/*
* XDR encode/decode functions for MOUNT
*/
@@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
return -EIO;
status = ntohl(*p);
- for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) {
+ for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
if (mnt_errtbl[i].status == status) {
res->errno = mnt_errtbl[i].errno;
return 0;
@@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
return -EIO;
status = ntohl(*p);
- for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) {
+ for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
if (mnt3_errtbl[i].status == status) {
res->errno = mnt3_errtbl[i].errno;
return 0;
@@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = {
.p_statidx = MOUNTPROC_MNT,
.p_name = "MOUNT",
},
+ [MOUNTPROC_UMNT] = {
+ .p_proc = MOUNTPROC_UMNT,
+ .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_arglen = MNT_enc_dirpath_sz,
+ .p_statidx = MOUNTPROC_UMNT,
+ .p_name = "UMOUNT",
+ },
};
static struct rpc_procinfo mnt3_procedures[] = {
@@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
.p_statidx = MOUNTPROC3_MNT,
.p_name = "MOUNT",
},
+ [MOUNTPROC3_UMNT] = {
+ .p_proc = MOUNTPROC3_UMNT,
+ .p_encode = (kxdrproc_t)mnt_enc_dirpath,
+ .p_arglen = MNT_enc_dirpath_sz,
+ .p_statidx = MOUNTPROC3_UMNT,
+ .p_name = "UMOUNT",
+ },
};
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0cc5ce0edfe..ee6a13f05443 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -299,7 +299,6 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
/*
* Create a regular file.
- * For now, we don't implement O_EXCL.
*/
static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2a2a0a7143ad..2636c26d56fa 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -17,6 +17,7 @@
#include <linux/inet.h>
#include "internal.h"
#include "nfs4_fs.h"
+#include "dns_resolve.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -95,6 +96,20 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
return 0;
}
+static size_t nfs_parse_server_name(char *string, size_t len,
+ struct sockaddr *sa, size_t salen)
+{
+ ssize_t ret;
+
+ ret = rpc_pton(string, len, sa, salen);
+ if (ret == 0) {
+ ret = nfs_dns_resolve_name(string, len, sa, salen);
+ if (ret < 0)
+ ret = 0;
+ }
+ return ret;
+}
+
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
char *page, char *page2,
const struct nfs4_fs_location *location)
@@ -121,11 +136,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
continue;
- nfs_parse_ip_address(buf->data, buf->len,
- mountdata->addr, &mountdata->addrlen);
- if (mountdata->addr->sa_family == AF_UNSPEC)
+ mountdata->addrlen = nfs_parse_server_name(buf->data,
+ buf->len,
+ mountdata->addr, mountdata->addrlen);
+ if (mountdata->addrlen == 0)
continue;
- nfs_set_port(mountdata->addr, NFS_PORT);
+ rpc_set_port(mountdata->addr, NFS_PORT);
memcpy(page2, buf->data, buf->len);
page2[buf->len] = '\0';
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6917311f201c..be6544aef41f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -61,6 +61,8 @@
#define NFS4_POLL_RETRY_MIN (HZ/10)
#define NFS4_POLL_RETRY_MAX (15*HZ)
+#define NFS4_MAX_LOOP_ON_RECOVER (10)
+
struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
@@ -426,17 +428,19 @@ out:
static int nfs4_recover_session(struct nfs4_session *session)
{
struct nfs_client *clp = session->clp;
+ unsigned int loop;
int ret;
- for (;;) {
+ for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
ret = nfs4_wait_clnt_recover(clp);
if (ret != 0)
- return ret;
+ break;
if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
break;
nfs4_schedule_state_manager(clp);
+ ret = -EIO;
}
- return 0;
+ return ret;
}
static int nfs41_setup_sequence(struct nfs4_session *session,
@@ -1444,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
static int nfs4_recover_expired_lease(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
+ unsigned int loop;
int ret;
- for (;;) {
+ for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
ret = nfs4_wait_clnt_recover(clp);
if (ret != 0)
- return ret;
+ break;
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
nfs4_schedule_state_recovery(clp);
+ ret = -EIO;
}
- return 0;
+ return ret;
}
/*
@@ -1997,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
status = nfs4_call_sync(server, &msg, &args, &res, 0);
if (status == 0) {
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
+ server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
+ NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
+ NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
+ NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
+ NFS_CAP_CTIME|NFS_CAP_MTIME);
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
server->caps |= NFS_CAP_ACLS;
if (res.has_links != 0)
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
+ if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
+ server->caps |= NFS_CAP_FILEID;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
+ server->caps |= NFS_CAP_MODE;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
+ server->caps |= NFS_CAP_NLINK;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
+ server->caps |= NFS_CAP_OWNER;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
+ server->caps |= NFS_CAP_OWNER_GROUP;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
+ server->caps |= NFS_CAP_ATIME;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
+ server->caps |= NFS_CAP_CTIME;
+ if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
+ server->caps |= NFS_CAP_MTIME;
+
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 65ca8c18476f..1434080aefeb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
continue;
}
/* Initialize or reset the session */
- if (nfs4_has_session(clp) &&
- test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
+ if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
+ && nfs4_has_session(clp)) {
if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
status = nfs4_initialize_session(clp);
else
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 617273e7d47f..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
u32 minorversion;
};
-/*
- * START OF "GENERIC" ENCODE ROUTINES.
- * These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define WRITE32(n) *p++ = htonl(n)
-#define WRITE64(n) do { \
- *p++ = htonl((uint32_t)((n) >> 32)); \
- *p++ = htonl((uint32_t)(n)); \
-} while (0)
-#define WRITEMEM(ptr,nbytes) do { \
- p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
-} while (0)
-
-#define RESERVE_SPACE(nbytes) do { \
- p = xdr_reserve_space(xdr, nbytes); \
- BUG_ON(!p); \
-} while (0)
+static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p = xdr_reserve_space(xdr, nbytes);
+ BUG_ON(!p);
+ return p;
+}
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
- RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
- WRITE32(hdr->taglen);
- WRITEMEM(hdr->tag, hdr->taglen);
- WRITE32(hdr->minorversion);
+ p = reserve_space(xdr, 4 + hdr->taglen + 8);
+ p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
+ *p++ = cpu_to_be32(hdr->minorversion);
hdr->nops_p = p;
- WRITE32(hdr->nops);
+ *p = cpu_to_be32(hdr->nops);
}
static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len += 16;
else if (iap->ia_valid & ATTR_MTIME)
len += 4;
- RESERVE_SPACE(len);
+ p = reserve_space(xdr, len);
/*
* We write the bitmap length now, but leave the bitmap and the attribute
* buffer length to be backfilled at the end of this routine.
*/
- WRITE32(2);
+ *p++ = cpu_to_be32(2);
q = p;
p += 3;
if (iap->ia_valid & ATTR_SIZE) {
bmval0 |= FATTR4_WORD0_SIZE;
- WRITE64(iap->ia_size);
+ p = xdr_encode_hyper(p, iap->ia_size);
}
if (iap->ia_valid & ATTR_MODE) {
bmval1 |= FATTR4_WORD1_MODE;
- WRITE32(iap->ia_mode & S_IALLUGO);
+ *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
}
if (iap->ia_valid & ATTR_UID) {
bmval1 |= FATTR4_WORD1_OWNER;
- WRITE32(owner_namelen);
- WRITEMEM(owner_name, owner_namelen);
+ p = xdr_encode_opaque(p, owner_name, owner_namelen);
}
if (iap->ia_valid & ATTR_GID) {
bmval1 |= FATTR4_WORD1_OWNER_GROUP;
- WRITE32(owner_grouplen);
- WRITEMEM(owner_group, owner_grouplen);
+ p = xdr_encode_opaque(p, owner_group, owner_grouplen);
}
if (iap->ia_valid & ATTR_ATIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- WRITE32(NFS4_SET_TO_CLIENT_TIME);
- WRITE32(0);
- WRITE32(iap->ia_mtime.tv_sec);
- WRITE32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- WRITE32(NFS4_SET_TO_SERVER_TIME);
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
if (iap->ia_valid & ATTR_MTIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- WRITE32(NFS4_SET_TO_CLIENT_TIME);
- WRITE32(0);
- WRITE32(iap->ia_mtime.tv_sec);
- WRITE32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
}
else if (iap->ia_valid & ATTR_MTIME) {
bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- WRITE32(NFS4_SET_TO_SERVER_TIME);
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
/*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len = (char *)p - (char *)q - 12;
*q++ = htonl(bmval0);
*q++ = htonl(bmval1);
- *q++ = htonl(len);
+ *q = htonl(len);
/* out: */
}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
{
__be32 *p;
- RESERVE_SPACE(8);
- WRITE32(OP_ACCESS);
- WRITE32(access);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_ACCESS);
+ *p = cpu_to_be32(access);
hdr->nops++;
hdr->replen += decode_access_maxsz;
}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
{
__be32 *p;
- RESERVE_SPACE(8+NFS4_STATEID_SIZE);
- WRITE32(OP_CLOSE);
- WRITE32(arg->seqid->sequence->counter);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_CLOSE);
+ *p++ = cpu_to_be32(arg->seqid->sequence->counter);
+ xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_close_maxsz;
}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
{
__be32 *p;
- RESERVE_SPACE(16);
- WRITE32(OP_COMMIT);
- WRITE64(args->offset);
- WRITE32(args->count);
+ p = reserve_space(xdr, 16);
+ *p++ = cpu_to_be32(OP_COMMIT);
+ p = xdr_encode_hyper(p, args->offset);
+ *p = cpu_to_be32(args->count);
hdr->nops++;
hdr->replen += decode_commit_maxsz;
}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
{
__be32 *p;
- RESERVE_SPACE(8);
- WRITE32(OP_CREATE);
- WRITE32(create->ftype);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_CREATE);
+ *p = cpu_to_be32(create->ftype);
switch (create->ftype) {
case NF4LNK:
- RESERVE_SPACE(4);
- WRITE32(create->u.symlink.len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(create->u.symlink.len);
xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
break;
case NF4BLK: case NF4CHR:
- RESERVE_SPACE(8);
- WRITE32(create->u.device.specdata1);
- WRITE32(create->u.device.specdata2);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(create->u.device.specdata1);
+ *p = cpu_to_be32(create->u.device.specdata2);
break;
default:
break;
}
- RESERVE_SPACE(4 + create->name->len);
- WRITE32(create->name->len);
- WRITEMEM(create->name->name, create->name->len);
+ encode_string(xdr, create->name->len, create->name->name);
hdr->nops++;
hdr->replen += decode_create_maxsz;
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
{
__be32 *p;
- RESERVE_SPACE(12);
- WRITE32(OP_GETATTR);
- WRITE32(1);
- WRITE32(bitmap);
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(OP_GETATTR);
+ *p++ = cpu_to_be32(1);
+ *p = cpu_to_be32(bitmap);
hdr->nops++;
hdr->replen += decode_getattr_maxsz;
}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
{
__be32 *p;
- RESERVE_SPACE(16);
- WRITE32(OP_GETATTR);
- WRITE32(2);
- WRITE32(bm0);
- WRITE32(bm1);
+ p = reserve_space(xdr, 16);
+ *p++ = cpu_to_be32(OP_GETATTR);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(bm0);
+ *p = cpu_to_be32(bm1);
hdr->nops++;
hdr->replen += decode_getattr_maxsz;
}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_GETFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_GETFH);
hdr->nops++;
hdr->replen += decode_getfh_maxsz;
}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
{
__be32 *p;
- RESERVE_SPACE(8 + name->len);
- WRITE32(OP_LINK);
- WRITE32(name->len);
- WRITEMEM(name->name, name->len);
+ p = reserve_space(xdr, 8 + name->len);
+ *p++ = cpu_to_be32(OP_LINK);
+ xdr_encode_opaque(p, name->name, name->len);
hdr->nops++;
hdr->replen += decode_link_maxsz;
}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
{
__be32 *p;
- RESERVE_SPACE(32);
- WRITE32(OP_LOCK);
- WRITE32(nfs4_lock_type(args->fl, args->block));
- WRITE32(args->reclaim);
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
- WRITE32(args->new_lock_owner);
+ p = reserve_space(xdr, 32);
+ *p++ = cpu_to_be32(OP_LOCK);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
+ *p++ = cpu_to_be32(args->reclaim);
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ *p = cpu_to_be32(args->new_lock_owner);
if (args->new_lock_owner){
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
- WRITE32(args->open_seqid->sequence->counter);
- WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
- WRITE32(args->lock_seqid->sequence->counter);
- WRITE64(args->lock_owner.clientid);
- WRITE32(16);
- WRITEMEM("lock id:", 8);
- WRITE64(args->lock_owner.id);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
+ *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+ p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+ p = xdr_encode_hyper(p, args->lock_owner.clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ xdr_encode_hyper(p, args->lock_owner.id);
}
else {
- RESERVE_SPACE(NFS4_STATEID_SIZE+4);
- WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE);
- WRITE32(args->lock_seqid->sequence->counter);
+ p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+ p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(args->lock_seqid->sequence->counter);
}
hdr->nops++;
hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
{
__be32 *p;
- RESERVE_SPACE(52);
- WRITE32(OP_LOCKT);
- WRITE32(nfs4_lock_type(args->fl, 0));
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
- WRITE64(args->lock_owner.clientid);
- WRITE32(16);
- WRITEMEM("lock id:", 8);
- WRITE64(args->lock_owner.id);
+ p = reserve_space(xdr, 52);
+ *p++ = cpu_to_be32(OP_LOCKT);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ p = xdr_encode_hyper(p, args->lock_owner.clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+ xdr_encode_hyper(p, args->lock_owner.id);
hdr->nops++;
hdr->replen += decode_lockt_maxsz;
}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
{
__be32 *p;
- RESERVE_SPACE(12+NFS4_STATEID_SIZE+16);
- WRITE32(OP_LOCKU);
- WRITE32(nfs4_lock_type(args->fl, 0));
- WRITE32(args->seqid->sequence->counter);
- WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE);
- WRITE64(args->fl->fl_start);
- WRITE64(nfs4_lock_length(args->fl));
+ p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
+ *p++ = cpu_to_be32(OP_LOCKU);
+ *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ *p++ = cpu_to_be32(args->seqid->sequence->counter);
+ p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_hyper(p, args->fl->fl_start);
+ xdr_encode_hyper(p, nfs4_lock_length(args->fl));
hdr->nops++;
hdr->replen += decode_locku_maxsz;
}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
int len = name->len;
__be32 *p;
- RESERVE_SPACE(8 + len);
- WRITE32(OP_LOOKUP);
- WRITE32(len);
- WRITEMEM(name->name, len);
+ p = reserve_space(xdr, 8 + len);
+ *p++ = cpu_to_be32(OP_LOOKUP);
+ xdr_encode_opaque(p, name->name, len);
hdr->nops++;
hdr->replen += decode_lookup_maxsz;
}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
{
__be32 *p;
- RESERVE_SPACE(8);
+ p = reserve_space(xdr, 8);
switch (fmode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
- WRITE32(NFS4_SHARE_ACCESS_READ);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
break;
case FMODE_WRITE:
- WRITE32(NFS4_SHARE_ACCESS_WRITE);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
break;
case FMODE_READ|FMODE_WRITE:
- WRITE32(NFS4_SHARE_ACCESS_BOTH);
+ *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
break;
default:
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
}
- WRITE32(0); /* for linux, share_deny = 0 always */
+ *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
}
static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
* opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
* owner 4 = 32
*/
- RESERVE_SPACE(8);
- WRITE32(OP_OPEN);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_OPEN);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
- RESERVE_SPACE(28);
- WRITE64(arg->clientid);
- WRITE32(16);
- WRITEMEM("open id:", 8);
- WRITE64(arg->id);
+ p = reserve_space(xdr, 28);
+ p = xdr_encode_hyper(p, arg->clientid);
+ *p++ = cpu_to_be32(16);
+ p = xdr_encode_opaque_fixed(p, "open id:", 8);
+ xdr_encode_hyper(p, arg->id);
}
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch(arg->open_flags & O_EXCL) {
case 0:
- WRITE32(NFS4_CREATE_UNCHECKED);
+ *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
encode_attrs(xdr, arg->u.attrs, arg->server);
break;
default:
- WRITE32(NFS4_CREATE_EXCLUSIVE);
+ *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
encode_nfs4_verifier(xdr, &arg->u.verifier);
}
}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch (arg->open_flags & O_CREAT) {
case 0:
- WRITE32(NFS4_OPEN_NOCREATE);
+ *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
break;
default:
BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
- WRITE32(NFS4_OPEN_CREATE);
+ *p = cpu_to_be32(NFS4_OPEN_CREATE);
encode_createmode(xdr, arg);
}
}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
{
__be32 *p;
- RESERVE_SPACE(4);
+ p = reserve_space(xdr, 4);
switch (delegation_type) {
case 0:
- WRITE32(NFS4_OPEN_DELEGATE_NONE);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
break;
case FMODE_READ:
- WRITE32(NFS4_OPEN_DELEGATE_READ);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
break;
case FMODE_WRITE|FMODE_READ:
- WRITE32(NFS4_OPEN_DELEGATE_WRITE);
+ *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
break;
default:
BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(NFS4_OPEN_CLAIM_NULL);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
encode_string(xdr, name->len, name->name);
}
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
encode_delegation_type(xdr, type);
}
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
- WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+ xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
encode_string(xdr, name->len, name->name);
}
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
- WRITE32(OP_OPEN_CONFIRM);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
hdr->nops++;
hdr->replen += decode_open_confirm_maxsz;
}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
- WRITE32(OP_OPEN_DOWNGRADE);
- WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
- WRITE32(arg->seqid->sequence->counter);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
hdr->nops++;
hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
int len = fh->size;
__be32 *p;
- RESERVE_SPACE(8 + len);
- WRITE32(OP_PUTFH);
- WRITE32(len);
- WRITEMEM(fh->data, len);
+ p = reserve_space(xdr, 8 + len);
+ *p++ = cpu_to_be32(OP_PUTFH);
+ xdr_encode_opaque(p, fh->data, len);
hdr->nops++;
hdr->replen += decode_putfh_maxsz;
}
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_PUTROOTFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_PUTROOTFH);
hdr->nops++;
hdr->replen += decode_putrootfh_maxsz;
}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
nfs4_stateid stateid;
__be32 *p;
- RESERVE_SPACE(NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
- WRITEMEM(stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
} else
- WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
}
static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_READ);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_READ);
encode_stateid(xdr, args->context);
- RESERVE_SPACE(12);
- WRITE64(args->offset);
- WRITE32(args->count);
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, args->offset);
+ *p = cpu_to_be32(args->count);
hdr->nops++;
hdr->replen += decode_read_maxsz;
}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
};
__be32 *p;
- RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20);
- WRITE32(OP_READDIR);
- WRITE64(readdir->cookie);
- WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE);
- WRITE32(readdir->count >> 1); /* We're not doing readdirplus */
- WRITE32(readdir->count);
- WRITE32(2);
+ p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
+ *p++ = cpu_to_be32(OP_READDIR);
+ p = xdr_encode_hyper(p, readdir->cookie);
+ p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
+ *p++ = cpu_to_be32(readdir->count);
+ *p++ = cpu_to_be32(2);
/* Switch to mounted_on_fileid if the server supports it */
if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
attrs[0] &= ~FATTR4_WORD0_FILEID;
else
attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
- WRITE32(attrs[0] & readdir->bitmask[0]);
- WRITE32(attrs[1] & readdir->bitmask[1]);
+ *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
+ *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
hdr->nops++;
hdr->replen += decode_readdir_maxsz;
dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_READLINK);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_READLINK);
hdr->nops++;
hdr->replen += decode_readlink_maxsz;
}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
{
__be32 *p;
- RESERVE_SPACE(8 + name->len);
- WRITE32(OP_REMOVE);
- WRITE32(name->len);
- WRITEMEM(name->name, name->len);
+ p = reserve_space(xdr, 8 + name->len);
+ *p++ = cpu_to_be32(OP_REMOVE);
+ xdr_encode_opaque(p, name->name, name->len);
hdr->nops++;
hdr->replen += decode_remove_maxsz;
}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
{
__be32 *p;
- RESERVE_SPACE(8 + oldname->len);
- WRITE32(OP_RENAME);
- WRITE32(oldname->len);
- WRITEMEM(oldname->name, oldname->len);
-
- RESERVE_SPACE(4 + newname->len);
- WRITE32(newname->len);
- WRITEMEM(newname->name, newname->len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_RENAME);
+ encode_string(xdr, oldname->len, oldname->name);
+ encode_string(xdr, newname->len, newname->name);
hdr->nops++;
hdr->replen += decode_rename_maxsz;
}
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
{
__be32 *p;
- RESERVE_SPACE(12);
- WRITE32(OP_RENEW);
- WRITE64(client_stateid->cl_clientid);
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(OP_RENEW);
+ xdr_encode_hyper(p, client_stateid->cl_clientid);
hdr->nops++;
hdr->replen += decode_renew_maxsz;
}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_RESTOREFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_RESTOREFH);
hdr->nops++;
hdr->replen += decode_restorefh_maxsz;
}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(OP_SETATTR);
- WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
- RESERVE_SPACE(2*4);
- WRITE32(1);
- WRITE32(FATTR4_WORD0_ACL);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_SETATTR);
+ xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 2*4);
+ *p++ = cpu_to_be32(1);
+ *p = cpu_to_be32(FATTR4_WORD0_ACL);
if (arg->acl_len % 4)
return -EINVAL;
- RESERVE_SPACE(4);
- WRITE32(arg->acl_len);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
hdr->nops++;
hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_SAVEFH);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_SAVEFH);
hdr->nops++;
hdr->replen += decode_savefh_maxsz;
}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
- WRITE32(OP_SETATTR);
- WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_SETATTR);
+ xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_setattr_maxsz;
encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
{
__be32 *p;
- RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE);
- WRITE32(OP_SETCLIENTID);
- WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
+ p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(OP_SETCLIENTID);
+ xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
- RESERVE_SPACE(4);
- WRITE32(setclientid->sc_prog);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(setclientid->sc_prog);
encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
- RESERVE_SPACE(4);
- WRITE32(setclientid->sc_cb_ident);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(setclientid->sc_cb_ident);
hdr->nops++;
hdr->replen += decode_setclientid_maxsz;
}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
{
__be32 *p;
- RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
- WRITE32(OP_SETCLIENTID_CONFIRM);
- WRITE64(client_state->cl_clientid);
- WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
+ p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
+ *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
+ p = xdr_encode_hyper(p, client_state->cl_clientid);
+ xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
hdr->nops++;
hdr->replen += decode_setclientid_confirm_maxsz;
}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
{
__be32 *p;
- RESERVE_SPACE(4);
- WRITE32(OP_WRITE);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_WRITE);
encode_stateid(xdr, args->context);
- RESERVE_SPACE(16);
- WRITE64(args->offset);
- WRITE32(args->stable);
- WRITE32(args->count);
+ p = reserve_space(xdr, 16);
+ p = xdr_encode_hyper(p, args->offset);
+ *p++ = cpu_to_be32(args->stable);
+ *p = cpu_to_be32(args->count);
xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
{
__be32 *p;
- RESERVE_SPACE(4+NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
- WRITE32(OP_DELEGRETURN);
- WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_DELEGRETURN);
+ xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_delegreturn_maxsz;
}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
{
__be32 *p;
- RESERVE_SPACE(4 + sizeof(args->verifier->data));
- WRITE32(OP_EXCHANGE_ID);
- WRITEMEM(args->verifier->data, sizeof(args->verifier->data));
+ p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
+ *p++ = cpu_to_be32(OP_EXCHANGE_ID);
+ xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
encode_string(xdr, args->id_len, args->id);
- RESERVE_SPACE(12);
- WRITE32(args->flags);
- WRITE32(0); /* zero length state_protect4_a */
- WRITE32(0); /* zero length implementation id array */
+ p = reserve_space(xdr, 12);
+ *p++ = cpu_to_be32(args->flags);
+ *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
+ *p = cpu_to_be32(0); /* zero length implementation id array */
hdr->nops++;
hdr->replen += decode_exchange_id_maxsz;
}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
uint32_t len;
struct nfs_client *clp = args->client;
- RESERVE_SPACE(4);
- WRITE32(OP_CREATE_SESSION);
-
- RESERVE_SPACE(8);
- WRITE64(clp->cl_ex_clid);
+ len = scnprintf(machine_name, sizeof(machine_name), "%s",
+ clp->cl_ipaddr);
- RESERVE_SPACE(8);
- WRITE32(clp->cl_seqid); /*Sequence id */
- WRITE32(args->flags); /*flags */
+ p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
+ *p++ = cpu_to_be32(OP_CREATE_SESSION);
+ p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
+ *p++ = cpu_to_be32(args->flags); /*flags */
- RESERVE_SPACE(2*28); /* 2 channel_attrs */
/* Fore Channel */
- WRITE32(args->fc_attrs.headerpadsz); /* header padding size */
- WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */
- WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */
- WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
- WRITE32(args->fc_attrs.max_ops); /* max operations */
- WRITE32(args->fc_attrs.max_reqs); /* max requests */
- WRITE32(0); /* rdmachannel_attrs */
+ *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
+ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
+ *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
+ *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
+ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
/* Back Channel */
- WRITE32(args->fc_attrs.headerpadsz); /* header padding size */
- WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */
- WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */
- WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
- WRITE32(args->bc_attrs.max_ops); /* max operations */
- WRITE32(args->bc_attrs.max_reqs); /* max requests */
- WRITE32(0); /* rdmachannel_attrs */
-
- RESERVE_SPACE(4);
- WRITE32(args->cb_program); /* cb_program */
-
- RESERVE_SPACE(4); /* # of security flavors */
- WRITE32(1);
-
- RESERVE_SPACE(4);
- WRITE32(RPC_AUTH_UNIX); /* auth_sys */
+ *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
+ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
+ *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
+ *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
+ *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
+
+ *p++ = cpu_to_be32(args->cb_program); /* cb_program */
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
/* authsys_parms rfc1831 */
- RESERVE_SPACE(4);
- WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
- len = scnprintf(machine_name, sizeof(machine_name), "%s",
- clp->cl_ipaddr);
- RESERVE_SPACE(16 + len);
- WRITE32(len);
- WRITEMEM(machine_name, len);
- WRITE32(0); /* UID */
- WRITE32(0); /* GID */
- WRITE32(0); /* No more gids */
+ *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
+ p = xdr_encode_opaque(p, machine_name, len);
+ *p++ = cpu_to_be32(0); /* UID */
+ *p++ = cpu_to_be32(0); /* GID */
+ *p = cpu_to_be32(0); /* No more gids */
hdr->nops++;
hdr->replen += decode_create_session_maxsz;
}
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
struct compound_hdr *hdr)
{
__be32 *p;
- RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN);
- WRITE32(OP_DESTROY_SESSION);
- WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(OP_DESTROY_SESSION);
+ xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
hdr->nops++;
hdr->replen += decode_destroy_session_maxsz;
}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
slot = tp->slots + args->sa_slotid;
- RESERVE_SPACE(4);
- WRITE32(OP_SEQUENCE);
+ p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
+ *p++ = cpu_to_be32(OP_SEQUENCE);
/*
* Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
((u32 *)session->sess_id.data)[3],
slot->seq_nr, args->sa_slotid,
tp->highest_used_slotid, args->sa_cache_this);
- RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16);
- WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(slot->seq_nr);
- WRITE32(args->sa_slotid);
- WRITE32(tp->highest_used_slotid);
- WRITE32(args->sa_cache_this);
+ p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(slot->seq_nr);
+ *p++ = cpu_to_be32(args->sa_slotid);
+ *p++ = cpu_to_be32(tp->highest_used_slotid);
+ *p = cpu_to_be32(args->sa_cache_this);
hdr->nops++;
hdr->replen += decode_sequence_maxsz;
#endif /* CONFIG_NFS_V4_1 */
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
}
#endif /* CONFIG_NFS_V4_1 */
-/*
- * START OF "GENERIC" DECODE ROUTINES.
- * These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define READ32(x) (x) = ntohl(*p++)
-#define READ64(x) do { \
- (x) = (u64)ntohl(*p++) << 32; \
- (x) |= ntohl(*p++); \
-} while (0)
-#define READTIME(x) do { \
- p++; \
- (x.tv_sec) = ntohl(*p++); \
- (x.tv_nsec) = ntohl(*p++); \
-} while (0)
-#define COPYMEM(x,nbytes) do { \
- memcpy((x), p, nbytes); \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-
-#define READ_BUF(nbytes) do { \
- p = xdr_inline_decode(xdr, nbytes); \
- if (unlikely(!p)) { \
- dprintk("nfs: %s: prematurely hit end of receive" \
- " buffer\n", __func__); \
- dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
- __func__, xdr->p, nbytes, xdr->end); \
- return -EIO; \
- } \
-} while (0)
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+ dprintk("nfs: %s: prematurely hit end of receive buffer. "
+ "Remaining buffer length is %tu words.\n",
+ func, xdr->end - xdr->p);
+}
static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
{
__be32 *p;
- READ_BUF(4);
- READ32(*len);
- READ_BUF(*len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, *len);
+ if (unlikely(!p))
+ goto out_overflow;
*string = (char *)p;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
__be32 *p;
- READ_BUF(8);
- READ32(hdr->status);
- READ32(hdr->taglen);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ hdr->status = be32_to_cpup(p++);
+ hdr->taglen = be32_to_cpup(p);
- READ_BUF(hdr->taglen + 4);
+ p = xdr_inline_decode(xdr, hdr->taglen + 4);
+ if (unlikely(!p))
+ goto out_overflow;
hdr->tag = (char *)p;
p += XDR_QUADLEN(hdr->taglen);
- READ32(hdr->nops);
+ hdr->nops = be32_to_cpup(p);
if (unlikely(hdr->nops < 1))
return nfs4_stat_to_errno(hdr->status);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
uint32_t opnum;
int32_t nfserr;
- READ_BUF(8);
- READ32(opnum);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ opnum = be32_to_cpup(p++);
if (opnum != expected) {
dprintk("nfs: Server returned operation"
" %d but we issued a request for %d\n",
opnum, expected);
return -EIO;
}
- READ32(nfserr);
+ nfserr = be32_to_cpup(p);
if (nfserr != NFS_OK)
return nfs4_stat_to_errno(nfserr);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
unsigned int strlen;
char *str;
- READ_BUF(12);
- return decode_opaque_inline(xdr, &strlen, &str);
+ p = xdr_inline_decode(xdr, 12);
+ if (likely(p))
+ return decode_opaque_inline(xdr, &strlen, &str);
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
uint32_t bmlen;
__be32 *p;
- READ_BUF(4);
- READ32(bmlen);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
bitmap[0] = bitmap[1] = 0;
- READ_BUF((bmlen << 2));
+ p = xdr_inline_decode(xdr, (bmlen << 2));
+ if (unlikely(!p))
+ goto out_overflow;
if (bmlen > 0) {
- READ32(bitmap[0]);
+ bitmap[0] = be32_to_cpup(p++);
if (bmlen > 1)
- READ32(bitmap[1]);
+ bitmap[1] = be32_to_cpup(p);
}
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
{
__be32 *p;
- READ_BUF(4);
- READ32(*attrlen);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *attrlen = be32_to_cpup(p);
*savep = xdr->p;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
- READ_BUF(4);
- READ32(*type);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *type = be32_to_cpup(p);
if (*type < NF4REG || *type > NF4NAMEDATTR) {
dprintk("%s: bad type %d\n", __func__, *type);
return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
}
dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
- READ_BUF(8);
- READ64(*change);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, change);
bitmap[0] &= ~FATTR4_WORD0_CHANGE;
ret = NFS_ATTR_FATTR_CHANGE;
}
dprintk("%s: change attribute=%Lu\n", __func__,
(unsigned long long)*change);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
- READ_BUF(8);
- READ64(*size);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, size);
bitmap[0] &= ~FATTR4_WORD0_SIZE;
ret = NFS_ATTR_FATTR_SIZE;
}
dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
- READ_BUF(16);
- READ64(fsid->major);
- READ64(fsid->minor);
+ p = xdr_inline_decode(xdr, 16);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &fsid->major);
+ xdr_decode_hyper(p, &fsid->minor);
bitmap[0] &= ~FATTR4_WORD0_FSID;
ret = NFS_ATTR_FATTR_FSID;
}
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
(unsigned long long)fsid->major,
(unsigned long long)fsid->minor);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
}
dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
- READ_BUF(4);
- READ32(*res);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
}
dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
- READ_BUF(8);
- READ64(*fileid);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, fileid);
bitmap[0] &= ~FATTR4_WORD0_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
- READ_BUF(8);
- READ64(*fileid);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, fileid);
bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
}
dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
}
dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
}
dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
__be32 *p;
int status = 0;
- READ_BUF(4);
- READ32(n);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ n = be32_to_cpup(p);
if (n == 0)
goto root_path;
dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
dprintk(" status %d", status);
status = -EIO;
goto out;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
status = decode_pathname(xdr, &res->fs_path);
if (unlikely(status != 0))
goto out;
- READ_BUF(4);
- READ32(n);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ n = be32_to_cpup(p);
if (n <= 0)
goto out_eio;
res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
u32 m;
struct nfs4_fs_location *loc = &res->locations[res->nlocations];
- READ_BUF(4);
- READ32(m);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ m = be32_to_cpup(p);
loc->nservers = 0;
dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
out:
dprintk("%s: fs_locations done, error = %d\n", __func__, status);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
out_eio:
status = -EIO;
goto out;
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
}
dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
- READ_BUF(4);
- READ32(*maxlink);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *maxlink = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
}
dprintk("%s: maxlink=%u\n", __func__, *maxlink);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
- READ_BUF(4);
- READ32(*maxname);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *maxname = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
}
dprintk("%s: maxname=%u\n", __func__, *maxname);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
uint64_t maxread;
- READ_BUF(8);
- READ64(maxread);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &maxread);
if (maxread > 0x7FFFFFFF)
maxread = 0x7FFFFFFF;
*res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
}
dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
uint64_t maxwrite;
- READ_BUF(8);
- READ64(maxwrite);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &maxwrite);
if (maxwrite > 0x7FFFFFFF)
maxwrite = 0x7FFFFFFF;
*res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
}
dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
- READ_BUF(4);
- READ32(tmp);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ tmp = be32_to_cpup(p);
*mode = tmp & ~S_IFMT;
bitmap[1] &= ~FATTR4_WORD1_MODE;
ret = NFS_ATTR_FATTR_MODE;
}
dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,16 +3134,22 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
- READ_BUF(4);
- READ32(*nlink);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ *nlink = be32_to_cpup(p);
bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
ret = NFS_ATTR_FATTR_NLINK;
}
dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs_client *clp, uint32_t *uid, int may_sleep)
{
uint32_t len;
__be32 *p;
@@ -3085,10 +3159,16 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
- if (len < XDR_MAX_NETOBJ) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
+ if (!may_sleep) {
+ /* do nothing */
+ } else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
ret = NFS_ATTR_FATTR_OWNER;
else
@@ -3101,9 +3181,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
}
dprintk("%s: uid=%d\n", __func__, (int)*uid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs_client *clp, uint32_t *gid, int may_sleep)
{
uint32_t len;
__be32 *p;
@@ -3113,10 +3197,16 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
- if (len < XDR_MAX_NETOBJ) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
+ if (!may_sleep) {
+ /* do nothing */
+ } else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
ret = NFS_ATTR_FATTR_GROUP;
else
@@ -3129,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
}
dprintk("%s: gid=%d\n", __func__, (int)*gid);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -3143,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
dev_t tmp;
- READ_BUF(8);
- READ32(major);
- READ32(minor);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ major = be32_to_cpup(p++);
+ minor = be32_to_cpup(p);
tmp = MKDEV(major, minor);
if (MAJOR(tmp) == major && MINOR(tmp) == minor)
*rdev = tmp;
@@ -3154,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
}
dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3165,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
}
dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3182,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
}
dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3199,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
- READ_BUF(8);
- READ64(*res);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
}
dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3216,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
- READ_BUF(8);
- READ64(*used);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, used);
bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
ret = NFS_ATTR_FATTR_SPACE_USED;
}
dprintk("%s: space used=%Lu\n", __func__,
(unsigned long long)*used);
return ret;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3232,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
uint64_t sec;
uint32_t nsec;
- READ_BUF(12);
- READ64(sec);
- READ32(nsec);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &sec);
+ nsec = be32_to_cpup(p);
time->tv_sec = (time_t)sec;
time->tv_nsec = (long)nsec;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3315,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
{
__be32 *p;
- READ_BUF(20);
- READ32(cinfo->atomic);
- READ64(cinfo->before);
- READ64(cinfo->after);
+ p = xdr_inline_decode(xdr, 20);
+ if (unlikely(!p))
+ goto out_overflow;
+ cinfo->atomic = be32_to_cpup(p++);
+ p = xdr_decode_hyper(p, &cinfo->before);
+ xdr_decode_hyper(p, &cinfo->after);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3331,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
status = decode_op_hdr(xdr, OP_ACCESS);
if (status)
return status;
- READ_BUF(8);
- READ32(supp);
- READ32(acc);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ supp = be32_to_cpup(p++);
+ acc = be32_to_cpup(p);
access->supported = supp;
access->access = acc;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
-static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
{
__be32 *p;
+
+ p = xdr_inline_decode(xdr, len);
+ if (likely(p)) {
+ memcpy(buf, p, len);
+ return 0;
+ }
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+ return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+}
+
+static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
int status;
status = decode_op_hdr(xdr, OP_CLOSE);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
+}
+
+static int decode_verifier(struct xdr_stream *xdr, void *verifier)
+{
+ return decode_opaque_fixed(xdr, verifier, 8);
}
static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_COMMIT);
- if (status)
- return status;
- READ_BUF(8);
- COPYMEM(res->verf->verifier, 8);
- return 0;
+ if (!status)
+ status = decode_verifier(xdr, res->verf->verifier);
+ return status;
}
static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3378,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
return status;
if ((status = decode_change_info(xdr, cinfo)))
return status;
- READ_BUF(4);
- READ32(bmlen);
- READ_BUF(bmlen << 2);
- return 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (likely(p))
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3466,7 +3622,8 @@ xdr_error:
return status;
}
-static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server)
+static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+ const struct nfs_server *server, int may_sleep)
{
__be32 *savep;
uint32_t attrlen,
@@ -3538,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid);
+ status = decode_attr_owner(xdr, bitmap, server->nfs_client,
+ &fattr->uid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid);
+ status = decode_attr_group(xdr, bitmap, server->nfs_client,
+ &fattr->gid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
@@ -3633,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (status)
return status;
- READ_BUF(4);
- READ32(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
if (len > NFS4_FHSIZE)
return -EIO;
fh->size = len;
- READ_BUF(len);
- COPYMEM(fh->data, len);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
+ memcpy(fh->data, p, len);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3662,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
__be32 *p;
uint32_t namelen, type;
- READ_BUF(32);
- READ64(offset);
- READ64(length);
- READ32(type);
+ p = xdr_inline_decode(xdr, 32);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &offset);
+ p = xdr_decode_hyper(p, &length);
+ type = be32_to_cpup(p++);
if (fl != NULL) {
fl->fl_start = (loff_t)offset;
fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3676,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
fl->fl_type = F_RDLCK;
fl->fl_pid = 0;
}
- READ64(clientid);
- READ32(namelen);
- READ_BUF(namelen);
- return -NFS4ERR_DENIED;
+ p = xdr_decode_hyper(p, &clientid);
+ namelen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, namelen);
+ if (likely(p))
+ return -NFS4ERR_DENIED;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_LOCK);
if (status == -EIO)
goto out;
if (status == 0) {
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
+ status = decode_stateid(xdr, &res->stateid);
+ if (unlikely(status))
+ goto out;
} else if (status == -NFS4ERR_DENIED)
status = decode_lock_denied(xdr, NULL);
if (res->open_seqid != NULL)
@@ -3713,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_LOCKU);
if (status != -EIO)
nfs_increment_lock_seqid(status, res->seqid);
- if (status == 0) {
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- }
+ if (status == 0)
+ status = decode_stateid(xdr, &res->stateid);
return status;
}
@@ -3737,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
__be32 *p;
uint32_t limit_type, nblocks, blocksize;
- READ_BUF(12);
- READ32(limit_type);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ limit_type = be32_to_cpup(p++);
switch (limit_type) {
case 1:
- READ64(*maxsize);
+ xdr_decode_hyper(p, maxsize);
break;
case 2:
- READ32(nblocks);
- READ32(blocksize);
+ nblocks = be32_to_cpup(p++);
+ blocksize = be32_to_cpup(p);
*maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
}
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
{
__be32 *p;
uint32_t delegation_type;
+ int status;
- READ_BUF(4);
- READ32(delegation_type);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ delegation_type = be32_to_cpup(p);
if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
res->delegation_type = 0;
return 0;
}
- READ_BUF(NFS4_STATEID_SIZE+4);
- COPYMEM(res->delegation.data, NFS4_STATEID_SIZE);
- READ32(res->do_recall);
+ status = decode_stateid(xdr, &res->delegation);
+ if (unlikely(status))
+ return status;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->do_recall = be32_to_cpup(p);
switch (delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
@@ -3776,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
return -EIO;
}
return decode_ace(xdr, NULL, res->server->nfs_client);
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3787,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
status = decode_op_hdr(xdr, OP_OPEN);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ if (unlikely(status))
return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
decode_change_info(xdr, &res->cinfo);
- READ_BUF(8);
- READ32(res->rflags);
- READ32(bmlen);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->rflags = be32_to_cpup(p++);
+ bmlen = be32_to_cpup(p);
if (bmlen > 10)
goto xdr_error;
- READ_BUF(bmlen << 2);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (unlikely(!p))
+ goto out_overflow;
savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
for (i = 0; i < savewords; ++i)
- READ32(res->attrset[i]);
+ res->attrset[i] = be32_to_cpup(p++);
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
@@ -3811,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
}
static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
{
- __be32 *p;
int status;
status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
if (status != -EIO)
nfs_increment_open_seqid(status, res->seqid);
- if (status)
- return status;
- READ_BUF(NFS4_STATEID_SIZE);
- COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
- return 0;
+ if (!status)
+ status = decode_stateid(xdr, &res->stateid);
+ return status;
}
static int decode_putfh(struct xdr_stream *xdr)
@@ -3863,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
status = decode_op_hdr(xdr, OP_READ);
if (status)
return status;
- READ_BUF(8);
- READ32(eof);
- READ32(count);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ eof = be32_to_cpup(p++);
+ count = be32_to_cpup(p);
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
recvd = req->rq_rcv_buf.len - hdrlen;
if (count > recvd) {
@@ -3878,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
res->eof = eof;
res->count = count;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3892,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
int status;
status = decode_op_hdr(xdr, OP_READDIR);
- if (status)
+ if (!status)
+ status = decode_verifier(xdr, readdir->verifier.data);
+ if (unlikely(status))
return status;
- READ_BUF(8);
- COPYMEM(readdir->verifier.data, 8);
dprintk("%s: verifier = %08x:%08x\n",
__func__,
((u32 *)readdir->verifier.data)[0],
((u32 *)readdir->verifier.data)[1]);
- hdrlen = (char *) p - (char *) iov->iov_base;
+ hdrlen = (char *) xdr->p - (char *) iov->iov_base;
recvd = rcvbuf->len - hdrlen;
if (pglen > recvd)
pglen = recvd;
@@ -3990,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
return status;
/* Convert length of symlink */
- READ_BUF(4);
- READ32(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
if (len >= rcvbuf->page_len || len <= 0) {
dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
@@ -4015,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
kaddr[len+rcvbuf->page_base] = '\0';
kunmap_atomic(kaddr, KM_USER0);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4112,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
status = decode_op_hdr(xdr, OP_SETATTR);
if (status)
return status;
- READ_BUF(4);
- READ32(bmlen);
- READ_BUF(bmlen << 2);
- return 0;
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ bmlen = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, bmlen << 2);
+ if (likely(p))
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4124,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
uint32_t opnum;
int32_t nfserr;
- READ_BUF(8);
- READ32(opnum);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ opnum = be32_to_cpup(p++);
if (opnum != OP_SETCLIENTID) {
dprintk("nfs: decode_setclientid: Server returned operation"
" %d\n", opnum);
return -EIO;
}
- READ32(nfserr);
+ nfserr = be32_to_cpup(p);
if (nfserr == NFS_OK) {
- READ_BUF(8 + NFS4_VERIFIER_SIZE);
- READ64(clp->cl_clientid);
- COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE);
+ p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &clp->cl_clientid);
+ memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
} else if (nfserr == NFSERR_CLID_INUSE) {
uint32_t len;
/* skip netid string */
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
/* skip uaddr string */
- READ_BUF(4);
- READ32(len);
- READ_BUF(len);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ len = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ goto out_overflow;
return -NFSERR_CLID_INUSE;
} else
return nfs4_stat_to_errno(nfserr);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4169,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
if (status)
return status;
- READ_BUF(16);
- READ32(res->count);
- READ32(res->verf->committed);
- COPYMEM(res->verf->verifier, 8);
+ p = xdr_inline_decode(xdr, 16);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->count = be32_to_cpup(p++);
+ res->verf->committed = be32_to_cpup(p++);
+ memcpy(res->verf->verifier, p, 8);
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4187,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
{
__be32 *p;
uint32_t dummy;
+ char *dummy_str;
int status;
struct nfs_client *clp = res->client;
@@ -4194,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
if (status)
return status;
- READ_BUF(8);
- READ64(clp->cl_ex_clid);
- READ_BUF(12);
- READ32(clp->cl_seqid);
- READ32(clp->cl_exchange_flags);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, &clp->cl_ex_clid);
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ clp->cl_seqid = be32_to_cpup(p++);
+ clp->cl_exchange_flags = be32_to_cpup(p++);
/* We ask for SP4_NONE */
- READ32(dummy);
+ dummy = be32_to_cpup(p);
if (dummy != SP4_NONE)
return -EIO;
/* Throw away minor_id */
- READ_BUF(8);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
/* Throw away Major id */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
/* Throw away server_scope */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
/* Throw away Implementation id array */
- READ_BUF(4);
- READ32(dummy);
- READ_BUF(dummy);
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -4232,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
__be32 *p;
u32 nr_attrs;
- READ_BUF(28);
- READ32(attrs->headerpadsz);
- READ32(attrs->max_rqst_sz);
- READ32(attrs->max_resp_sz);
- READ32(attrs->max_resp_sz_cached);
- READ32(attrs->max_ops);
- READ32(attrs->max_reqs);
- READ32(nr_attrs);
+ p = xdr_inline_decode(xdr, 28);
+ if (unlikely(!p))
+ goto out_overflow;
+ attrs->headerpadsz = be32_to_cpup(p++);
+ attrs->max_rqst_sz = be32_to_cpup(p++);
+ attrs->max_resp_sz = be32_to_cpup(p++);
+ attrs->max_resp_sz_cached = be32_to_cpup(p++);
+ attrs->max_ops = be32_to_cpup(p++);
+ attrs->max_reqs = be32_to_cpup(p++);
+ nr_attrs = be32_to_cpup(p);
if (unlikely(nr_attrs > 1)) {
printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
__func__, nr_attrs);
return -EINVAL;
}
- if (nr_attrs == 1)
- READ_BUF(4); /* skip rdma_attrs */
+ if (nr_attrs == 1) {
+ p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
+ if (unlikely(!p))
+ goto out_overflow;
+ }
return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
+{
+ return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
}
static int decode_create_session(struct xdr_stream *xdr,
@@ -4259,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
struct nfs4_session *session = clp->cl_session;
status = decode_op_hdr(xdr, OP_CREATE_SESSION);
-
- if (status)
+ if (!status)
+ status = decode_sessionid(xdr, &session->sess_id);
+ if (unlikely(status))
return status;
- /* sessionid */
- READ_BUF(NFS4_MAX_SESSIONID_LEN);
- COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
-
/* seqid, flags */
- READ_BUF(8);
- READ32(clp->cl_seqid);
- READ32(session->flags);
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+ clp->cl_seqid = be32_to_cpup(p++);
+ session->flags = be32_to_cpup(p);
/* Channel attributes */
status = decode_chan_attrs(xdr, &session->fc_attrs);
if (!status)
status = decode_chan_attrs(xdr, &session->bc_attrs);
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
}
static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4300,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
return 0;
status = decode_op_hdr(xdr, OP_SEQUENCE);
- if (status)
+ if (!status)
+ status = decode_sessionid(xdr, &id);
+ if (unlikely(status))
goto out_err;
/*
@@ -4309,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
*/
status = -ESERVERFAULT;
- slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
- COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
if (memcmp(id.data, res->sr_session->sess_id.data,
NFS4_MAX_SESSIONID_LEN)) {
dprintk("%s Invalid session id\n", __func__);
goto out_err;
}
+
+ p = xdr_inline_decode(xdr, 20);
+ if (unlikely(!p))
+ goto out_overflow;
+
/* seqid */
- READ32(dummy);
+ slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
+ dummy = be32_to_cpup(p++);
if (dummy != slot->seq_nr) {
dprintk("%s Invalid sequence number\n", __func__);
goto out_err;
}
/* slot id */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
if (dummy != res->sr_slotid) {
dprintk("%s Invalid slot id\n", __func__);
goto out_err;
}
/* highest slot id - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* target highest slot id - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* result flags - currently not processed */
- READ32(dummy);
+ dummy = be32_to_cpup(p);
status = 0;
out_err:
res->sr_status = status;
return status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ status = -EIO;
+ goto out_err;
#else /* CONFIG_NFS_V4_1 */
return 0;
#endif /* CONFIG_NFS_V4_1 */
@@ -4370,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
status = decode_open_downgrade(&xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4397,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
status = decode_access(&xdr, res);
if (status != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4424,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
goto out;
if ((status = decode_getfh(&xdr, res->fh)) != 0)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server);
+ status = decode_getfattr(&xdr, res->fattr, res->server
+ ,!RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4448,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
if ((status = decode_putrootfh(&xdr)) != 0)
goto out;
if ((status = decode_getfh(&xdr, res->fh)) == 0)
- status = decode_getfattr(&xdr, res->fattr, res->server);
+ status = decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4473,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
goto out;
if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
goto out;
- decode_getfattr(&xdr, &res->dir_attr, res->server);
+ decode_getfattr(&xdr, &res->dir_attr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4503,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
goto out;
/* Current FH is target directory */
- if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0)
+ if (decode_getfattr(&xdr, res->new_fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
if ((status = decode_restorefh(&xdr)) != 0)
goto out;
- decode_getfattr(&xdr, res->old_fattr, res->server);
+ decode_getfattr(&xdr, res->old_fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4540,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
* Note order: OP_LINK leaves the directory as the current
* filehandle.
*/
- if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0)
+ if (decode_getfattr(&xdr, res->dir_attr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
if ((status = decode_restorefh(&xdr)) != 0)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4573,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
goto out;
if ((status = decode_getfh(&xdr, res->fh)) != 0)
goto out;
- if (decode_getfattr(&xdr, res->fattr, res->server) != 0)
+ if (decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
if ((status = decode_restorefh(&xdr)) != 0)
goto out;
- decode_getfattr(&xdr, res->dir_fattr, res->server);
+ decode_getfattr(&xdr, res->dir_fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4609,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
status = decode_putfh(&xdr);
if (status)
goto out;
- status = decode_getfattr(&xdr, res->fattr, res->server);
+ status = decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4716,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
* an ESTALE error. Shouldn't be a problem,
* though, since fattr->valid will remain unset.
*/
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4748,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
goto out;
if (decode_getfh(&xdr, &res->fh) != 0)
goto out;
- if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
+ if (decode_getfattr(&xdr, res->f_attr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
goto out;
if (decode_restorefh(&xdr) != 0)
goto out;
- decode_getfattr(&xdr, res->dir_attr, res->server);
+ decode_getfattr(&xdr, res->dir_attr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4800,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
status = decode_open(&xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->f_attr, res->server);
+ decode_getfattr(&xdr, res->f_attr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -4827,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
status = decode_setattr(&xdr);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -5001,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
status = decode_write(&xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
out:
@@ -5030,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
status = decode_commit(&xdr, res);
if (status)
goto out;
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -5194,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
if (status != 0)
goto out;
status = decode_delegreturn(&xdr);
- decode_getfattr(&xdr, res->fattr, res->server);
+ decode_getfattr(&xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -5222,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
goto out;
xdr_enter_page(&xdr, PAGE_SIZE);
status = decode_getfattr(&xdr, &res->fs_locations->fattr,
- res->fs_locations->server);
+ res->fs_locations->server,
+ !RPC_IS_ASYNC(req->rq_task));
out:
return status;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0b4cbdc60abd..867f70504531 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -73,7 +73,7 @@ enum {
Opt_cto, Opt_nocto,
Opt_ac, Opt_noac,
Opt_lock, Opt_nolock,
- Opt_v2, Opt_v3,
+ Opt_v2, Opt_v3, Opt_v4,
Opt_udp, Opt_tcp, Opt_rdma,
Opt_acl, Opt_noacl,
Opt_rdirplus, Opt_nordirplus,
@@ -127,6 +127,7 @@ static const match_table_t nfs_mount_option_tokens = {
{ Opt_nolock, "nolock" },
{ Opt_v2, "v2" },
{ Opt_v3, "v3" },
+ { Opt_v4, "v4" },
{ Opt_udp, "udp" },
{ Opt_tcp, "tcp" },
{ Opt_rdma, "rdma" },
@@ -158,7 +159,7 @@ static const match_table_t nfs_mount_option_tokens = {
{ Opt_mountvers, "mountvers=%s" },
{ Opt_nfsvers, "nfsvers=%s" },
{ Opt_nfsvers, "vers=%s" },
- { Opt_minorversion, "minorversion=%u" },
+ { Opt_minorversion, "minorversion=%s" },
{ Opt_sec, "sec=%s" },
{ Opt_proto, "proto=%s" },
@@ -272,6 +273,10 @@ static const struct super_operations nfs_sops = {
};
#ifdef CONFIG_NFS_V4
+static int nfs4_validate_text_mount_data(void *options,
+ struct nfs_parsed_mount_data *args, const char *dev_name);
+static int nfs4_try_mount(int flags, const char *dev_name,
+ struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static int nfs4_remote_get_sb(struct file_system_type *fs_type,
@@ -742,127 +747,23 @@ static int nfs_verify_server_address(struct sockaddr *addr)
}
}
+ dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
return 0;
}
-static void nfs_parse_ipv4_address(char *string, size_t str_len,
- struct sockaddr *sap, size_t *addr_len)
-{
- struct sockaddr_in *sin = (struct sockaddr_in *)sap;
- u8 *addr = (u8 *)&sin->sin_addr.s_addr;
-
- if (str_len <= INET_ADDRSTRLEN) {
- dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n",
- (int)str_len, string);
-
- sin->sin_family = AF_INET;
- *addr_len = sizeof(*sin);
- if (in4_pton(string, str_len, addr, '\0', NULL))
- return;
- }
-
- sap->sa_family = AF_UNSPEC;
- *addr_len = 0;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
- const char *delim,
- struct sockaddr_in6 *sin6)
-{
- char *p;
- size_t len;
-
- if ((string + str_len) == delim)
- return 1;
-
- if (*delim != IPV6_SCOPE_DELIMITER)
- return 0;
-
- if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
- return 0;
-
- len = (string + str_len) - delim - 1;
- p = kstrndup(delim + 1, len, GFP_KERNEL);
- if (p) {
- unsigned long scope_id = 0;
- struct net_device *dev;
-
- dev = dev_get_by_name(&init_net, p);
- if (dev != NULL) {
- scope_id = dev->ifindex;
- dev_put(dev);
- } else {
- if (strict_strtoul(p, 10, &scope_id) == 0) {
- kfree(p);
- return 0;
- }
- }
-
- kfree(p);
-
- sin6->sin6_scope_id = scope_id;
- dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
- return 1;
- }
-
- return 0;
-}
-
-static void nfs_parse_ipv6_address(char *string, size_t str_len,
- struct sockaddr *sap, size_t *addr_len)
-{
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
- u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
- const char *delim;
-
- if (str_len <= INET6_ADDRSTRLEN) {
- dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n",
- (int)str_len, string);
-
- sin6->sin6_family = AF_INET6;
- *addr_len = sizeof(*sin6);
- if (in6_pton(string, str_len, addr,
- IPV6_SCOPE_DELIMITER, &delim) != 0) {
- if (nfs_parse_ipv6_scope_id(string, str_len,
- delim, sin6) != 0)
- return;
- }
- }
-
- sap->sa_family = AF_UNSPEC;
- *addr_len = 0;
-}
-#else
-static void nfs_parse_ipv6_address(char *string, size_t str_len,
- struct sockaddr *sap, size_t *addr_len)
-{
- sap->sa_family = AF_UNSPEC;
- *addr_len = 0;
-}
-#endif
-
/*
- * Construct a sockaddr based on the contents of a string that contains
- * an IP address in presentation format.
- *
- * If there is a problem constructing the new sockaddr, set the address
- * family to AF_UNSPEC.
+ * Select between a default port value and a user-specified port value.
+ * If a zero value is set, then autobind will be used.
*/
-void nfs_parse_ip_address(char *string, size_t str_len,
- struct sockaddr *sap, size_t *addr_len)
+static void nfs_set_default_port(struct sockaddr *sap, const int parsed_port,
+ const unsigned short default_port)
{
- unsigned int i, colons;
+ unsigned short port = default_port;
- colons = 0;
- for (i = 0; i < str_len; i++)
- if (string[i] == ':')
- colons++;
+ if (parsed_port != NFS_UNSPEC_PORT)
+ port = parsed_port;
- if (colons >= 2)
- nfs_parse_ipv6_address(string, str_len, sap, addr_len);
- else
- nfs_parse_ipv4_address(string, str_len, sap, addr_len);
+ rpc_set_port(sap, port);
}
/*
@@ -904,8 +805,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
/*
* Parse the value of the 'sec=' option.
- *
- * The flavor_len setting is for v4 mounts.
*/
static int nfs_parse_security_flavors(char *value,
struct nfs_parsed_mount_data *mnt)
@@ -916,53 +815,43 @@ static int nfs_parse_security_flavors(char *value,
switch (match_token(value, nfs_secflavor_tokens, args)) {
case Opt_sec_none:
- mnt->auth_flavor_len = 0;
mnt->auth_flavors[0] = RPC_AUTH_NULL;
break;
case Opt_sec_sys:
- mnt->auth_flavor_len = 0;
mnt->auth_flavors[0] = RPC_AUTH_UNIX;
break;
case Opt_sec_krb5:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
break;
case Opt_sec_krb5i:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
break;
case Opt_sec_krb5p:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
break;
case Opt_sec_lkey:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
break;
case Opt_sec_lkeyi:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
break;
case Opt_sec_lkeyp:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
break;
case Opt_sec_spkm:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
break;
case Opt_sec_spkmi:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
break;
case Opt_sec_spkmp:
- mnt->auth_flavor_len = 1;
mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
break;
default:
return 0;
}
+ mnt->auth_flavor_len = 1;
return 1;
}
@@ -1001,7 +890,6 @@ static int nfs_parse_mount_options(char *raw,
while ((p = strsep(&raw, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
unsigned long option;
- int int_option;
int token;
if (!*p)
@@ -1047,10 +935,18 @@ static int nfs_parse_mount_options(char *raw,
break;
case Opt_v2:
mnt->flags &= ~NFS_MOUNT_VER3;
+ mnt->version = 2;
break;
case Opt_v3:
mnt->flags |= NFS_MOUNT_VER3;
+ mnt->version = 3;
break;
+#ifdef CONFIG_NFS_V4
+ case Opt_v4:
+ mnt->flags &= ~NFS_MOUNT_VER3;
+ mnt->version = 4;
+ break;
+#endif
case Opt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1264,20 +1160,33 @@ static int nfs_parse_mount_options(char *raw,
switch (option) {
case NFS2_VERSION:
mnt->flags &= ~NFS_MOUNT_VER3;
+ mnt->version = 2;
break;
case NFS3_VERSION:
mnt->flags |= NFS_MOUNT_VER3;
+ mnt->version = 3;
break;
+#ifdef CONFIG_NFS_V4
+ case NFS4_VERSION:
+ mnt->flags &= ~NFS_MOUNT_VER3;
+ mnt->version = 4;
+ break;
+#endif
default:
goto out_invalid_value;
}
break;
case Opt_minorversion:
- if (match_int(args, &int_option))
- return 0;
- if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION)
- return 0;
- mnt->minorversion = int_option;
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ rc = strict_strtoul(string, 10, &option);
+ kfree(string);
+ if (rc != 0)
+ goto out_invalid_value;
+ if (option > NFS4_MAX_MINOR_VERSION)
+ goto out_invalid_value;
+ mnt->minorversion = option;
break;
/*
@@ -1352,11 +1261,14 @@ static int nfs_parse_mount_options(char *raw,
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- nfs_parse_ip_address(string, strlen(string),
- (struct sockaddr *)
- &mnt->nfs_server.address,
- &mnt->nfs_server.addrlen);
+ mnt->nfs_server.addrlen =
+ rpc_pton(string, strlen(string),
+ (struct sockaddr *)
+ &mnt->nfs_server.address,
+ sizeof(mnt->nfs_server.address));
kfree(string);
+ if (mnt->nfs_server.addrlen == 0)
+ goto out_invalid_address;
break;
case Opt_clientaddr:
string = match_strdup(args);
@@ -1376,11 +1288,14 @@ static int nfs_parse_mount_options(char *raw,
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- nfs_parse_ip_address(string, strlen(string),
- (struct sockaddr *)
- &mnt->mount_server.address,
- &mnt->mount_server.addrlen);
+ mnt->mount_server.addrlen =
+ rpc_pton(string, strlen(string),
+ (struct sockaddr *)
+ &mnt->mount_server.address,
+ sizeof(mnt->mount_server.address));
kfree(string);
+ if (mnt->mount_server.addrlen == 0)
+ goto out_invalid_address;
break;
case Opt_lookupcache:
string = match_strdup(args);
@@ -1432,8 +1347,11 @@ static int nfs_parse_mount_options(char *raw,
return 1;
+out_invalid_address:
+ printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
+ return 0;
out_invalid_value:
- printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p);
+ printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
return 0;
out_nomem:
printk(KERN_INFO "NFS: not enough memory to parse option\n");
@@ -1445,13 +1363,60 @@ out_security_failure:
}
/*
+ * Match the requested auth flavors with the list returned by
+ * the server. Returns zero and sets the mount's authentication
+ * flavor on success; returns -EACCES if server does not support
+ * the requested flavor.
+ */
+static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
+ struct nfs_mount_request *request)
+{
+ unsigned int i, j, server_authlist_len = *(request->auth_flav_len);
+
+ /*
+ * Certain releases of Linux's mountd return an empty
+ * flavor list. To prevent behavioral regression with
+ * these servers (ie. rejecting mounts that used to
+ * succeed), revert to pre-2.6.32 behavior (no checking)
+ * if the returned flavor list is empty.
+ */
+ if (server_authlist_len == 0)
+ return 0;
+
+ /*
+ * We avoid sophisticated negotiating here, as there are
+ * plenty of cases where we can get it wrong, providing
+ * either too little or too much security.
+ *
+ * RFC 2623, section 2.7 suggests we SHOULD prefer the
+ * flavor listed first. However, some servers list
+ * AUTH_NULL first. Our caller plants AUTH_SYS, the
+ * preferred default, in args->auth_flavors[0] if user
+ * didn't specify sec= mount option.
+ */
+ for (i = 0; i < args->auth_flavor_len; i++)
+ for (j = 0; j < server_authlist_len; j++)
+ if (args->auth_flavors[i] == request->auth_flavs[j]) {
+ dfprintk(MOUNT, "NFS: using auth flavor %d\n",
+ request->auth_flavs[j]);
+ args->auth_flavors[0] = request->auth_flavs[j];
+ return 0;
+ }
+
+ dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n");
+ nfs_umount(request);
+ return -EACCES;
+}
+
+/*
* Use the remote server's MOUNT service to request the NFS file handle
* corresponding to the provided path.
*/
static int nfs_try_mount(struct nfs_parsed_mount_data *args,
struct nfs_fh *root_fh)
{
- unsigned int auth_flavor_len = 0;
+ rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
+ unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
struct nfs_mount_request request = {
.sap = (struct sockaddr *)
&args->mount_server.address,
@@ -1459,7 +1424,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
.protocol = args->mount_server.protocol,
.fh = root_fh,
.noresvport = args->flags & NFS_MOUNT_NORESVPORT,
- .auth_flav_len = &auth_flavor_len,
+ .auth_flav_len = &server_authlist_len,
+ .auth_flavs = server_authlist,
};
int status;
@@ -1485,23 +1451,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
args->mount_server.addrlen = args->nfs_server.addrlen;
}
request.salen = args->mount_server.addrlen;
-
- /*
- * autobind will be used if mount_server.port == 0
- */
- nfs_set_port(request.sap, args->mount_server.port);
+ nfs_set_default_port(request.sap, args->mount_server.port, 0);
/*
* Now ask the mount server to map our export path
* to a file handle.
*/
status = nfs_mount(&request);
- if (status == 0)
- return 0;
+ if (status != 0) {
+ dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
+ request.hostname, status);
+ return status;
+ }
- dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
- request.hostname, status);
- return status;
+ /*
+ * MNTv1 (NFSv2) does not support auth flavor negotiation.
+ */
+ if (args->mount_server.version != NFS_MNT3_VERSION)
+ return 0;
+ return nfs_walk_authlist(args, &request);
}
static int nfs_parse_simple_hostname(const char *dev_name,
@@ -1661,6 +1629,7 @@ static int nfs_validate_mount_data(void *options,
const char *dev_name)
{
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
+ struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
if (data == NULL)
goto out_no_data;
@@ -1672,10 +1641,12 @@ static int nfs_validate_mount_data(void *options,
args->acregmax = NFS_DEF_ACREGMAX;
args->acdirmin = NFS_DEF_ACDIRMIN;
args->acdirmax = NFS_DEF_ACDIRMAX;
- args->mount_server.port = 0; /* autobind unless user sets port */
- args->nfs_server.port = 0; /* autobind unless user sets port */
+ args->mount_server.port = NFS_UNSPEC_PORT;
+ args->nfs_server.port = NFS_UNSPEC_PORT;
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
args->auth_flavors[0] = RPC_AUTH_UNIX;
+ args->auth_flavor_len = 1;
+ args->minorversion = 0;
switch (data->version) {
case 1:
@@ -1697,8 +1668,11 @@ static int nfs_validate_mount_data(void *options,
if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
goto out_invalid_fh;
mntfh->size = data->root.size;
- } else
+ args->version = 3;
+ } else {
mntfh->size = NFS2_FHSIZE;
+ args->version = 2;
+ }
memcpy(mntfh->data, data->root.data, mntfh->size);
@@ -1720,11 +1694,9 @@ static int nfs_validate_mount_data(void *options,
args->acdirmin = data->acdirmin;
args->acdirmax = data->acdirmax;
- memcpy(&args->nfs_server.address, &data->addr,
- sizeof(data->addr));
+ memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
- if (!nfs_verify_server_address((struct sockaddr *)
- &args->nfs_server.address))
+ if (!nfs_verify_server_address(sap))
goto out_no_address;
if (!(data->flags & NFS_MOUNT_TCP))
@@ -1772,12 +1744,18 @@ static int nfs_validate_mount_data(void *options,
if (nfs_parse_mount_options((char *)options, args) == 0)
return -EINVAL;
- if (!nfs_verify_server_address((struct sockaddr *)
- &args->nfs_server.address))
+ if (!nfs_verify_server_address(sap))
goto out_no_address;
- nfs_set_port((struct sockaddr *)&args->nfs_server.address,
- args->nfs_server.port);
+ if (args->version == 4)
+#ifdef CONFIG_NFS_V4
+ return nfs4_validate_text_mount_data(options,
+ args, dev_name);
+#else
+ goto out_v4_not_compiled;
+#endif
+
+ nfs_set_default_port(sap, args->nfs_server.port, 0);
nfs_set_mount_transport_protocol(args);
@@ -1825,6 +1803,12 @@ out_v3_not_compiled:
return -EPROTONOSUPPORT;
#endif /* !CONFIG_NFS_V3 */
+#ifndef CONFIG_NFS_V4
+out_v4_not_compiled:
+ dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
+ return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V4 */
+
out_nomem:
dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
return -ENOMEM;
@@ -2120,6 +2104,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
if (error < 0)
goto out;
+#ifdef CONFIG_NFS_V4
+ if (data->version == 4) {
+ error = nfs4_try_mount(flags, dev_name, data, mnt);
+ kfree(data->client_address);
+ goto out;
+ }
+#endif /* CONFIG_NFS_V4 */
+
/* Get a volume representation */
server = nfs_create_server(data, mntfh);
if (IS_ERR(server)) {
@@ -2317,6 +2309,43 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
}
+static int nfs4_validate_text_mount_data(void *options,
+ struct nfs_parsed_mount_data *args,
+ const char *dev_name)
+{
+ struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
+
+ nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT);
+
+ nfs_validate_transport_protocol(args);
+
+ nfs4_validate_mount_flags(args);
+
+ if (args->version != 4) {
+ dfprintk(MOUNT,
+ "NFS4: Illegal mount version\n");
+ return -EINVAL;
+ }
+
+ if (args->auth_flavor_len > 1) {
+ dfprintk(MOUNT,
+ "NFS4: Too many RPC auth flavours specified\n");
+ return -EINVAL;
+ }
+
+ if (args->client_address == NULL) {
+ dfprintk(MOUNT,
+ "NFS4: mount program didn't pass callback address\n");
+ return -EINVAL;
+ }
+
+ return nfs_parse_devname(dev_name,
+ &args->nfs_server.hostname,
+ NFS4_MAXNAMLEN,
+ &args->nfs_server.export_path,
+ NFS4_MAXPATHLEN);
+}
+
/*
* Validate NFSv4 mount options
*/
@@ -2324,7 +2353,7 @@ static int nfs4_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args,
const char *dev_name)
{
- struct sockaddr_in *ap;
+ struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
@@ -2337,23 +2366,22 @@ static int nfs4_validate_mount_data(void *options,
args->acregmax = NFS_DEF_ACREGMAX;
args->acdirmin = NFS_DEF_ACDIRMIN;
args->acdirmax = NFS_DEF_ACDIRMAX;
- args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */
+ args->nfs_server.port = NFS_UNSPEC_PORT;
args->auth_flavors[0] = RPC_AUTH_UNIX;
- args->auth_flavor_len = 0;
+ args->auth_flavor_len = 1;
+ args->version = 4;
args->minorversion = 0;
switch (data->version) {
case 1:
- ap = (struct sockaddr_in *)&args->nfs_server.address;
if (data->host_addrlen > sizeof(args->nfs_server.address))
goto out_no_address;
if (data->host_addrlen == 0)
goto out_no_address;
args->nfs_server.addrlen = data->host_addrlen;
- if (copy_from_user(ap, data->host_addr, data->host_addrlen))
+ if (copy_from_user(sap, data->host_addr, data->host_addrlen))
return -EFAULT;
- if (!nfs_verify_server_address((struct sockaddr *)
- &args->nfs_server.address))
+ if (!nfs_verify_server_address(sap))
goto out_no_address;
if (data->auth_flavourlen) {
@@ -2399,39 +2427,14 @@ static int nfs4_validate_mount_data(void *options,
nfs_validate_transport_protocol(args);
break;
- default: {
- int status;
-
+ default:
if (nfs_parse_mount_options((char *)options, args) == 0)
return -EINVAL;
- if (!nfs_verify_server_address((struct sockaddr *)
- &args->nfs_server.address))
+ if (!nfs_verify_server_address(sap))
return -EINVAL;
- nfs_set_port((struct sockaddr *)&args->nfs_server.address,
- args->nfs_server.port);
-
- nfs_validate_transport_protocol(args);
-
- nfs4_validate_mount_flags(args);
-
- if (args->auth_flavor_len > 1)
- goto out_inval_auth;
-
- if (args->client_address == NULL)
- goto out_no_client_address;
-
- status = nfs_parse_devname(dev_name,
- &args->nfs_server.hostname,
- NFS4_MAXNAMLEN,
- &args->nfs_server.export_path,
- NFS4_MAXPATHLEN);
- if (status < 0)
- return status;
-
- break;
- }
+ return nfs4_validate_text_mount_data(options, args, dev_name);
}
return 0;
@@ -2448,10 +2451,6 @@ out_inval_auth:
out_no_address:
dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
return -EINVAL;
-
-out_no_client_address:
- dfprintk(MOUNT, "NFS4: mount program didn't pass callback address\n");
- return -EINVAL;
}
/*
@@ -2618,6 +2617,34 @@ out_err:
return ret;
}
+static int nfs4_try_mount(int flags, const char *dev_name,
+ struct nfs_parsed_mount_data *data,
+ struct vfsmount *mnt)
+{
+ char *export_path;
+ struct vfsmount *root_mnt;
+ int error;
+
+ dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+
+ export_path = data->nfs_server.export_path;
+ data->nfs_server.export_path = "/";
+ root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
+ data->nfs_server.hostname);
+ data->nfs_server.export_path = export_path;
+
+ error = PTR_ERR(root_mnt);
+ if (IS_ERR(root_mnt))
+ goto out;
+
+ error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+
+out:
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error,
+ error != 0 ? " [error]" : "");
+ return error;
+}
+
/*
* Get the superblock for an NFS4 mountpoint
*/
@@ -2625,8 +2652,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
struct nfs_parsed_mount_data *data;
- char *export_path;
- struct vfsmount *root_mnt;
int error = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -2638,17 +2663,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
if (error < 0)
goto out;
- export_path = data->nfs_server.export_path;
- data->nfs_server.export_path = "/";
- root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
- data->nfs_server.hostname);
- data->nfs_server.export_path = export_path;
-
- error = PTR_ERR(root_mnt);
- if (IS_ERR(root_mnt))
- goto out;
-
- error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+ error = nfs4_try_mount(flags, dev_name, data, mnt);
out:
kfree(data->client_address);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a34fae21fe10..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
+#include <linux/migrate.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
@@ -26,6 +27,7 @@
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -218,24 +220,17 @@ static void nfs_end_page_writeback(struct page *page)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-/*
- * Find an associated nfs write request, and prepare to flush it out
- * May return an error if the user signalled nfs_wait_on_request().
- */
-static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page)
+static struct nfs_page *nfs_find_and_lock_request(struct page *page)
{
struct inode *inode = page->mapping->host;
struct nfs_page *req;
int ret;
spin_lock(&inode->i_lock);
- for(;;) {
+ for (;;) {
req = nfs_page_find_request_locked(page);
- if (req == NULL) {
- spin_unlock(&inode->i_lock);
- return 0;
- }
+ if (req == NULL)
+ break;
if (nfs_set_page_tag_locked(req))
break;
/* Note: If we hold the page lock, as is the case in nfs_writepage,
@@ -247,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
ret = nfs_wait_on_request(req);
nfs_release_request(req);
if (ret != 0)
- return ret;
+ return ERR_PTR(ret);
spin_lock(&inode->i_lock);
}
- if (test_bit(PG_CLEAN, &req->wb_flags)) {
- spin_unlock(&inode->i_lock);
- BUG();
- }
- if (nfs_set_page_writeback(page) != 0) {
- spin_unlock(&inode->i_lock);
- BUG();
- }
spin_unlock(&inode->i_lock);
+ return req;
+}
+
+/*
+ * Find an associated nfs write request, and prepare to flush it out
+ * May return an error if the user signalled nfs_wait_on_request().
+ */
+static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
+ struct page *page)
+{
+ struct nfs_page *req;
+ int ret = 0;
+
+ req = nfs_find_and_lock_request(page);
+ if (!req)
+ goto out;
+ ret = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out;
+
+ ret = nfs_set_page_writeback(page);
+ BUG_ON(ret != 0);
+ BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
+
if (!nfs_pageio_add_request(pgio, req)) {
nfs_redirty_request(req);
- return pgio->pg_error;
+ ret = pgio->pg_error;
}
- return 0;
+out:
+ return ret;
}
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
@@ -1580,6 +1592,41 @@ int nfs_wb_page(struct inode *inode, struct page* page)
return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
}
+#ifdef CONFIG_MIGRATION
+int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
+ struct page *page)
+{
+ struct nfs_page *req;
+ int ret;
+
+ if (PageFsCache(page))
+ nfs_fscache_release_page(page, GFP_KERNEL);
+
+ req = nfs_find_and_lock_request(page);
+ ret = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out;
+
+ ret = migrate_page(mapping, newpage, page);
+ if (!req)
+ goto out;
+ if (ret)
+ goto out_unlock;
+ page_cache_get(newpage);
+ req->wb_page = newpage;
+ SetPagePrivate(newpage);
+ set_page_private(newpage, page_private(page));
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ page_cache_release(page);
+out_unlock:
+ nfs_clear_page_tag_locked(req);
+ nfs_release_request(req);
+out:
+ return ret;
+}
+#endif
+
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 5573508f707f..36fcabbf5186 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
int flags = nfsexp_flags(rqstp, exp);
int ret;
+ validate_process_creds();
+
/* discard any old override before preparing the new set */
revert_creds(get_cred(current->real_cred));
new = prepare_creds();
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
else
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
+ validate_process_creds();
put_cred(override_creds(new));
put_cred(new);
+ validate_process_creds();
return 0;
oom:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b92a27629fb7..d9462643155c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd,
(*bpp)[-1] = '\n';
}
+static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
+}
+
static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
static struct cache_detail svc_expkey_cache;
@@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = {
.hash_table = expkey_table,
.name = "nfsd.fh",
.cache_put = expkey_put,
- .cache_request = expkey_request,
+ .cache_upcall = expkey_upcall,
.cache_parse = expkey_parse,
.cache_show = expkey_show,
.match = expkey_match,
@@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd,
(*bpp)[-1] = '\n';
}
+static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+}
+
static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
@@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = {
.hash_table = export_table,
.name = "nfsd.export",
.cache_put = svc_export_put,
- .cache_request = svc_export_request,
+ .cache_upcall = svc_export_upcall,
.cache_parse = svc_export_parse,
.cache_show = svc_export_show,
.match = svc_export_match,
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b398421b051..cdfa86fa1471 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
}
static int
+idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
+{
+ return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
+}
+
+static int
idtoname_match(struct cache_head *ca, struct cache_head *cb)
{
struct ent *a = container_of(ca, struct ent, h);
@@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
}
static void
-warn_no_idmapd(struct cache_detail *detail)
+warn_no_idmapd(struct cache_detail *detail, int has_died)
{
printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
- detail->last_close? "died" : "not been started");
+ has_died ? "died" : "not been started");
}
@@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = {
.hash_table = idtoname_table,
.name = "nfs4.idtoname",
.cache_put = ent_put,
- .cache_request = idtoname_request,
+ .cache_upcall = idtoname_upcall,
.cache_parse = idtoname_parse,
.cache_show = idtoname_show,
.warn_no_listener = warn_no_idmapd,
@@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
}
static int
+nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
+{
+ return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
+}
+
+static int
nametoid_match(struct cache_head *ca, struct cache_head *cb)
{
struct ent *a = container_of(ca, struct ent, h);
@@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = {
.hash_table = nametoid_table,
.name = "nfs4.nametoid",
.cache_put = ent_put,
- .cache_request = nametoid_request,
+ .cache_upcall = nametoid_upcall,
.cache_parse = nametoid_parse,
.cache_show = nametoid_show,
.warn_no_listener = warn_no_idmapd,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6d0847562d87..7e906c5b7671 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -37,6 +37,7 @@
#include <linux/nfsd/xdr.h>
#include <linux/nfsd/syscall.h>
#include <linux/lockd/lockd.h>
+#include <linux/sunrpc/clnt.h>
#include <asm/uaccess.h>
#include <net/ipv6.h>
@@ -490,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
*
* Input:
* buf: '\n'-terminated C string containing a
- * presentation format IPv4 address
+ * presentation format IP address
* size: length of C string in @buf
* Output:
* On success: returns zero if all specified locks were released;
* returns one if one or more locks were not released
* On error: return code is negative errno value
- *
- * Note: Only AF_INET client addresses are passed in
*/
static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- };
- int b1, b2, b3, b4;
- char c;
+ struct sockaddr_storage address;
+ struct sockaddr *sap = (struct sockaddr *)&address;
+ size_t salen = sizeof(address);
char *fo_path;
/* sanity check */
@@ -519,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
if (qword_get(&buf, fo_path, size) < 0)
return -EINVAL;
- /* get ipv4 address */
- if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
- return -EINVAL;
- if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
+ if (rpc_pton(fo_path, size, sap, salen) == 0)
return -EINVAL;
- sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
- return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
+ return nlmsvc_unlock_all_by_ip(sap);
}
/**
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 492c79b7800b..24d58adfe5fd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -496,7 +496,9 @@ nfsd(void *vrqstp)
/* Lock the export hash tables for reading. */
exp_readlock();
+ validate_process_creds();
svc_process(rqstp);
+ validate_process_creds();
/* Unlock export hash tables */
exp_readunlock();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1063bc..8fa09bfbcba7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
__be32 err;
int host_err;
+ validate_process_creds();
+
/*
* If we get here, then the client has already done an "open",
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
out_nfserr:
err = nfserrno(host_err);
out:
+ validate_process_creds();
return err;
}
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 72da095d4009..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,6 @@
config NILFS2_FS
tristate "NILFS2 file system support (EXPERIMENTAL)"
- depends on BLOCK && EXPERIMENTAL
+ depends on EXPERIMENTAL
select CRC32
help
NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 99d58a028b94..08834df6ec68 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -36,6 +36,26 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
}
+/**
+ * nilfs_bmap_lookup_at_level - find a data block or node block
+ * @bmap: bmap
+ * @key: key
+ * @level: level
+ * @ptrp: place to store the value associated to @key
+ *
+ * Description: nilfs_bmap_lookup_at_level() finds a record whose key
+ * matches @key in the block at @level of the bmap.
+ *
+ * Return Value: On success, 0 is returned and the record associated with @key
+ * is stored in the place pointed by @ptrp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - A record associated with @key does not exist.
+ */
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
__u64 *ptrp)
{
@@ -69,39 +89,6 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
return ret;
}
-/**
- * nilfs_bmap_lookup - find a record
- * @bmap: bmap
- * @key: key
- * @recp: pointer to record
- *
- * Description: nilfs_bmap_lookup() finds a record whose key matches @key in
- * @bmap.
- *
- * Return Value: On success, 0 is returned and the record associated with @key
- * is stored in the place pointed by @recp. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-ENOENT - A record associated with @key does not exist.
- */
-int nilfs_bmap_lookup(struct nilfs_bmap *bmap,
- unsigned long key,
- unsigned long *recp)
-{
- __u64 ptr;
- int ret;
-
- /* XXX: use macro for level 1 */
- ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr);
- if (recp != NULL)
- *recp = ptr;
- return ret;
-}
-
static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
{
__u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
@@ -469,104 +456,6 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
(entries_per_group / NILFS_BMAP_GROUP_DIV);
}
-int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req,
- sector_t blocknr)
-{
- struct inode *dat = nilfs_bmap_get_dat(bmap);
- int ret;
-
- ret = nilfs_dat_prepare_start(dat, &req->bpr_req);
- if (likely(!ret))
- nilfs_dat_commit_start(dat, &req->bpr_req, blocknr);
- return ret;
-}
-
-int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req,
- bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
-}
-
-void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr,
- sector_t blocknr)
-{
- return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr);
-}
-
-int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
-{
- return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
-}
-
-int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
-{
- struct inode *dat = nilfs_bmap_get_dat(bmap);
- int ret;
-
- ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req);
- if (ret < 0)
- return ret;
- ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req);
- if (ret < 0)
- nilfs_dat_abort_end(dat, &oldreq->bpr_req);
-
- return ret;
-}
-
-void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
-{
- struct inode *dat = nilfs_bmap_get_dat(bmap);
-
- nilfs_dat_commit_end(dat, &oldreq->bpr_req,
- bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
- nilfs_dat_commit_alloc(dat, &newreq->bpr_req);
-}
-
-void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
-{
- struct inode *dat = nilfs_bmap_get_dat(bmap);
-
- nilfs_dat_abort_end(dat, &oldreq->bpr_req);
- nilfs_dat_abort_alloc(dat, &newreq->bpr_req);
-}
-
static struct lock_class_key nilfs_bmap_dat_lock_key;
static struct lock_class_key nilfs_bmap_mdt_lock_key;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b2890cdcef12..9980d7dbab91 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -28,6 +28,7 @@
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
#include "alloc.h"
+#include "dat.h"
#define NILFS_BMAP_INVALID_PTR 0
@@ -141,7 +142,6 @@ struct nilfs_bmap {
int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
-int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
@@ -160,90 +160,76 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
+static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key,
+ __u64 *ptr)
+{
+ return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr);
+}
+
/*
* Internal use only
*/
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
-int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- if (NILFS_BMAP_USE_VBN(bmap))
- return nilfs_bmap_prepare_alloc_v(bmap, req);
+ if (dat)
+ return nilfs_dat_prepare_alloc(dat, &req->bpr_req);
/* ignore target ptr */
req->bpr_ptr = bmap->b_last_allocated_ptr++;
return 0;
}
static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- if (NILFS_BMAP_USE_VBN(bmap))
- nilfs_bmap_commit_alloc_v(bmap, req);
+ if (dat)
+ nilfs_dat_commit_alloc(dat, &req->bpr_req);
}
static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- if (NILFS_BMAP_USE_VBN(bmap))
- nilfs_bmap_abort_alloc_v(bmap, req);
+ if (dat)
+ nilfs_dat_abort_alloc(dat, &req->bpr_req);
else
bmap->b_last_allocated_ptr--;
}
-int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
-
static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- return NILFS_BMAP_USE_VBN(bmap) ?
- nilfs_bmap_prepare_end_v(bmap, req) : 0;
+ return dat ? nilfs_dat_prepare_end(dat, &req->bpr_req) : 0;
}
static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- if (NILFS_BMAP_USE_VBN(bmap))
- nilfs_bmap_commit_end_v(bmap, req);
+ if (dat)
+ nilfs_dat_commit_end(dat, &req->bpr_req,
+ bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
}
static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+ union nilfs_bmap_ptr_req *req,
+ struct inode *dat)
{
- if (NILFS_BMAP_USE_VBN(bmap))
- nilfs_bmap_abort_end_v(bmap, req);
+ if (dat)
+ nilfs_dat_abort_end(dat, &req->bpr_req);
}
-int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *,
- sector_t);
-int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
-int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
-
-
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
const struct buffer_head *);
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
-int nilfs_bmap_prepare_update_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_update_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_update_v(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
-
void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 7e0b61be212e..c668bca579c1 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
* We cannot call radix_tree_preload for the kernels older
* than 2.6.23, because it is not exported for modules.
*/
+retry:
err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (err)
goto failed_unlock;
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
(unsigned long long)oldkey,
(unsigned long long)newkey);
-retry:
spin_lock_irq(&btnc->tree_lock);
err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
spin_unlock_irq(&btnc->tree_lock);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index aa412724b64e..e25b507a474f 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -71,21 +71,17 @@ void nilfs_btree_path_cache_destroy(void)
kmem_cache_destroy(nilfs_btree_path_cache);
}
-static inline struct nilfs_btree_path *
-nilfs_btree_alloc_path(const struct nilfs_btree *btree)
+static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
- return (struct nilfs_btree_path *)
- kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
+ return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
}
-static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
- struct nilfs_btree_path *path)
+static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
{
kmem_cache_free(nilfs_btree_path_cache, path);
}
-static void nilfs_btree_init_path(const struct nilfs_btree *btree,
- struct nilfs_btree_path *path)
+static void nilfs_btree_init_path(struct nilfs_btree_path *path)
{
int level;
@@ -101,26 +97,13 @@ static void nilfs_btree_init_path(const struct nilfs_btree *btree,
}
}
-static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
- struct nilfs_btree_path *path)
+static void nilfs_btree_release_path(struct nilfs_btree_path *path)
{
int level;
- for (level = NILFS_BTREE_LEVEL_DATA;
- level < NILFS_BTREE_LEVEL_MAX;
- level++) {
- if (path[level].bp_bh != NULL) {
- brelse(path[level].bp_bh);
- path[level].bp_bh = NULL;
- }
- /* sib_bh is released or deleted by prepare or commit
- * operations. */
- path[level].bp_sib_bh = NULL;
- path[level].bp_index = 0;
- path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
- path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
- path[level].bp_op = NULL;
- }
+ for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
+ level++)
+ brelse(path[level].bp_bh);
}
/*
@@ -148,129 +131,110 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
}
static inline int
-nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
{
return node->bn_flags;
}
static inline void
-nilfs_btree_node_set_flags(struct nilfs_btree *btree,
- struct nilfs_btree_node *node,
- int flags)
+nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
{
node->bn_flags = flags;
}
-static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
{
- return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
+ return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
}
static inline int
-nilfs_btree_node_get_level(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
{
return node->bn_level;
}
static inline void
-nilfs_btree_node_set_level(struct nilfs_btree *btree,
- struct nilfs_btree_node *node,
- int level)
+nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
{
node->bn_level = level;
}
static inline int
-nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
{
return le16_to_cpu(node->bn_nchildren);
}
static inline void
-nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
- struct nilfs_btree_node *node,
- int nchildren)
+nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
{
node->bn_nchildren = cpu_to_le16(nchildren);
}
-static inline int
-nilfs_btree_node_size(const struct nilfs_btree *btree)
+static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
{
return 1 << btree->bt_bmap.b_inode->i_blkbits;
}
static inline int
-nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
+ const struct nilfs_btree *btree)
{
- return nilfs_btree_node_root(btree, node) ?
+ return nilfs_btree_node_root(node) ?
NILFS_BTREE_ROOT_NCHILDREN_MIN :
NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
}
static inline int
-nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
+ const struct nilfs_btree *btree)
{
- return nilfs_btree_node_root(btree, node) ?
+ return nilfs_btree_node_root(node) ?
NILFS_BTREE_ROOT_NCHILDREN_MAX :
NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
}
static inline __le64 *
-nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
{
return (__le64 *)((char *)(node + 1) +
- (nilfs_btree_node_root(btree, node) ?
+ (nilfs_btree_node_root(node) ?
0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
}
static inline __le64 *
-nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node)
+nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
+ const struct nilfs_btree *btree)
{
- return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
- nilfs_btree_node_nchildren_max(btree, node));
+ return (__le64 *)(nilfs_btree_node_dkeys(node) +
+ nilfs_btree_node_nchildren_max(node, btree));
}
static inline __u64
-nilfs_btree_node_get_key(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node, int index)
+nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
{
- return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
- index));
+ return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
}
static inline void
-nilfs_btree_node_set_key(struct nilfs_btree *btree,
- struct nilfs_btree_node *node, int index, __u64 key)
+nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
{
- *(nilfs_btree_node_dkeys(btree, node) + index) =
- nilfs_bmap_key_to_dkey(key);
+ *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
}
static inline __u64
nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node,
- int index)
+ const struct nilfs_btree_node *node, int index)
{
- return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
+ return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
index));
}
static inline void
nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
- struct nilfs_btree_node *node,
- int index,
- __u64 ptr)
+ struct nilfs_btree_node *node, int index, __u64 ptr)
{
- *(nilfs_btree_node_dptrs(btree, node) + index) =
+ *(nilfs_btree_node_dptrs(node, btree) + index) =
nilfs_bmap_ptr_to_dptr(ptr);
}
@@ -283,12 +247,12 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree,
__le64 *dptrs;
int i;
- nilfs_btree_node_set_flags(btree, node, flags);
- nilfs_btree_node_set_level(btree, node, level);
- nilfs_btree_node_set_nchildren(btree, node, nchildren);
+ nilfs_btree_node_set_flags(node, flags);
+ nilfs_btree_node_set_level(node, level);
+ nilfs_btree_node_set_nchildren(node, nchildren);
- dkeys = nilfs_btree_node_dkeys(btree, node);
- dptrs = nilfs_btree_node_dptrs(btree, node);
+ dkeys = nilfs_btree_node_dkeys(node);
+ dptrs = nilfs_btree_node_dptrs(node, btree);
for (i = 0; i < nchildren; i++) {
dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
@@ -305,13 +269,13 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
__le64 *ldptrs, *rdptrs;
int lnchildren, rnchildren;
- ldkeys = nilfs_btree_node_dkeys(btree, left);
- ldptrs = nilfs_btree_node_dptrs(btree, left);
- lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+ ldkeys = nilfs_btree_node_dkeys(left);
+ ldptrs = nilfs_btree_node_dptrs(left, btree);
+ lnchildren = nilfs_btree_node_get_nchildren(left);
- rdkeys = nilfs_btree_node_dkeys(btree, right);
- rdptrs = nilfs_btree_node_dptrs(btree, right);
- rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+ rdkeys = nilfs_btree_node_dkeys(right);
+ rdptrs = nilfs_btree_node_dptrs(right, btree);
+ rnchildren = nilfs_btree_node_get_nchildren(right);
memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
@@ -320,8 +284,8 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
lnchildren += n;
rnchildren -= n;
- nilfs_btree_node_set_nchildren(btree, left, lnchildren);
- nilfs_btree_node_set_nchildren(btree, right, rnchildren);
+ nilfs_btree_node_set_nchildren(left, lnchildren);
+ nilfs_btree_node_set_nchildren(right, rnchildren);
}
/* Assume that the buffer heads corresponding to left and right are locked. */
@@ -334,13 +298,13 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
__le64 *ldptrs, *rdptrs;
int lnchildren, rnchildren;
- ldkeys = nilfs_btree_node_dkeys(btree, left);
- ldptrs = nilfs_btree_node_dptrs(btree, left);
- lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+ ldkeys = nilfs_btree_node_dkeys(left);
+ ldptrs = nilfs_btree_node_dptrs(left, btree);
+ lnchildren = nilfs_btree_node_get_nchildren(left);
- rdkeys = nilfs_btree_node_dkeys(btree, right);
- rdptrs = nilfs_btree_node_dptrs(btree, right);
- rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+ rdkeys = nilfs_btree_node_dkeys(right);
+ rdptrs = nilfs_btree_node_dptrs(right, btree);
+ rnchildren = nilfs_btree_node_get_nchildren(right);
memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
@@ -349,8 +313,8 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
lnchildren -= n;
rnchildren += n;
- nilfs_btree_node_set_nchildren(btree, left, lnchildren);
- nilfs_btree_node_set_nchildren(btree, right, rnchildren);
+ nilfs_btree_node_set_nchildren(left, lnchildren);
+ nilfs_btree_node_set_nchildren(right, rnchildren);
}
/* Assume that the buffer head corresponding to node is locked. */
@@ -362,9 +326,9 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
__le64 *dptrs;
int nchildren;
- dkeys = nilfs_btree_node_dkeys(btree, node);
- dptrs = nilfs_btree_node_dptrs(btree, node);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
+ dkeys = nilfs_btree_node_dkeys(node);
+ dptrs = nilfs_btree_node_dptrs(node, btree);
+ nchildren = nilfs_btree_node_get_nchildren(node);
if (index < nchildren) {
memmove(dkeys + index + 1, dkeys + index,
(nchildren - index) * sizeof(*dkeys));
@@ -374,7 +338,7 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
dkeys[index] = nilfs_bmap_key_to_dkey(key);
dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
nchildren++;
- nilfs_btree_node_set_nchildren(btree, node, nchildren);
+ nilfs_btree_node_set_nchildren(node, nchildren);
}
/* Assume that the buffer head corresponding to node is locked. */
@@ -388,11 +352,11 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
__le64 *dptrs;
int nchildren;
- dkeys = nilfs_btree_node_dkeys(btree, node);
- dptrs = nilfs_btree_node_dptrs(btree, node);
+ dkeys = nilfs_btree_node_dkeys(node);
+ dptrs = nilfs_btree_node_dptrs(node, btree);
key = nilfs_bmap_dkey_to_key(dkeys[index]);
ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
if (keyp != NULL)
*keyp = key;
if (ptrp != NULL)
@@ -405,11 +369,10 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
(nchildren - index - 1) * sizeof(*dptrs));
}
nchildren--;
- nilfs_btree_node_set_nchildren(btree, node, nchildren);
+ nilfs_btree_node_set_nchildren(node, nchildren);
}
-static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
- const struct nilfs_btree_node *node,
+static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
__u64 key, int *indexp)
{
__u64 nkey;
@@ -417,12 +380,12 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
/* binary search */
low = 0;
- high = nilfs_btree_node_get_nchildren(btree, node) - 1;
+ high = nilfs_btree_node_get_nchildren(node) - 1;
index = 0;
s = 0;
while (low <= high) {
index = (low + high) / 2;
- nkey = nilfs_btree_node_get_key(btree, node, index);
+ nkey = nilfs_btree_node_get_key(node, index);
if (nkey == key) {
s = 0;
goto out;
@@ -436,9 +399,8 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
}
/* adjust index */
- if (nilfs_btree_node_get_level(btree, node) >
- NILFS_BTREE_LEVEL_NODE_MIN) {
- if ((s > 0) && (index > 0))
+ if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
+ if (s > 0 && index > 0)
index--;
} else if (s < 0)
index++;
@@ -456,25 +418,20 @@ nilfs_btree_get_root(const struct nilfs_btree *btree)
}
static inline struct nilfs_btree_node *
-nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
- const struct nilfs_btree_path *path,
- int level)
+nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
{
return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
}
static inline struct nilfs_btree_node *
-nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
- const struct nilfs_btree_path *path,
- int level)
+nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
{
return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
}
static inline int nilfs_btree_height(const struct nilfs_btree *btree)
{
- return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
- + 1;
+ return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
}
static inline struct nilfs_btree_node *
@@ -484,7 +441,7 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
{
return (level == nilfs_btree_height(btree) - 1) ?
nilfs_btree_get_root(btree) :
- nilfs_btree_get_nonroot_node(btree, path, level);
+ nilfs_btree_get_nonroot_node(path, level);
}
static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
@@ -496,12 +453,11 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
int level, index, found, ret;
node = nilfs_btree_get_root(btree);
- level = nilfs_btree_node_get_level(btree, node);
- if ((level < minlevel) ||
- (nilfs_btree_node_get_nchildren(btree, node) <= 0))
+ level = nilfs_btree_node_get_level(node);
+ if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
return -ENOENT;
- found = nilfs_btree_node_lookup(btree, node, key, &index);
+ found = nilfs_btree_node_lookup(node, key, &index);
ptr = nilfs_btree_node_get_ptr(btree, node, index);
path[level].bp_bh = NULL;
path[level].bp_index = index;
@@ -510,14 +466,13 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
if (ret < 0)
return ret;
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- BUG_ON(level != nilfs_btree_node_get_level(btree, node));
+ node = nilfs_btree_get_nonroot_node(path, level);
+ BUG_ON(level != nilfs_btree_node_get_level(node));
if (!found)
- found = nilfs_btree_node_lookup(btree, node, key,
- &index);
+ found = nilfs_btree_node_lookup(node, key, &index);
else
index = 0;
- if (index < nilfs_btree_node_nchildren_max(btree, node))
+ if (index < nilfs_btree_node_nchildren_max(node, btree))
ptr = nilfs_btree_node_get_ptr(btree, node, index);
else {
WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
@@ -544,10 +499,10 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
int index, level, ret;
node = nilfs_btree_get_root(btree);
- index = nilfs_btree_node_get_nchildren(btree, node) - 1;
+ index = nilfs_btree_node_get_nchildren(node) - 1;
if (index < 0)
return -ENOENT;
- level = nilfs_btree_node_get_level(btree, node);
+ level = nilfs_btree_node_get_level(node);
ptr = nilfs_btree_node_get_ptr(btree, node, index);
path[level].bp_bh = NULL;
path[level].bp_index = index;
@@ -556,15 +511,15 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
if (ret < 0)
return ret;
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- BUG_ON(level != nilfs_btree_node_get_level(btree, node));
- index = nilfs_btree_node_get_nchildren(btree, node) - 1;
+ node = nilfs_btree_get_nonroot_node(path, level);
+ BUG_ON(level != nilfs_btree_node_get_level(node));
+ index = nilfs_btree_node_get_nchildren(node) - 1;
ptr = nilfs_btree_node_get_ptr(btree, node, index);
path[level].bp_index = index;
}
if (keyp != NULL)
- *keyp = nilfs_btree_node_get_key(btree, node, index);
+ *keyp = nilfs_btree_node_get_key(node, index);
if (ptrp != NULL)
*ptrp = ptr;
@@ -580,18 +535,18 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
int ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
if (ptrp != NULL)
*ptrp = ptr;
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -608,10 +563,10 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
int level = NILFS_BTREE_LEVEL_NODE_MIN;
int ret, cnt, index, maxlevel;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
if (ret < 0)
goto out;
@@ -631,8 +586,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
node = nilfs_btree_get_node(btree, path, level);
index = path[level].bp_index + 1;
for (;;) {
- while (index < nilfs_btree_node_get_nchildren(btree, node)) {
- if (nilfs_btree_node_get_key(btree, node, index) !=
+ while (index < nilfs_btree_node_get_nchildren(node)) {
+ if (nilfs_btree_node_get_key(node, index) !=
key + cnt)
goto end;
ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
@@ -653,8 +608,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
/* look-up right sibling node */
node = nilfs_btree_get_node(btree, path, level + 1);
index = path[level + 1].bp_index + 1;
- if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
- nilfs_btree_node_get_key(btree, node, index) != key + cnt)
+ if (index >= nilfs_btree_node_get_nchildren(node) ||
+ nilfs_btree_node_get_key(node, index) != key + cnt)
break;
ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
path[level + 1].bp_index = index;
@@ -664,7 +619,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
if (ret < 0)
goto out;
- node = nilfs_btree_get_nonroot_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
index = 0;
path[level].bp_index = index;
}
@@ -672,8 +627,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
*ptrp = ptr;
ret = cnt;
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -685,9 +640,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
do {
lock_buffer(path[level].bp_bh);
nilfs_btree_node_set_key(
- btree,
- nilfs_btree_get_nonroot_node(
- btree, path, level),
+ nilfs_btree_get_nonroot_node(path, level),
path[level].bp_index, key);
if (!buffer_dirty(path[level].bp_bh))
nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -698,8 +651,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
/* root */
if (level == nilfs_btree_height(btree) - 1) {
- nilfs_btree_node_set_key(btree,
- nilfs_btree_get_root(btree),
+ nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
path[level].bp_index, key);
}
}
@@ -712,7 +664,7 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
if (level < nilfs_btree_height(btree) - 1) {
lock_buffer(path[level].bp_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
path[level].bp_index);
if (!buffer_dirty(path[level].bp_bh))
@@ -721,8 +673,8 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(
- btree, node, 0));
+ nilfs_btree_node_get_key(node,
+ 0));
} else {
node = nilfs_btree_get_root(btree);
nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
@@ -740,10 +692,10 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- left = nilfs_btree_get_sib_node(btree, path, level);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
- lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ left = nilfs_btree_get_sib_node(path, level);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+ lnchildren = nilfs_btree_node_get_nchildren(left);
move = 0;
n = (nchildren + lnchildren + 1) / 2 - lnchildren;
@@ -764,7 +716,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(btree, node, 0));
+ nilfs_btree_node_get_key(node, 0));
if (move) {
brelse(path[level].bp_bh);
@@ -791,10 +743,10 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- right = nilfs_btree_get_sib_node(btree, path, level);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
- rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ right = nilfs_btree_get_sib_node(path, level);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+ rnchildren = nilfs_btree_node_get_nchildren(right);
move = 0;
n = (nchildren + rnchildren + 1) / 2 - rnchildren;
@@ -816,15 +768,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(btree, right, 0));
+ nilfs_btree_node_get_key(right, 0));
path[level + 1].bp_index--;
if (move) {
brelse(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
- path[level].bp_index -=
- nilfs_btree_node_get_nchildren(btree, node);
+ path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
path[level + 1].bp_index++;
} else {
brelse(path[level].bp_sib_bh);
@@ -846,9 +797,9 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- right = nilfs_btree_get_sib_node(btree, path, level);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ right = nilfs_btree_get_sib_node(path, level);
+ nchildren = nilfs_btree_node_get_nchildren(node);
move = 0;
n = (nchildren + 1) / 2;
@@ -867,16 +818,15 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_bh);
unlock_buffer(path[level].bp_sib_bh);
- newkey = nilfs_btree_node_get_key(btree, right, 0);
+ newkey = nilfs_btree_node_get_key(right, 0);
newptr = path[level].bp_newreq.bpr_ptr;
if (move) {
- path[level].bp_index -=
- nilfs_btree_node_get_nchildren(btree, node);
+ path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
path[level].bp_index);
- *keyp = nilfs_btree_node_get_key(btree, right, 0);
+ *keyp = nilfs_btree_node_get_key(right, 0);
*ptrp = path[level].bp_newreq.bpr_ptr;
brelse(path[level].bp_bh);
@@ -885,7 +835,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
} else {
nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
- *keyp = nilfs_btree_node_get_key(btree, right, 0);
+ *keyp = nilfs_btree_node_get_key(right, 0);
*ptrp = path[level].bp_newreq.bpr_ptr;
brelse(path[level].bp_sib_bh);
@@ -905,12 +855,12 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
lock_buffer(path[level].bp_sib_bh);
root = nilfs_btree_get_root(btree);
- child = nilfs_btree_get_sib_node(btree, path, level);
+ child = nilfs_btree_get_sib_node(path, level);
- n = nilfs_btree_node_get_nchildren(btree, root);
+ n = nilfs_btree_node_get_nchildren(root);
nilfs_btree_node_move_right(btree, root, child, n);
- nilfs_btree_node_set_level(btree, root, level + 1);
+ nilfs_btree_node_set_level(root, level + 1);
if (!buffer_dirty(path[level].bp_sib_bh))
nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
@@ -922,7 +872,7 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
- *keyp = nilfs_btree_node_get_key(btree, child, 0);
+ *keyp = nilfs_btree_node_get_key(child, 0);
*ptrp = path[level].bp_newreq.bpr_ptr;
}
@@ -990,26 +940,29 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
struct nilfs_btree_node *node, *parent, *sib;
__u64 sibptr;
int pindex, level, ret;
+ struct inode *dat = NULL;
stats->bs_nblocks = 0;
level = NILFS_BTREE_LEVEL_DATA;
/* allocate a new ptr for data block */
- if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
+ if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
path[level].bp_newreq.bpr_ptr =
nilfs_btree_find_target_v(btree, path, key);
+ dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+ }
ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ &path[level].bp_newreq, dat);
if (ret < 0)
goto err_out_data;
for (level = NILFS_BTREE_LEVEL_NODE_MIN;
level < nilfs_btree_height(btree) - 1;
level++) {
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- if (nilfs_btree_node_get_nchildren(btree, node) <
- nilfs_btree_node_nchildren_max(btree, node)) {
+ node = nilfs_btree_get_nonroot_node(path, level);
+ if (nilfs_btree_node_get_nchildren(node) <
+ nilfs_btree_node_nchildren_max(node, btree)) {
path[level].bp_op = nilfs_btree_do_insert;
stats->bs_nblocks++;
goto out;
@@ -1026,8 +979,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
if (ret < 0)
goto err_out_child_node;
sib = (struct nilfs_btree_node *)bh->b_data;
- if (nilfs_btree_node_get_nchildren(btree, sib) <
- nilfs_btree_node_nchildren_max(btree, sib)) {
+ if (nilfs_btree_node_get_nchildren(sib) <
+ nilfs_btree_node_nchildren_max(sib, btree)) {
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_carry_left;
stats->bs_nblocks++;
@@ -1038,15 +991,15 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* right sibling */
if (pindex <
- nilfs_btree_node_get_nchildren(btree, parent) - 1) {
+ nilfs_btree_node_get_nchildren(parent) - 1) {
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex + 1);
ret = nilfs_btree_get_block(btree, sibptr, &bh);
if (ret < 0)
goto err_out_child_node;
sib = (struct nilfs_btree_node *)bh->b_data;
- if (nilfs_btree_node_get_nchildren(btree, sib) <
- nilfs_btree_node_nchildren_max(btree, sib)) {
+ if (nilfs_btree_node_get_nchildren(sib) <
+ nilfs_btree_node_nchildren_max(sib, btree)) {
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_carry_right;
stats->bs_nblocks++;
@@ -1059,7 +1012,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
path[level].bp_newreq.bpr_ptr =
path[level - 1].bp_newreq.bpr_ptr + 1;
ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ &path[level].bp_newreq, dat);
if (ret < 0)
goto err_out_child_node;
ret = nilfs_btree_get_new_block(btree,
@@ -1081,8 +1034,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* root */
node = nilfs_btree_get_root(btree);
- if (nilfs_btree_node_get_nchildren(btree, node) <
- nilfs_btree_node_nchildren_max(btree, node)) {
+ if (nilfs_btree_node_get_nchildren(node) <
+ nilfs_btree_node_nchildren_max(node, btree)) {
path[level].bp_op = nilfs_btree_do_insert;
stats->bs_nblocks++;
goto out;
@@ -1091,7 +1044,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* grow */
path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ &path[level].bp_newreq, dat);
if (ret < 0)
goto err_out_child_node;
ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
@@ -1119,16 +1072,18 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* error */
err_out_curr_node:
- nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
+ nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
+ dat);
err_out_child_node:
for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
nilfs_btnode_delete(path[level].bp_sib_bh);
nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ &path[level].bp_newreq, dat);
}
- nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
+ nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
+ dat);
err_out_data:
*levelp = level;
stats->bs_nblocks = 0;
@@ -1139,16 +1094,19 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
int maxlevel, __u64 key, __u64 ptr)
{
+ struct inode *dat = NULL;
int level;
set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
- if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
+ if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
nilfs_btree_set_target_v(btree, key, ptr);
+ dat = nilfs_bmap_get_dat(&btree->bt_bmap);
+ }
for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
- &path[level - 1].bp_newreq);
+ &path[level - 1].bp_newreq, dat);
path[level].bp_op(btree, path, level, &key, &ptr);
}
@@ -1164,10 +1122,10 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
int level, ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup(btree, path, key, NULL,
NILFS_BTREE_LEVEL_NODE_MIN);
@@ -1184,8 +1142,8 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -1197,7 +1155,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
if (level < nilfs_btree_height(btree) - 1) {
lock_buffer(path[level].bp_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
nilfs_btree_node_delete(btree, node, keyp, ptrp,
path[level].bp_index);
if (!buffer_dirty(path[level].bp_bh))
@@ -1205,7 +1163,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(btree, node, 0));
+ nilfs_btree_node_get_key(node, 0));
} else {
node = nilfs_btree_get_root(btree);
nilfs_btree_node_delete(btree, node, keyp, ptrp,
@@ -1225,10 +1183,10 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- left = nilfs_btree_get_sib_node(btree, path, level);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
- lnchildren = nilfs_btree_node_get_nchildren(btree, left);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ left = nilfs_btree_get_sib_node(path, level);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+ lnchildren = nilfs_btree_node_get_nchildren(left);
n = (nchildren + lnchildren) / 2 - nchildren;
@@ -1243,7 +1201,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(btree, node, 0));
+ nilfs_btree_node_get_key(node, 0));
brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
@@ -1262,10 +1220,10 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- right = nilfs_btree_get_sib_node(btree, path, level);
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
- rnchildren = nilfs_btree_node_get_nchildren(btree, right);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ right = nilfs_btree_get_sib_node(path, level);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+ rnchildren = nilfs_btree_node_get_nchildren(right);
n = (nchildren + rnchildren) / 2 - nchildren;
@@ -1281,7 +1239,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
- nilfs_btree_node_get_key(btree, right, 0));
+ nilfs_btree_node_get_key(right, 0));
path[level + 1].bp_index--;
brelse(path[level].bp_sib_bh);
@@ -1300,10 +1258,10 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- left = nilfs_btree_get_sib_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ left = nilfs_btree_get_sib_node(path, level);
- n = nilfs_btree_node_get_nchildren(btree, node);
+ n = nilfs_btree_node_get_nchildren(node);
nilfs_btree_node_move_left(btree, left, node, n);
@@ -1316,7 +1274,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
- path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
+ path[level].bp_index += nilfs_btree_node_get_nchildren(left);
}
static void nilfs_btree_concat_right(struct nilfs_btree *btree,
@@ -1331,10 +1289,10 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
lock_buffer(path[level].bp_sib_bh);
- node = nilfs_btree_get_nonroot_node(btree, path, level);
- right = nilfs_btree_get_sib_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
+ right = nilfs_btree_get_sib_node(path, level);
- n = nilfs_btree_node_get_nchildren(btree, right);
+ n = nilfs_btree_node_get_nchildren(right);
nilfs_btree_node_move_left(btree, node, right, n);
@@ -1360,11 +1318,11 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
lock_buffer(path[level].bp_bh);
root = nilfs_btree_get_root(btree);
- child = nilfs_btree_get_nonroot_node(btree, path, level);
+ child = nilfs_btree_get_nonroot_node(path, level);
nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
- nilfs_btree_node_set_level(btree, root, level);
- n = nilfs_btree_node_get_nchildren(btree, child);
+ nilfs_btree_node_set_level(root, level);
+ n = nilfs_btree_node_get_nchildren(child);
nilfs_btree_node_move_left(btree, root, child, n);
unlock_buffer(path[level].bp_bh);
@@ -1376,7 +1334,8 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
int *levelp,
- struct nilfs_bmap_stats *stats)
+ struct nilfs_bmap_stats *stats,
+ struct inode *dat)
{
struct buffer_head *bh;
struct nilfs_btree_node *node, *parent, *sib;
@@ -1388,17 +1347,17 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
for (level = NILFS_BTREE_LEVEL_NODE_MIN;
level < nilfs_btree_height(btree) - 1;
level++) {
- node = nilfs_btree_get_nonroot_node(btree, path, level);
+ node = nilfs_btree_get_nonroot_node(path, level);
path[level].bp_oldreq.bpr_ptr =
nilfs_btree_node_get_ptr(btree, node,
path[level].bp_index);
ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
- &path[level].bp_oldreq);
+ &path[level].bp_oldreq, dat);
if (ret < 0)
goto err_out_child_node;
- if (nilfs_btree_node_get_nchildren(btree, node) >
- nilfs_btree_node_nchildren_min(btree, node)) {
+ if (nilfs_btree_node_get_nchildren(node) >
+ nilfs_btree_node_nchildren_min(node, btree)) {
path[level].bp_op = nilfs_btree_do_delete;
stats->bs_nblocks++;
goto out;
@@ -1415,8 +1374,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
if (ret < 0)
goto err_out_curr_node;
sib = (struct nilfs_btree_node *)bh->b_data;
- if (nilfs_btree_node_get_nchildren(btree, sib) >
- nilfs_btree_node_nchildren_min(btree, sib)) {
+ if (nilfs_btree_node_get_nchildren(sib) >
+ nilfs_btree_node_nchildren_min(sib, btree)) {
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_borrow_left;
stats->bs_nblocks++;
@@ -1428,7 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* continue; */
}
} else if (pindex <
- nilfs_btree_node_get_nchildren(btree, parent) - 1) {
+ nilfs_btree_node_get_nchildren(parent) - 1) {
/* right sibling */
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex + 1);
@@ -1436,8 +1395,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
if (ret < 0)
goto err_out_curr_node;
sib = (struct nilfs_btree_node *)bh->b_data;
- if (nilfs_btree_node_get_nchildren(btree, sib) >
- nilfs_btree_node_nchildren_min(btree, sib)) {
+ if (nilfs_btree_node_get_nchildren(sib) >
+ nilfs_btree_node_nchildren_min(sib, btree)) {
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_borrow_right;
stats->bs_nblocks++;
@@ -1452,7 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* no siblings */
/* the only child of the root node */
WARN_ON(level != nilfs_btree_height(btree) - 2);
- if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
+ if (nilfs_btree_node_get_nchildren(node) - 1 <=
NILFS_BTREE_ROOT_NCHILDREN_MAX) {
path[level].bp_op = nilfs_btree_shrink;
stats->bs_nblocks += 2;
@@ -1471,7 +1430,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
- &path[level].bp_oldreq);
+ &path[level].bp_oldreq, dat);
if (ret < 0)
goto err_out_child_node;
@@ -1486,12 +1445,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* error */
err_out_curr_node:
- nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
+ nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
err_out_child_node:
for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
brelse(path[level].bp_sib_bh);
nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
- &path[level].bp_oldreq);
+ &path[level].bp_oldreq, dat);
}
*levelp = level;
stats->bs_nblocks = 0;
@@ -1500,13 +1459,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int maxlevel)
+ int maxlevel, struct inode *dat)
{
int level;
for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
- &path[level].bp_oldreq);
+ &path[level].bp_oldreq, dat);
path[level].bp_op(btree, path, level, NULL, NULL);
}
@@ -1520,27 +1479,32 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
struct nilfs_btree *btree;
struct nilfs_btree_path *path;
struct nilfs_bmap_stats stats;
+ struct inode *dat;
int level, ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup(btree, path, key, NULL,
NILFS_BTREE_LEVEL_NODE_MIN);
if (ret < 0)
goto out;
- ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
+
+ dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
+ nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
+
+ ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
if (ret < 0)
goto out;
- nilfs_btree_commit_delete(btree, path, level);
+ nilfs_btree_commit_delete(btree, path, level, dat);
nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -1551,15 +1515,15 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
int ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -1581,7 +1545,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
node = root;
break;
case 3:
- nchildren = nilfs_btree_node_get_nchildren(btree, root);
+ nchildren = nilfs_btree_node_get_nchildren(root);
if (nchildren > 1)
return 0;
ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
@@ -1594,10 +1558,10 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
return 0;
}
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
- maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+ maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
nextmaxkey = (nchildren > 1) ?
- nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
+ nilfs_btree_node_get_key(node, nchildren - 2) : 0;
if (bh != NULL)
brelse(bh);
@@ -1623,7 +1587,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
node = root;
break;
case 3:
- nchildren = nilfs_btree_node_get_nchildren(btree, root);
+ nchildren = nilfs_btree_node_get_nchildren(root);
WARN_ON(nchildren > 1);
ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
ret = nilfs_btree_get_block(btree, ptr, &bh);
@@ -1636,11 +1600,11 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
return -EINVAL;
}
- nchildren = nilfs_btree_node_get_nchildren(btree, node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
if (nchildren < nitems)
nitems = nchildren;
- dkeys = nilfs_btree_node_dkeys(btree, node);
- dptrs = nilfs_btree_node_dptrs(btree, node);
+ dkeys = nilfs_btree_node_dkeys(node);
+ dptrs = nilfs_btree_node_dptrs(node, btree);
for (i = 0; i < nitems; i++) {
keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
@@ -1660,18 +1624,20 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
struct nilfs_bmap_stats *stats)
{
struct buffer_head *bh;
- struct nilfs_btree *btree;
+ struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
+ struct inode *dat = NULL;
int ret;
- btree = (struct nilfs_btree *)bmap;
stats->bs_nblocks = 0;
/* for data */
/* cannot find near ptr */
- if (NILFS_BMAP_USE_VBN(bmap))
+ if (NILFS_BMAP_USE_VBN(bmap)) {
dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
+ dat = nilfs_bmap_get_dat(bmap);
+ }
- ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
if (ret < 0)
return ret;
@@ -1679,7 +1645,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
stats->bs_nblocks++;
if (nreq != NULL) {
nreq->bpr_ptr = dreq->bpr_ptr + 1;
- ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
if (ret < 0)
goto err_out_dreq;
@@ -1696,9 +1662,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
/* error */
err_out_nreq:
- nilfs_bmap_abort_alloc_ptr(bmap, nreq);
+ nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
err_out_dreq:
- nilfs_bmap_abort_alloc_ptr(bmap, dreq);
+ nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
stats->bs_nblocks = 0;
return ret;
@@ -1713,8 +1679,9 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
union nilfs_bmap_ptr_req *nreq,
struct buffer_head *bh)
{
- struct nilfs_btree *btree;
+ struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
struct nilfs_btree_node *node;
+ struct inode *dat;
__u64 tmpptr;
/* free resources */
@@ -1725,11 +1692,11 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
/* convert and insert */
- btree = (struct nilfs_btree *)bmap;
+ dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
nilfs_btree_init(bmap);
if (nreq != NULL) {
- nilfs_bmap_commit_alloc_ptr(bmap, dreq);
- nilfs_bmap_commit_alloc_ptr(bmap, nreq);
+ nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
+ nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
/* create child node at level 1 */
lock_buffer(bh);
@@ -1751,7 +1718,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
2, 1, &keys[0], &tmpptr);
} else {
- nilfs_bmap_commit_alloc_ptr(bmap, dreq);
+ nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
/* create root node at level 1 */
node = nilfs_btree_get_root(btree);
@@ -1822,7 +1789,7 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int level)
+ int level, struct inode *dat)
{
struct nilfs_btree_node *parent;
int ret;
@@ -1832,9 +1799,8 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
nilfs_btree_node_get_ptr(btree, parent,
path[level + 1].bp_index);
path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
- ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
+ &path[level].bp_newreq.bpr_req);
if (ret < 0)
return ret;
@@ -1846,9 +1812,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
&path[level].bp_ctxt);
if (ret < 0) {
- nilfs_bmap_abort_update_v(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_dat_abort_update(dat,
+ &path[level].bp_oldreq.bpr_req,
+ &path[level].bp_newreq.bpr_req);
return ret;
}
}
@@ -1858,13 +1824,13 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int level)
+ int level, struct inode *dat)
{
struct nilfs_btree_node *parent;
- nilfs_bmap_commit_update_v(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
+ &path[level].bp_newreq.bpr_req,
+ btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
if (buffer_nilfs_node(path[level].bp_bh)) {
nilfs_btnode_commit_change_key(
@@ -1881,11 +1847,10 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int level)
+ int level, struct inode *dat)
{
- nilfs_bmap_abort_update_v(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
+ &path[level].bp_newreq.bpr_req);
if (buffer_nilfs_node(path[level].bp_bh))
nilfs_btnode_abort_change_key(
&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
@@ -1894,14 +1859,14 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int minlevel,
- int *maxlevelp)
+ int minlevel, int *maxlevelp,
+ struct inode *dat)
{
int level, ret;
level = minlevel;
if (!buffer_nilfs_volatile(path[level].bp_bh)) {
- ret = nilfs_btree_prepare_update_v(btree, path, level);
+ ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
if (ret < 0)
return ret;
}
@@ -1909,7 +1874,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
!buffer_dirty(path[level].bp_bh)) {
WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
- ret = nilfs_btree_prepare_update_v(btree, path, level);
+ ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
if (ret < 0)
goto out;
}
@@ -1921,39 +1886,40 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
/* error */
out:
while (--level > minlevel)
- nilfs_btree_abort_update_v(btree, path, level);
+ nilfs_btree_abort_update_v(btree, path, level, dat);
if (!buffer_nilfs_volatile(path[level].bp_bh))
- nilfs_btree_abort_update_v(btree, path, level);
+ nilfs_btree_abort_update_v(btree, path, level, dat);
return ret;
}
static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int minlevel,
- int maxlevel,
- struct buffer_head *bh)
+ int minlevel, int maxlevel,
+ struct buffer_head *bh,
+ struct inode *dat)
{
int level;
if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
- nilfs_btree_commit_update_v(btree, path, minlevel);
+ nilfs_btree_commit_update_v(btree, path, minlevel, dat);
for (level = minlevel + 1; level <= maxlevel; level++)
- nilfs_btree_commit_update_v(btree, path, level);
+ nilfs_btree_commit_update_v(btree, path, level, dat);
}
static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
- int level,
- struct buffer_head *bh)
+ int level, struct buffer_head *bh)
{
int maxlevel, ret;
struct nilfs_btree_node *parent;
+ struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
__u64 ptr;
get_bh(bh);
path[level].bp_bh = bh;
- ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
+ ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
+ dat);
if (ret < 0)
goto out;
@@ -1961,12 +1927,12 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
parent = nilfs_btree_get_node(btree, path, level + 1);
ptr = nilfs_btree_node_get_ptr(btree, parent,
path[level + 1].bp_index);
- ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
+ ret = nilfs_dat_mark_dirty(dat, ptr);
if (ret < 0)
goto out;
}
- nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
+ nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
out:
brelse(path[level].bp_bh);
@@ -1986,15 +1952,15 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
WARN_ON(!buffer_dirty(bh));
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
if (buffer_nilfs_node(bh)) {
node = (struct nilfs_btree_node *)bh->b_data;
- key = nilfs_btree_node_get_key(btree, node, 0);
- level = nilfs_btree_node_get_level(btree, node);
+ key = nilfs_btree_node_get_key(node, 0);
+ level = nilfs_btree_node_get_level(node);
} else {
key = nilfs_bmap_data_get_key(bmap, bh);
level = NILFS_BTREE_LEVEL_DATA;
@@ -2013,8 +1979,8 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
nilfs_btree_propagate_p(btree, path, level, bh);
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -2022,7 +1988,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
struct buffer_head *bh)
{
- return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
+ return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
}
static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
@@ -2037,12 +2003,12 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
get_bh(bh);
node = (struct nilfs_btree_node *)bh->b_data;
- key = nilfs_btree_node_get_key(btree, node, 0);
- level = nilfs_btree_node_get_level(btree, node);
+ key = nilfs_btree_node_get_key(node, 0);
+ level = nilfs_btree_node_get_level(node);
list_for_each(head, &lists[level]) {
cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
cnode = (struct nilfs_btree_node *)cbh->b_data;
- ckey = nilfs_btree_node_get_key(btree, cnode, 0);
+ ckey = nilfs_btree_node_get_key(cnode, 0);
if (key < ckey)
break;
}
@@ -2120,8 +2086,7 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree,
nilfs_btree_node_set_ptr(btree, parent,
path[level + 1].bp_index, blocknr);
- key = nilfs_btree_node_get_key(btree, parent,
- path[level + 1].bp_index);
+ key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
/* on-disk format */
binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
binfo->bi_dat.bi_level = level;
@@ -2137,6 +2102,7 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
union nilfs_binfo *binfo)
{
struct nilfs_btree_node *parent;
+ struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
__u64 key;
__u64 ptr;
union nilfs_bmap_ptr_req req;
@@ -2146,12 +2112,12 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
ptr = nilfs_btree_node_get_ptr(btree, parent,
path[level + 1].bp_index);
req.bpr_ptr = ptr;
- ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
- if (unlikely(ret < 0))
+ ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
+ if (ret < 0)
return ret;
+ nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
- key = nilfs_btree_node_get_key(btree, parent,
- path[level + 1].bp_index);
+ key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
/* on-disk format */
binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
@@ -2171,15 +2137,15 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
int level, ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
if (buffer_nilfs_node(*bh)) {
node = (struct nilfs_btree_node *)(*bh)->b_data;
- key = nilfs_btree_node_get_key(btree, node, 0);
- level = nilfs_btree_node_get_level(btree, node);
+ key = nilfs_btree_node_get_key(node, 0);
+ level = nilfs_btree_node_get_level(node);
} else {
key = nilfs_bmap_data_get_key(bmap, *bh);
level = NILFS_BTREE_LEVEL_DATA;
@@ -2196,8 +2162,8 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
@@ -2207,19 +2173,18 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
sector_t blocknr,
union nilfs_binfo *binfo)
{
- struct nilfs_btree *btree;
struct nilfs_btree_node *node;
__u64 key;
int ret;
- btree = (struct nilfs_btree *)bmap;
- ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
+ ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
+ blocknr);
if (ret < 0)
return ret;
if (buffer_nilfs_node(*bh)) {
node = (struct nilfs_btree_node *)(*bh)->b_data;
- key = nilfs_btree_node_get_key(btree, node, 0);
+ key = nilfs_btree_node_get_key(node, 0);
} else
key = nilfs_bmap_data_get_key(bmap, *bh);
@@ -2239,10 +2204,10 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
int ret;
btree = (struct nilfs_btree *)bmap;
- path = nilfs_btree_alloc_path(btree);
+ path = nilfs_btree_alloc_path();
if (path == NULL)
return -ENOMEM;
- nilfs_btree_init_path(btree, path);
+ nilfs_btree_init_path(path);
ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
if (ret < 0) {
@@ -2262,8 +2227,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
nilfs_bmap_set_dirty(&btree->bt_bmap);
out:
- nilfs_btree_clear_path(btree, path);
- nilfs_btree_free_path(btree, path);
+ nilfs_btree_release_path(path);
+ nilfs_btree_free_path(path);
return ret;
}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index aec942cf79e3..1c6cfb59128d 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -815,8 +815,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
void *kaddr;
int ret;
- if (cno == 0)
- return -ENOENT; /* checkpoint number 0 is invalid */
+ /* CP number is invalid if it's zero or larger than the
+ largest exist one.*/
+ if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
+ return -ENOENT;
down_read(&NILFS_MDT(cpfile)->mi_sem);
ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
@@ -824,7 +826,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
goto out;
kaddr = kmap_atomic(bh->b_page, KM_USER0);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
- ret = nilfs_checkpoint_snapshot(cp);
+ if (nilfs_checkpoint_invalid(cp))
+ ret = -ENOENT;
+ else
+ ret = nilfs_checkpoint_snapshot(cp);
kunmap_atomic(kaddr, KM_USER0);
brelse(bh);
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 788a45950197..debea896e701 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -27,8 +27,6 @@
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>
-#define NILFS_CPFILE_GFP NILFS_MDT_GFP
-
int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
struct nilfs_checkpoint **,
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 8927ca27e6f7..1ff8e15bd36b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -109,12 +109,6 @@ void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
nilfs_palloc_commit_free_entry(dat, req);
}
-void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
-{
- nilfs_dat_abort_entry(dat, req);
- nilfs_palloc_abort_free_entry(dat, req);
-}
-
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
@@ -140,11 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
nilfs_dat_commit_entry(dat, req);
}
-void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
-{
- nilfs_dat_abort_entry(dat, req);
-}
-
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
@@ -222,6 +211,37 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
nilfs_dat_abort_entry(dat, req);
}
+int nilfs_dat_prepare_update(struct inode *dat,
+ struct nilfs_palloc_req *oldreq,
+ struct nilfs_palloc_req *newreq)
+{
+ int ret;
+
+ ret = nilfs_dat_prepare_end(dat, oldreq);
+ if (!ret) {
+ ret = nilfs_dat_prepare_alloc(dat, newreq);
+ if (ret < 0)
+ nilfs_dat_abort_end(dat, oldreq);
+ }
+ return ret;
+}
+
+void nilfs_dat_commit_update(struct inode *dat,
+ struct nilfs_palloc_req *oldreq,
+ struct nilfs_palloc_req *newreq, int dead)
+{
+ nilfs_dat_commit_end(dat, oldreq, dead);
+ nilfs_dat_commit_alloc(dat, newreq);
+}
+
+void nilfs_dat_abort_update(struct inode *dat,
+ struct nilfs_palloc_req *oldreq,
+ struct nilfs_palloc_req *newreq)
+{
+ nilfs_dat_abort_end(dat, oldreq);
+ nilfs_dat_abort_alloc(dat, newreq);
+}
+
/**
* nilfs_dat_mark_dirty -
* @dat: DAT file inode
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index d328b81eead4..406070d3ff49 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -27,7 +27,6 @@
#include <linux/buffer_head.h>
#include <linux/fs.h>
-#define NILFS_DAT_GFP NILFS_MDT_GFP
struct nilfs_palloc_req;
@@ -39,10 +38,15 @@ void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *);
int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
sector_t);
-void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *);
int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
+int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *,
+ struct nilfs_palloc_req *);
+void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
+ struct nilfs_palloc_req *, int);
+void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
+ struct nilfs_palloc_req *);
int nilfs_dat_mark_dirty(struct inode *, __u64);
int nilfs_dat_freev(struct inode *, __u64 *, size_t);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 342d9765df8d..d369ac718277 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -125,106 +125,64 @@ static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
direct->d_bmap.b_last_allocated_ptr = ptr;
}
-static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
- __u64 key,
- union nilfs_bmap_ptr_req *req,
- struct nilfs_bmap_stats *stats)
-{
- int ret;
-
- if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
- req->bpr_ptr = nilfs_direct_find_target_v(direct, key);
- ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req);
- if (ret < 0)
- return ret;
-
- stats->bs_nblocks = 1;
- return 0;
-}
-
-static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
- union nilfs_bmap_ptr_req *req,
- __u64 key, __u64 ptr)
-{
- struct buffer_head *bh;
-
- /* ptr must be a pointer to a buffer head. */
- bh = (struct buffer_head *)((unsigned long)ptr);
- set_buffer_nilfs_volatile(bh);
-
- nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req);
- nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
-
- if (!nilfs_bmap_dirty(&direct->d_bmap))
- nilfs_bmap_set_dirty(&direct->d_bmap);
-
- if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
- nilfs_direct_set_target_v(direct, key, req->bpr_ptr);
-}
-
static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
{
- struct nilfs_direct *direct;
+ struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
union nilfs_bmap_ptr_req req;
- struct nilfs_bmap_stats stats;
+ struct inode *dat = NULL;
+ struct buffer_head *bh;
int ret;
- direct = (struct nilfs_direct *)bmap;
if (key > NILFS_DIRECT_KEY_MAX)
return -ENOENT;
if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR)
return -EEXIST;
- ret = nilfs_direct_prepare_insert(direct, key, &req, &stats);
- if (ret < 0)
- return ret;
- nilfs_direct_commit_insert(direct, &req, key, ptr);
- nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
+ if (NILFS_BMAP_USE_VBN(bmap)) {
+ req.bpr_ptr = nilfs_direct_find_target_v(direct, key);
+ dat = nilfs_bmap_get_dat(bmap);
+ }
+ ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
+ if (!ret) {
+ /* ptr must be a pointer to a buffer head. */
+ bh = (struct buffer_head *)((unsigned long)ptr);
+ set_buffer_nilfs_volatile(bh);
- return 0;
-}
+ nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
+ nilfs_direct_set_ptr(direct, key, req.bpr_ptr);
-static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
- union nilfs_bmap_ptr_req *req,
- __u64 key,
- struct nilfs_bmap_stats *stats)
-{
- int ret;
+ if (!nilfs_bmap_dirty(bmap))
+ nilfs_bmap_set_dirty(bmap);
- req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
- ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req);
- if (!ret)
- stats->bs_nblocks = 1;
- return ret;
-}
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_direct_set_target_v(direct, key, req.bpr_ptr);
-static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
- union nilfs_bmap_ptr_req *req,
- __u64 key)
-{
- nilfs_bmap_commit_end_ptr(&direct->d_bmap, req);
- nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
+ nilfs_bmap_add_blocks(bmap, 1);
+ }
+ return ret;
}
static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
{
- struct nilfs_direct *direct;
+ struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
union nilfs_bmap_ptr_req req;
- struct nilfs_bmap_stats stats;
+ struct inode *dat;
int ret;
- direct = (struct nilfs_direct *)bmap;
- if ((key > NILFS_DIRECT_KEY_MAX) ||
+ if (key > NILFS_DIRECT_KEY_MAX ||
nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR)
return -ENOENT;
- ret = nilfs_direct_prepare_delete(direct, &req, key, &stats);
- if (ret < 0)
- return ret;
- nilfs_direct_commit_delete(direct, &req, key);
- nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
+ dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
+ req.bpr_ptr = nilfs_direct_get_ptr(direct, key);
- return 0;
+ ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
+ if (!ret) {
+ nilfs_bmap_commit_end_ptr(bmap, &req, dat);
+ nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
+ nilfs_bmap_sub_blocks(bmap, 1);
+ }
+ return ret;
}
static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
@@ -310,59 +268,56 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
return 0;
}
-static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
- struct buffer_head *bh)
+static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
+ struct buffer_head *bh)
{
- union nilfs_bmap_ptr_req oldreq, newreq;
+ struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
+ struct nilfs_palloc_req oldreq, newreq;
+ struct inode *dat;
__u64 key;
__u64 ptr;
int ret;
- key = nilfs_bmap_data_get_key(&direct->d_bmap, bh);
+ if (!NILFS_BMAP_USE_VBN(bmap))
+ return 0;
+
+ dat = nilfs_bmap_get_dat(bmap);
+ key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(direct, key);
if (!buffer_nilfs_volatile(bh)) {
- oldreq.bpr_ptr = ptr;
- newreq.bpr_ptr = ptr;
- ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq,
- &newreq);
+ oldreq.pr_entry_nr = ptr;
+ newreq.pr_entry_nr = ptr;
+ ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq);
if (ret < 0)
return ret;
- nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq);
+ nilfs_dat_commit_update(dat, &oldreq, &newreq,
+ bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
set_buffer_nilfs_volatile(bh);
- nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
+ nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr);
} else
- ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr);
+ ret = nilfs_dat_mark_dirty(dat, ptr);
return ret;
}
-static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
- struct buffer_head *bh)
-{
- struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
-
- return NILFS_BMAP_USE_VBN(bmap) ?
- nilfs_direct_propagate_v(direct, bh) : 0;
-}
-
static int nilfs_direct_assign_v(struct nilfs_direct *direct,
__u64 key, __u64 ptr,
struct buffer_head **bh,
sector_t blocknr,
union nilfs_binfo *binfo)
{
+ struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap);
union nilfs_bmap_ptr_req req;
int ret;
req.bpr_ptr = ptr;
- ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr);
- if (unlikely(ret < 0))
- return ret;
-
- binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
- binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
-
- return 0;
+ ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
+ if (!ret) {
+ nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
+ binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
+ binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
+ }
+ return ret;
}
static int nilfs_direct_assign_p(struct nilfs_direct *direct,
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 5d30a35679b5..ecc3ba76db47 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -31,7 +31,6 @@
#include "mdt.h"
#include "alloc.h"
-#define NILFS_IFILE_GFP NILFS_MDT_GFP
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index fe9d8f2a13f8..807e584b163d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -430,7 +430,8 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
- if (nilfs_read_inode_common(inode, raw_inode))
+ err = nilfs_read_inode_common(inode, raw_inode);
+ if (err)
goto failed_unmap;
if (S_ISREG(inode->i_mode)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 6ea5f872e2de..6572ea4bc4df 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -442,12 +442,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
const char *msg;
int ret;
- ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
- if (ret < 0) {
- msg = "cannot read source blocks";
- goto failed;
- }
-
ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]);
if (ret < 0) {
/*
@@ -548,7 +542,25 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
}
}
- ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+ /*
+ * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(),
+ * which will operates an inode list without blocking.
+ * To protect the list from concurrent operations,
+ * nilfs_ioctl_move_blocks should be atomic operation.
+ */
+ if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags)) {
+ ret = -EBUSY;
+ goto out_free;
+ }
+
+ ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
+ if (ret < 0)
+ printk(KERN_ERR "NILFS: GC failed during preparation: "
+ "cannot read source blocks: err=%d\n", ret);
+ else
+ ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+
+ clear_nilfs_gc_running(nilfs);
out_free:
while (--n >= 0)
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2dfd47714ae5..156bf6091a96 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -103,15 +103,12 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
goto failed_unlock;
err = -EEXIST;
- if (buffer_uptodate(bh) || buffer_mapped(bh))
+ if (buffer_uptodate(bh))
goto failed_bh;
-#if 0
- /* The uptodate flag is not protected by the page lock, but
- the mapped flag is. Thus, we don't have to wait the buffer. */
+
wait_on_buffer(bh);
if (buffer_uptodate(bh))
goto failed_bh;
-#endif
bh->b_bdev = nilfs->ns_bdev;
err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
@@ -139,7 +136,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
int mode, struct buffer_head **out_bh)
{
struct buffer_head *bh;
- unsigned long blknum = 0;
+ __u64 blknum = 0;
int ret = -ENOMEM;
bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
@@ -162,17 +159,15 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
unlock_buffer(bh);
goto out;
}
- if (!buffer_mapped(bh)) { /* unused buffer */
- ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff,
- &blknum);
- if (unlikely(ret)) {
- unlock_buffer(bh);
- goto failed_bh;
- }
- bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
- bh->b_blocknr = blknum;
- set_buffer_mapped(bh);
+
+ ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
+ if (unlikely(ret)) {
+ unlock_buffer(bh);
+ goto failed_bh;
}
+ bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
+ bh->b_blocknr = (sector_t)blknum;
+ set_buffer_mapped(bh);
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
@@ -402,6 +397,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
struct inode *inode = container_of(page->mapping,
struct inode, i_data);
struct super_block *sb = inode->i_sb;
+ struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
struct nilfs_sb_info *writer = NULL;
int err = 0;
@@ -411,9 +407,10 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
if (page->mapping->assoc_mapping)
return 0; /* Do not request flush for shadow page cache */
if (!sb) {
- writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
+ down_read(&nilfs->ns_writer_sem);
+ writer = nilfs->ns_writer;
if (!writer) {
- nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
+ up_read(&nilfs->ns_writer_sem);
return -EROFS;
}
sb = writer->s_super;
@@ -425,7 +422,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
nilfs_flush_segment(sb, inode->i_ino);
if (writer)
- nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
+ up_read(&nilfs->ns_writer_sem);
return err;
}
@@ -516,9 +513,10 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
}
struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
- ino_t ino, gfp_t gfp_mask)
+ ino_t ino)
{
- struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask);
+ struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino,
+ NILFS_MDT_GFP);
if (!inode)
return NULL;
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index df683e0bca6a..431599733c9b 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -74,8 +74,7 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
int nilfs_mdt_fetch_dirty(struct inode *);
-struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
- gfp_t);
+struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t);
struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
ino_t, gfp_t);
void nilfs_mdt_destroy(struct inode *);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index d80cc71be749..6dc83591d118 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -552,7 +552,8 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
printk(KERN_WARNING
"NILFS warning: error recovering data block "
"(err=%d, ino=%lu, block-offset=%llu)\n",
- err, rb->ino, (unsigned long long)rb->blkoff);
+ err, (unsigned long)rb->ino,
+ (unsigned long long)rb->blkoff);
if (!err2)
err2 = err;
next:
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 9e3fe17bb96b..e6d9e37fa241 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -316,10 +316,10 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
{
struct bio *bio;
- bio = bio_alloc(GFP_NOWAIT, nr_vecs);
+ bio = bio_alloc(GFP_NOIO, nr_vecs);
if (bio == NULL) {
while (!bio && (nr_vecs >>= 1))
- bio = bio_alloc(GFP_NOWAIT, nr_vecs);
+ bio = bio_alloc(GFP_NOIO, nr_vecs);
}
if (likely(bio)) {
bio->bi_bdev = sb->s_bdev;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 51ff3d0a4ee2..683df89dbae5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2501,7 +2501,8 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
nilfs_discontinued(nilfs)) {
down_write(&nilfs->ns_sem);
- req->sb_err = nilfs_commit_super(sbi, 0);
+ req->sb_err = nilfs_commit_super(sbi,
+ nilfs_altsb_need_update(nilfs));
up_write(&nilfs->ns_sem);
}
}
@@ -2689,6 +2690,7 @@ static int nilfs_segctor_thread(void *arg)
} else {
DEFINE_WAIT(wait);
int should_sleep = 1;
+ struct the_nilfs *nilfs;
prepare_to_wait(&sci->sc_wait_daemon, &wait,
TASK_INTERRUPTIBLE);
@@ -2709,6 +2711,9 @@ static int nilfs_segctor_thread(void *arg)
finish_wait(&sci->sc_wait_daemon, &wait);
timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
time_after_eq(jiffies, sci->sc_timer->expires));
+ nilfs = sci->sc_sbi->s_nilfs;
+ if (sci->sc_super->s_dirt && nilfs_sb_need_update(nilfs))
+ set_nilfs_discontinued(nilfs);
}
goto loop;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a2c4d76c3366..0e99e5c0bd0f 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -28,7 +28,6 @@
#include <linux/nilfs2_fs.h>
#include "mdt.h"
-#define NILFS_SUFILE_GFP NILFS_MDT_GFP
static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8e2ec43b18f4..55f3d6b60732 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -50,6 +50,8 @@
#include <linux/writeback.h>
#include <linux/kobject.h>
#include <linux/exportfs.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
#include "nilfs.h"
#include "mdt.h"
#include "alloc.h"
@@ -65,7 +67,6 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
"(NILFS)");
MODULE_LICENSE("GPL");
-static void nilfs_write_super(struct super_block *sb);
static int nilfs_remount(struct super_block *sb, int *flags, char *data);
/**
@@ -311,9 +312,6 @@ static void nilfs_put_super(struct super_block *sb)
lock_kernel();
- if (sb->s_dirt)
- nilfs_write_super(sb);
-
nilfs_detach_segment_constructor(sbi);
if (!(sb->s_flags & MS_RDONLY)) {
@@ -336,63 +334,21 @@ static void nilfs_put_super(struct super_block *sb)
unlock_kernel();
}
-/**
- * nilfs_write_super - write super block(s) of NILFS
- * @sb: super_block
- *
- * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and
- * clears s_dirt. This function is called in the section protected by
- * lock_super().
- *
- * The s_dirt flag is managed by each filesystem and we protect it by ns_sem
- * of the struct the_nilfs. Lock order must be as follows:
- *
- * 1. lock_super()
- * 2. down_write(&nilfs->ns_sem)
- *
- * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer
- * of the super block (nilfs->ns_sbp[]).
- *
- * In most cases, VFS functions call lock_super() before calling these
- * methods. So we must be careful not to bring on deadlocks when using
- * lock_super(); see generic_shutdown_super(), write_super(), and so on.
- *
- * Note that order of lock_kernel() and lock_super() depends on contexts
- * of VFS. We should also note that lock_kernel() can be used in its
- * protective section and only the outermost one has an effect.
- */
-static void nilfs_write_super(struct super_block *sb)
+static int nilfs_sync_fs(struct super_block *sb, int wait)
{
struct nilfs_sb_info *sbi = NILFS_SB(sb);
struct the_nilfs *nilfs = sbi->s_nilfs;
-
- down_write(&nilfs->ns_sem);
- if (!(sb->s_flags & MS_RDONLY)) {
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u64 t = get_seconds();
- int dupsb;
-
- if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] &&
- t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) {
- up_write(&nilfs->ns_sem);
- return;
- }
- dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
- nilfs_commit_super(sbi, dupsb);
- }
- sb->s_dirt = 0;
- up_write(&nilfs->ns_sem);
-}
-
-static int nilfs_sync_fs(struct super_block *sb, int wait)
-{
int err = 0;
- nilfs_write_super(sb);
-
/* This function is called when super block should be written back */
if (wait)
err = nilfs_construct_segment(sb);
+
+ down_write(&nilfs->ns_sem);
+ if (sb->s_dirt)
+ nilfs_commit_super(sbi, 1);
+ up_write(&nilfs->ns_sem);
+
return err;
}
@@ -407,8 +363,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
list_add(&sbi->s_list, &nilfs->ns_supers);
up_write(&nilfs->ns_super_sem);
- sbi->s_ifile = nilfs_mdt_new(
- nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
+ sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO);
if (!sbi->s_ifile)
return -ENOMEM;
@@ -416,8 +371,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
if (unlikely(err))
goto failed;
+ down_read(&nilfs->ns_segctor_sem);
err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
&bh_cp);
+ up_read(&nilfs->ns_segctor_sem);
if (unlikely(err)) {
if (err == -ENOENT || err == -EINVAL) {
printk(KERN_ERR
@@ -527,6 +484,26 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+ struct super_block *sb = vfs->mnt_sb;
+ struct nilfs_sb_info *sbi = NILFS_SB(sb);
+
+ if (!nilfs_test_opt(sbi, BARRIER))
+ seq_printf(seq, ",barrier=off");
+ if (nilfs_test_opt(sbi, SNAPSHOT))
+ seq_printf(seq, ",cp=%llu",
+ (unsigned long long int)sbi->s_snapshot_cno);
+ if (nilfs_test_opt(sbi, ERRORS_RO))
+ seq_printf(seq, ",errors=remount-ro");
+ if (nilfs_test_opt(sbi, ERRORS_PANIC))
+ seq_printf(seq, ",errors=panic");
+ if (nilfs_test_opt(sbi, STRICT_ORDER))
+ seq_printf(seq, ",order=strict");
+
+ return 0;
+}
+
static struct super_operations nilfs_sops = {
.alloc_inode = nilfs_alloc_inode,
.destroy_inode = nilfs_destroy_inode,
@@ -536,7 +513,7 @@ static struct super_operations nilfs_sops = {
/* .drop_inode = nilfs_drop_inode, */
.delete_inode = nilfs_delete_inode,
.put_super = nilfs_put_super,
- .write_super = nilfs_write_super,
+ /* .write_super = nilfs_write_super, */
.sync_fs = nilfs_sync_fs,
/* .write_super_lockfs */
/* .unlockfs */
@@ -544,7 +521,7 @@ static struct super_operations nilfs_sops = {
.remount_fs = nilfs_remount,
.clear_inode = nilfs_clear_inode,
/* .umount_begin */
- /* .show_options */
+ .show_options = nilfs_show_options
};
static struct inode *
@@ -814,10 +791,15 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
if (sb->s_flags & MS_RDONLY) {
if (nilfs_test_opt(sbi, SNAPSHOT)) {
+ down_read(&nilfs->ns_segctor_sem);
err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
sbi->s_snapshot_cno);
- if (err < 0)
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ if (err == -ENOENT)
+ err = -EINVAL;
goto failed_sbi;
+ }
if (!err) {
printk(KERN_ERR
"NILFS: The specified checkpoint is "
@@ -1125,10 +1107,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
*/
sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
- if (!sd.cno)
- /* trying to get the latest checkpoint. */
- sd.cno = nilfs_last_cno(nilfs);
-
/*
* Get super block instance holding the nilfs_sb_info struct.
* A new instance is allocated if no existing mount is present or
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8b8889825716..d4168e269c5d 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -68,12 +68,11 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_bdev = bdev;
atomic_set(&nilfs->ns_count, 1);
- atomic_set(&nilfs->ns_writer_refcount, -1);
atomic_set(&nilfs->ns_ndirtyblks, 0);
init_rwsem(&nilfs->ns_sem);
init_rwsem(&nilfs->ns_super_sem);
mutex_init(&nilfs->ns_mount_mutex);
- mutex_init(&nilfs->ns_writer_mutex);
+ init_rwsem(&nilfs->ns_writer_sem);
INIT_LIST_HEAD(&nilfs->ns_list);
INIT_LIST_HEAD(&nilfs->ns_supers);
spin_lock_init(&nilfs->ns_last_segment_lock);
@@ -188,23 +187,19 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
inode_size = nilfs->ns_inode_size;
err = -ENOMEM;
- nilfs->ns_dat = nilfs_mdt_new(
- nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
+ nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
if (unlikely(!nilfs->ns_dat))
goto failed;
- nilfs->ns_gc_dat = nilfs_mdt_new(
- nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
+ nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
if (unlikely(!nilfs->ns_gc_dat))
goto failed_dat;
- nilfs->ns_cpfile = nilfs_mdt_new(
- nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
+ nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO);
if (unlikely(!nilfs->ns_cpfile))
goto failed_gc_dat;
- nilfs->ns_sufile = nilfs_mdt_new(
- nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
+ nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO);
if (unlikely(!nilfs->ns_sufile))
goto failed_cpfile;
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index e8adbffc626f..20abd55881e0 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -37,6 +37,7 @@ enum {
THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
the latest checkpoint was loaded */
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
+ THE_NILFS_GC_RUNNING, /* gc process is running */
};
/**
@@ -50,8 +51,7 @@ enum {
* @ns_sem: semaphore for shared states
* @ns_super_sem: semaphore for global operations across super block instances
* @ns_mount_mutex: mutex protecting mount process of nilfs
- * @ns_writer_mutex: mutex protecting ns_writer attach/detach
- * @ns_writer_refcount: number of referrers on ns_writer
+ * @ns_writer_sem: semaphore protecting ns_writer attach/detach
* @ns_current: back pointer to current mount
* @ns_sbh: buffer heads of on-disk super blocks
* @ns_sbp: pointers to super block data
@@ -100,8 +100,7 @@ struct the_nilfs {
struct rw_semaphore ns_sem;
struct rw_semaphore ns_super_sem;
struct mutex ns_mount_mutex;
- struct mutex ns_writer_mutex;
- atomic_t ns_writer_refcount;
+ struct rw_semaphore ns_writer_sem;
/*
* components protected by ns_super_sem
@@ -197,11 +196,26 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(LOADED, loaded)
THE_NILFS_FNS(DISCONTINUED, discontinued)
+THE_NILFS_FNS(GC_RUNNING, gc_running)
/* Minimum interval of periodical update of superblocks (in seconds) */
#define NILFS_SB_FREQ 10
#define NILFS_ALTSB_FREQ 60 /* spare superblock */
+static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
+{
+ u64 t = get_seconds();
+ return t < nilfs->ns_sbwtime[0] ||
+ t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ;
+}
+
+static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs)
+{
+ u64 t = get_seconds();
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
+}
+
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
struct the_nilfs *find_or_create_nilfs(struct block_device *);
void put_nilfs(struct the_nilfs *);
@@ -221,39 +235,26 @@ static inline void get_nilfs(struct the_nilfs *nilfs)
atomic_inc(&nilfs->ns_count);
}
-static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
-{
- if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
- mutex_lock(&nilfs->ns_writer_mutex);
- return nilfs->ns_writer;
-}
-
-static inline void nilfs_put_writer(struct the_nilfs *nilfs)
-{
- if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
- mutex_unlock(&nilfs->ns_writer_mutex);
-}
-
static inline void
nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
- mutex_lock(&nilfs->ns_writer_mutex);
+ down_write(&nilfs->ns_writer_sem);
nilfs->ns_writer = sbi;
- mutex_unlock(&nilfs->ns_writer_mutex);
+ up_write(&nilfs->ns_writer_sem);
}
static inline void
nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
- mutex_lock(&nilfs->ns_writer_mutex);
+ down_write(&nilfs->ns_writer_sem);
if (sbi == nilfs->ns_writer)
nilfs->ns_writer = NULL;
- mutex_unlock(&nilfs->ns_writer_mutex);
+ up_write(&nilfs->ns_writer_sem);
}
static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
{
- if (!atomic_dec_and_test(&sbi->s_count))
+ if (atomic_dec_and_test(&sbi->s_count))
kfree(sbi);
}
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..c9ee67b442e1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
event_priv->wd = wd;
ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
- /* EEXIST is not an error */
- if (ret == -EEXIST)
- ret = 0;
-
- /* did event_priv get attached? */
- if (list_empty(&fsn_event_priv->event_list))
+ if (ret) {
inotify_free_event_priv(fsn_event_priv);
+ /* EEXIST says we tail matched, EOVERFLOW isn't something
+ * to report up the stack. */
+ if ((ret == -EEXIST) ||
+ (ret == -EOVERFLOW))
+ ret = 0;
+ }
/*
* If we hold the entry until after the event is on the queue
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
return send;
}
+/*
+ * This is NEVER supposed to be called. Inotify marks should either have been
+ * removed from the idr when the watch was removed or in the
+ * fsnotify_destroy_mark_by_group() call when the inotify instance was being
+ * torn down. This is only called if the idr is about to be freed but there
+ * are still marks in it.
+ */
static int idr_callback(int id, void *p, void *data)
{
- BUG();
+ struct fsnotify_mark_entry *entry;
+ struct inotify_inode_mark_entry *ientry;
+ static bool warned = false;
+
+ if (warned)
+ return 0;
+
+ warned = false;
+ entry = p;
+ ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+
+ WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
+ "idr. Probably leaking memory\n", id, p, data);
+
+ /*
+ * I'm taking the liberty of assuming that the mark in question is a
+ * valid address and I'm dereferencing it. This might help to figure
+ * out why we got here and the panic is no worse than the original
+ * BUG() that was here.
+ */
+ if (entry)
+ printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
+ entry->group, entry->inode, ientry->wd);
return 0;
}
static void inotify_free_group_priv(struct fsnotify_group *group)
{
/* ideally the idr is empty and we won't hit the BUG in teh callback */
- idr_for_each(&group->inotify_data.idr, idr_callback, NULL);
+ idr_for_each(&group->inotify_data.idr, idr_callback, group);
idr_remove_all(&group->inotify_data.idr);
idr_destroy(&group->inotify_data.idr);
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..dcd2040d330c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -47,9 +47,6 @@
static struct vfsmount *inotify_mnt __read_mostly;
-/* this just sits here and wastes global memory. used to just pad userspace messages with zeros */
-static struct inotify_event nul_inotify_event;
-
/* these are configurable via /proc/sys/fs/inotify/ */
static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
@@ -157,7 +154,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
event = fsnotify_peek_notify_event(group);
- event_size += roundup(event->name_len, event_size);
+ if (event->name_len)
+ event_size += roundup(event->name_len + 1, event_size);
if (event_size > count)
return ERR_PTR(-EINVAL);
@@ -183,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event_private_data *fsn_priv;
struct inotify_event_private_data *priv;
size_t event_size = sizeof(struct inotify_event);
- size_t name_len;
+ size_t name_len = 0;
/* we get the inotify watch descriptor from the event private data */
spin_lock(&event->lock);
@@ -199,8 +197,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
inotify_free_event_priv(fsn_priv);
}
- /* round up event->name_len so it is a multiple of event_size */
- name_len = roundup(event->name_len, event_size);
+ /*
+ * round up event->name_len so it is a multiple of event_size
+ * plus an extra byte for the terminating '\0'.
+ */
+ if (event->name_len)
+ name_len = roundup(event->name_len + 1, event_size);
inotify_event.len = name_len;
inotify_event.mask = inotify_mask_to_arg(event->mask);
@@ -224,8 +226,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
return -EFAULT;
buf += event->name_len;
- /* fill userspace with 0's from nul_inotify_event */
- if (copy_to_user(buf, &nul_inotify_event, len_to_zero))
+ /* fill userspace with 0's */
+ if (clear_user(buf, len_to_zero))
return -EFAULT;
buf += len_to_zero;
event_size += name_len;
@@ -326,8 +328,9 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
list_for_each_entry(holder, &group->notification_list, event_list) {
event = holder->event;
send_len += sizeof(struct inotify_event);
- send_len += roundup(event->name_len,
- sizeof(struct inotify_event));
+ if (event->name_len)
+ send_len += roundup(event->name_len + 1,
+ sizeof(struct inotify_event));
}
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
@@ -364,20 +367,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
return error;
}
+/*
+ * Remove the mark from the idr (if present) and drop the reference
+ * on the mark because it was in the idr.
+ */
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark_entry *ientry)
{
struct idr *idr;
+ struct fsnotify_mark_entry *entry;
+ struct inotify_inode_mark_entry *found_ientry;
+ int wd;
spin_lock(&group->inotify_data.idr_lock);
idr = &group->inotify_data.idr;
- idr_remove(idr, ientry->wd);
- spin_unlock(&group->inotify_data.idr_lock);
+ wd = ientry->wd;
+
+ if (wd == -1)
+ goto out;
+
+ entry = idr_find(&group->inotify_data.idr, wd);
+ if (unlikely(!entry))
+ goto out;
+
+ found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+ if (unlikely(found_ientry != ientry)) {
+ /* We found an entry in the idr with the right wd, but it's
+ * not the entry we were told to remove. eparis seriously
+ * fucked up somewhere. */
+ WARN_ON(1);
+ ientry->wd = -1;
+ goto out;
+ }
+
+ /* One ref for being in the idr, one ref held by the caller */
+ BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+ idr_remove(idr, wd);
ientry->wd = -1;
+
+ /* removed from the idr, drop that ref */
+ fsnotify_put_mark(entry);
+out:
+ spin_unlock(&group->inotify_data.idr_lock);
}
+
/*
- * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
- * internal reference help on the mark because it is in the idr.
+ * Send IN_IGNORED for this wd, remove this wd from the idr.
*/
void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group)
@@ -386,6 +422,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
struct fsnotify_event *ignored_event;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
+ int ret;
ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +441,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
fsn_event_priv->group = group;
event_priv->wd = ientry->wd;
- fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
-
- /* did the private data get added? */
- if (list_empty(&fsn_event_priv->event_list))
+ ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
+ if (ret)
inotify_free_event_priv(fsn_event_priv);
skip_send_ignore:
@@ -418,9 +453,6 @@ skip_send_ignore:
/* remove this entry from the idr */
inotify_remove_from_idr(group, ientry);
- /* removed from idr, drop that reference */
- fsnotify_put_mark(entry);
-
atomic_dec(&group->inotify_data.user->inotify_watches);
}
@@ -432,80 +464,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry)
kmem_cache_free(inotify_inode_mark_cachep, ientry);
}
-static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+static int inotify_update_existing_watch(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 arg)
{
- struct fsnotify_mark_entry *entry = NULL;
+ struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
- struct inotify_inode_mark_entry *tmp_ientry;
- int ret = 0;
- int add = (arg & IN_MASK_ADD);
- __u32 mask;
__u32 old_mask, new_mask;
+ __u32 mask;
+ int add = (arg & IN_MASK_ADD);
+ int ret;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
return -EINVAL;
- tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
- if (unlikely(!tmp_ientry))
- return -ENOMEM;
- /* we set the mask at the end after attaching it */
- fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
- tmp_ientry->wd = -1;
-
-find_entry:
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
- if (entry) {
- ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
- } else {
- ret = -ENOSPC;
- if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
- goto out_err;
-retry:
- ret = -ENOMEM;
- if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
- goto out_err;
-
- spin_lock(&group->inotify_data.idr_lock);
- ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
- group->inotify_data.last_wd,
- &tmp_ientry->wd);
- spin_unlock(&group->inotify_data.idr_lock);
- if (ret) {
- if (ret == -EAGAIN)
- goto retry;
- goto out_err;
- }
+ if (!entry)
+ return -ENOENT;
- ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
- if (ret) {
- inotify_remove_from_idr(group, tmp_ientry);
- if (ret == -EEXIST)
- goto find_entry;
- goto out_err;
- }
-
- /* tmp_ientry has been added to the inode, so we are all set up.
- * now we just need to make sure tmp_ientry doesn't get freed and
- * we need to set up entry and ientry so the generic code can
- * do its thing. */
- ientry = tmp_ientry;
- entry = &ientry->fsn_entry;
- tmp_ientry = NULL;
-
- atomic_inc(&group->inotify_data.user->inotify_watches);
-
- /* update the idr hint */
- group->inotify_data.last_wd = ientry->wd;
-
- /* we put the mark on the idr, take a reference */
- fsnotify_get_mark(entry);
- }
-
- ret = ientry->wd;
+ ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
spin_lock(&entry->lock);
@@ -537,18 +518,107 @@ retry:
fsnotify_recalc_group_mask(group);
}
- /* this either matches fsnotify_find_mark_entry, or init_mark_entry
- * depending on which path we took... */
+ /* return the wd */
+ ret = ientry->wd;
+
+ /* match the get from fsnotify_find_mark_entry() */
fsnotify_put_mark(entry);
+ return ret;
+}
+
+static int inotify_new_watch(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 arg)
+{
+ struct inotify_inode_mark_entry *tmp_ientry;
+ __u32 mask;
+ int ret;
+
+ /* don't allow invalid bits: we don't want flags set */
+ mask = inotify_arg_to_mask(arg);
+ if (unlikely(!mask))
+ return -EINVAL;
+
+ tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+ if (unlikely(!tmp_ientry))
+ return -ENOMEM;
+
+ fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
+ tmp_ientry->fsn_entry.mask = mask;
+ tmp_ientry->wd = -1;
+
+ ret = -ENOSPC;
+ if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
+ goto out_err;
+retry:
+ ret = -ENOMEM;
+ if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
+ goto out_err;
+
+ spin_lock(&group->inotify_data.idr_lock);
+ ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
+ group->inotify_data.last_wd,
+ &tmp_ientry->wd);
+ spin_unlock(&group->inotify_data.idr_lock);
+ if (ret) {
+ /* idr was out of memory allocate and try again */
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_err;
+ }
+
+ /* we put the mark on the idr, take a reference */
+ fsnotify_get_mark(&tmp_ientry->fsn_entry);
+
+ /* we are on the idr, now get on the inode */
+ ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
+ if (ret) {
+ /* we failed to get on the inode, get off the idr */
+ inotify_remove_from_idr(group, tmp_ientry);
+ goto out_err;
+ }
+
+ /* update the idr hint, who cares about races, it's just a hint */
+ group->inotify_data.last_wd = tmp_ientry->wd;
+
+ /* increment the number of watches the user has */
+ atomic_inc(&group->inotify_data.user->inotify_watches);
+
+ /* return the watch descriptor for this new entry */
+ ret = tmp_ientry->wd;
+
+ /* match the ref from fsnotify_init_markentry() */
+ fsnotify_put_mark(&tmp_ientry->fsn_entry);
+
+ /* if this mark added a new event update the group mask */
+ if (mask & ~group->mask)
+ fsnotify_recalc_group_mask(group);
+
out_err:
- /* could be an error, could be that we found an existing mark */
- if (tmp_ientry) {
- /* on the idr but didn't make it on the inode */
- if (tmp_ientry->wd != -1)
- inotify_remove_from_idr(group, tmp_ientry);
+ if (ret < 0)
kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
- }
+
+ return ret;
+}
+
+static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+{
+ int ret = 0;
+
+retry:
+ /* try to update and existing watch with the new arg */
+ ret = inotify_update_existing_watch(group, inode, arg);
+ /* no mark present, try to add a new one */
+ if (ret == -ENOENT)
+ ret = inotify_new_watch(group, inode, arg);
+ /*
+ * inotify_new_watch could race with another thread which did an
+ * inotify_new_watch between the update_existing and the add watch
+ * here, go back and try to update an existing mark again.
+ */
+ if (ret == -EEXIST)
+ goto retry;
return ret;
}
@@ -568,7 +638,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
- group->inotify_data.last_wd = 0;
+ group->inotify_data.last_wd = 1;
group->inotify_data.user = user;
group->inotify_data.fa = NULL;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
return true;
break;
case (FSNOTIFY_EVENT_NONE):
+ if (old->mask & FS_Q_OVERFLOW)
+ return true;
+ else if (old->mask & FS_IN_IGNORED)
+ return false;
return false;
};
}
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
struct list_head *list = &group->notification_list;
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
-
- /* easy to tell if priv was attached to the event */
- INIT_LIST_HEAD(&priv->event_list);
+ int ret = 0;
/*
* There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
if (group->q_len >= group->max_events) {
event = &q_overflow_event;
+ ret = -EOVERFLOW;
/* sorry, no private data on the overflow event */
priv = NULL;
}
@@ -235,7 +238,7 @@ alloc_holder:
mutex_unlock(&group->notification_mutex);
wake_up(&group->notification_waitq);
- return 0;
+ return ret;
}
/*
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3140a4429af1..4350d4993b18 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2076,14 +2076,6 @@ err_out:
*ppos = pos;
if (cached_page)
page_cache_release(cached_page);
- /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
- if (likely(!status)) {
- if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
- if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
- status = generic_osync_inode(vi, mapping,
- OSYNC_METADATA|OSYNC_DATA);
- }
- }
pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err = sync_page_range(inode, mapping, pos, ret);
+ if (ret > 0) {
+ int err = generic_write_sync(file, pos, ret);
if (err < 0)
ret = err;
}
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
if (ret == -EIOCBQUEUED)
ret = wait_on_sync_kiocb(&kiocb);
mutex_unlock(&inode->i_mutex);
- if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err = sync_page_range(inode, mapping, *ppos - ret, ret);
+ if (ret > 0) {
+ int err = generic_write_sync(file, *ppos - ret, ret);
if (err < 0)
ret = err;
}
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 23bf68453d7d..1caa0ef0b2bb 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -384,13 +384,12 @@ unm_err_out:
* it is dirty in the inode meta data rather than the data page cache of the
* inode, and thus there are no data pages that need writing out. Therefore, a
* full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
- * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
- * ensure ->write_inode is called from generic_osync_inode() and this needs to
- * happen or the file data would not necessarily hit the device synchronously,
- * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC
- * simply "feels" better than just I_DIRTY_SYNC, since the file data has not
- * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
- * would suggest.
+ * other hand, is not sufficient, because ->write_inode needs to be called even
+ * in case of fdatasync. This needs to happen or the file data would not
+ * necessarily hit the device synchronously, even though the vfs inode has the
+ * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
+ * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
+ * which is not what I_DIRTY_SYNC on its own would suggest.
*/
void __mark_mft_record_dirty(ntfs_inode *ni)
{
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f9a3e8942669..ab513ddaeff2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6851,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
}
status = 0;
bail:
-
+ brelse(last_eb_bh);
mlog_exit(status);
return status;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b401654011a2..8a1e61545f41 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1747,8 +1747,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* we know zeros will only be needed in the first and/or last cluster.
*/
if (clusters_to_alloc || extents_to_split ||
- wc->w_desc[0].c_needs_zero ||
- wc->w_desc[wc->w_clen - 1].c_needs_zero)
+ (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
+ wc->w_desc[wc->w_clen - 1].c_needs_zero)))
cluster_of_pages = 1;
else
cluster_of_pages = 0;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 2f28b7de2c8d..b4957c7d9fe2 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
goto bail;
}
+ /*
+ * If the last lookup failed to create dentry lock, let us
+ * redo it.
+ */
+ if (!dentry->d_fsdata) {
+ mlog(0, "Inode %llu doesn't have dentry lock, "
+ "returning false\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ goto bail;
+ }
+
ret = 1;
bail:
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 1c9efb406a96..02bf17808bdc 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -325,6 +325,7 @@ clear_fields:
}
static struct backing_dev_info dlmfs_backing_dev_info = {
+ .name = "ocfs2-dlmfs",
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index fcf879ed6930..756f5b0998e0 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
* that still has AST's pending... */
in_use = !list_empty(&lock->ast_list);
spin_unlock(&dlm->ast_lock);
- if (in_use) {
+ if (in_use && !(flags & LKM_CANCEL)) {
mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
"while waiting for an ast!", res->lockname.len,
res->lockname.name);
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
- if (master_node) {
+ if (master_node && !(flags & LKM_CANCEL)) {
mlog(ML_ERROR, "lockres in progress!\n");
spin_unlock(&res->spinlock);
return DLM_FORWARD;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..221c5e98957b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1871,8 +1871,7 @@ relock:
goto out_dio;
}
} else {
- written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
- *ppos);
+ written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
}
out_dio:
@@ -1880,18 +1879,21 @@ out_dio:
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
- /*
- * The generic write paths have handled getting data
- * to disk, but since we don't make use of the dirty
- * inode list, a manual journal commit is necessary
- * here.
- */
- if (old_size != i_size_read(inode) ||
- old_clusters != OCFS2_I(inode)->ip_clusters) {
+ ret = filemap_fdatawrite_range(file->f_mapping, pos,
+ pos + count - 1);
+ if (ret < 0)
+ written = ret;
+
+ if (!ret && (old_size != i_size_read(inode) ||
+ old_clusters != OCFS2_I(inode)->ip_clusters)) {
ret = jbd2_journal_force_commit(osb->journal->j_journal);
if (ret < 0)
written = ret;
}
+
+ if (!ret)
+ ret = filemap_fdatawait_range(file->f_mapping, pos,
+ pos + count - 1);
}
/*
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
if (ret > 0) {
unsigned long nr_pages;
+ int err;
- *ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- /*
- * If file or inode is SYNC and we actually wrote some data,
- * sync it.
- */
- if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- mutex_lock(&inode->i_mutex);
- err = ocfs2_rw_lock(inode, 1);
- if (err < 0) {
- mlog_errno(err);
- } else {
- err = generic_osync_inode(inode, mapping,
- OSYNC_METADATA|OSYNC_DATA);
- ocfs2_rw_unlock(inode, 1);
- }
- mutex_unlock(&inode->i_mutex);
+ err = generic_write_sync(out, *ppos, ret);
+ if (err)
+ ret = err;
+ else
+ *ppos += ret;
- if (err)
- ret = err;
- }
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
}
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index fcdba091af3d..c212cf5a2bdf 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
[OCFS2_LOCK_TYPE_QINFO] = "Quota",
+ [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
[OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
};
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index bf7742d0ee3b..44f2a5e1d042 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -23,6 +23,7 @@
#include "sysfile.h"
#include "dlmglue.h"
#include "uptodate.h"
+#include "super.h"
#include "quota.h"
static struct workqueue_struct *ocfs2_quota_wq = NULL;
@@ -114,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
int rc = 0;
struct buffer_head *tmp = *bh;
+ if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
+ ocfs2_error(inode->i_sb,
+ "Quota file %llu is probably corrupted! Requested "
+ "to read block %Lu but file has size only %Lu\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)v_block,
+ (unsigned long long)i_size_read(inode));
+ return -EIO;
+ }
rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
ocfs2_validate_quota_block);
if (rc)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index b0ee0fdf799a..a3f8871d21fd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1218,13 +1218,17 @@ static void ocfs2_kill_sb(struct super_block *sb)
{
struct ocfs2_super *osb = OCFS2_SB(sb);
+ /* Failed mount? */
+ if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED)
+ goto out;
+
/* Prevent further queueing of inode drop events */
spin_lock(&dentry_list_lock);
ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
spin_unlock(&dentry_list_lock);
/* Wait for work to finish and/or remove it */
cancel_work_sync(&osb->dentry_lock_work);
-
+out:
kill_block_super(sb);
}
diff --git a/fs/open.c b/fs/open.c
index dd98e8076024..31191bf513e4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@ out:
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
struct file *filp)
{
- int err;
+ int ret;
struct iattr newattrs;
/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
}
/* Remove suid/sgid on truncate too */
- newattrs.ia_valid |= should_remove_suid(dentry);
+ ret = should_remove_suid(dentry);
+ if (ret)
+ newattrs.ia_valid |= ret | ATTR_FORCE;
mutex_lock(&dentry->d_inode->i_mutex);
- err = notify_change(dentry, &newattrs);
+ ret = notify_change(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
- return err;
+ return ret;
}
static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
int error;
struct file *f;
+ validate_creds(cred);
+
/*
* We must always pass in a valid mount pointer. Historically
* callers got away with not passing it, but we must enforce this at
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ea4e6cb29e13..619ba99dfe39 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, merges[WRITE]),
(unsigned long long)part_stat_read(p, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
- p->in_flight,
+ part_in_flight(p),
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
+ssize_t part_inflight_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
+}
+
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
&dev_attr_size.attr,
&dev_attr_alignment_offset.attr,
&dev_attr_stat.attr,
+ &dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 175db258942f..6f742f6658a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
if (!task)
return -ESRCH;
- task_lock(task);
- if (task->mm)
- oom_adjust = task->mm->oom_adj;
- else
- oom_adjust = OOM_DISABLE;
- task_unlock(task);
+ oom_adjust = task->oomkilladj;
put_task_struct(task);
len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
task = get_proc_task(file->f_path.dentry->d_inode);
if (!task)
return -ESRCH;
- task_lock(task);
- if (!task->mm) {
- task_unlock(task);
- put_task_struct(task);
- return -EINVAL;
- }
- if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
- task_unlock(task);
+ if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
put_task_struct(task);
return -EACCES;
}
- task->mm->oom_adj = oom_adjust;
- task_unlock(task);
+ task->oomkilladj = oom_adjust;
put_task_struct(task);
if (end - buffer == 0)
return -EIO;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0ff7566c767c..a7f0110fca4c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops;
static const struct inode_operations ramfs_dir_inode_operations;
static struct backing_dev_info ramfs_backing_dev_info = {
+ .name = "ramfs",
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
{
init_poll_funcptr(&pwq->pt, __pollwait);
pwq->polling_task = current;
+ pwq->triggered = 0;
pwq->error = 0;
pwq->table = NULL;
pwq->inline_index = 0;
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..7394e9e17534 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
len = left;
ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
- if (ret > 0)
+ if (ret > 0) {
*ppos += ret;
+ file_accessed(in);
+ }
return ret;
}
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
ret = file_remove_suid(out);
- if (!ret)
+ if (!ret) {
+ file_update_time(out);
ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+ }
mutex_unlock(&inode->i_mutex);
} while (ret > 0);
splice_from_pipe_end(pipe, &sd);
@@ -976,25 +980,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (ret > 0) {
unsigned long nr_pages;
+ int err;
- *ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- /*
- * If file or inode is SYNC and we actually wrote some data,
- * sync it.
- */
- if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
- mutex_lock(&inode->i_mutex);
- err = generic_osync_inode(inode, mapping,
- OSYNC_METADATA|OSYNC_DATA);
- mutex_unlock(&inode->i_mutex);
-
- if (err)
- ret = err;
- }
+ err = generic_write_sync(out, *ppos, ret);
+ if (err)
+ ret = err;
+ else
+ *ppos += ret;
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
}
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..9cda337ddae2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
s = NULL;
goto out;
}
- INIT_LIST_HEAD(&s->s_dirty);
- INIT_LIST_HEAD(&s->s_io);
- INIT_LIST_HEAD(&s->s_more_io);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb)
* Drops a temporary reference, frees superblock if there's no
* references left.
*/
-static void put_super(struct super_block *sb)
+void put_super(struct super_block *sb)
{
spin_lock(&sb_lock);
__put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3422ba61d86d..192340930bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -19,20 +19,22 @@
SYNC_FILE_RANGE_WAIT_AFTER)
/*
- * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
- * just dirties buffers with inodes so we have to submit IO for these buffers
- * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
- * case write_inode() functions do sync_dirty_buffer() and thus effectively
- * write one block at a time.
+ * Do the filesystem syncing work. For simple filesystems
+ * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
+ * submit IO for these buffers via __sync_blockdev(). This also speeds up the
+ * wait == 1 case since in that case write_inode() functions do
+ * sync_dirty_buffer() and thus effectively write one block at a time.
*/
static int __sync_filesystem(struct super_block *sb, int wait)
{
/* Avoid doing twice syncing and cache pruning for quota sync */
- if (!wait)
+ if (!wait) {
writeout_quota_sb(sb, -1);
- else
+ writeback_inodes_sb(sb);
+ } else {
sync_quota_sb(sb, -1);
- sync_inodes_sb(sb, wait);
+ sync_inodes_sb(sb);
+ }
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
return __sync_blockdev(sb->s_bdev, wait);
@@ -118,7 +120,7 @@ restart:
*/
SYSCALL_DEFINE0(sync)
{
- wakeup_pdflush(0);
+ wakeup_flusher_threads(0);
sync_filesystems(0);
sync_filesystems(1);
if (unlikely(laptop_mode))
@@ -176,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
}
/**
- * vfs_fsync - perform a fsync or fdatasync on a file
+ * vfs_fsync_range - helper to sync a range of data & metadata to disk
* @file: file to sync
* @dentry: dentry of @file
- * @data: only perform a fdatasync operation
+ * @start: offset in bytes of the beginning of data range to sync
+ * @end: offset in bytes of the end of data range (inclusive)
+ * @datasync: perform only datasync
*
- * Write back data and metadata for @file to disk. If @datasync is
- * set only metadata needed to access modified file data is written.
+ * Write back data in range @start..@end and metadata for @file to disk. If
+ * @datasync is set only metadata needed to access modified file data is
+ * written.
*
* In case this function is called from nfsd @file may be %NULL and
* only @dentry is set. This can only happen when the filesystem
* implements the export_operations API.
*/
-int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
+ loff_t end, int datasync)
{
const struct file_operations *fop;
struct address_space *mapping;
@@ -212,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
goto out;
}
- ret = filemap_fdatawrite(mapping);
+ ret = filemap_write_and_wait_range(mapping, start, end);
/*
* We need to protect against concurrent writers, which could cause
@@ -223,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (!ret)
ret = err;
mutex_unlock(&mapping->host->i_mutex);
- err = filemap_fdatawait(mapping);
- if (!ret)
- ret = err;
+
out:
return ret;
}
+EXPORT_SYMBOL(vfs_fsync_range);
+
+/**
+ * vfs_fsync - perform a fsync or fdatasync on a file
+ * @file: file to sync
+ * @dentry: dentry of @file
+ * @datasync: only perform a fdatasync operation
+ *
+ * Write back data and metadata for @file to disk. If @datasync is
+ * set only metadata needed to access modified file data is written.
+ *
+ * In case this function is called from nfsd @file may be %NULL and
+ * only @dentry is set. This can only happen when the filesystem
+ * implements the export_operations API.
+ */
+int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
+}
EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync)
@@ -254,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
return do_fsync(fd, 1);
}
+/**
+ * generic_write_sync - perform syncing after a write if file / inode is sync
+ * @file: file to which the write happened
+ * @pos: offset where the write started
+ * @count: length of the write
+ *
+ * This is just a simple wrapper about our general syncing function.
+ */
+int generic_write_sync(struct file *file, loff_t pos, loff_t count)
+{
+ if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
+ return 0;
+ return vfs_fsync_range(file, file->f_path.dentry, pos,
+ pos + count - 1, 1);
+}
+EXPORT_SYMBOL(generic_write_sync);
+
/*
* sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 14f2d71ea3ce..0050fc40e8c9 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
const struct inode_operations sysfs_dir_inode_operations = {
.lookup = sysfs_lookup,
.setattr = sysfs_setattr,
+ .setxattr = sysfs_setxattr,
};
static void remove_dir(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 555f0ff988df..e28cecf179f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,8 @@
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
#include "sysfs.h"
extern struct super_block * sysfs_sb;
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = {
};
static struct backing_dev_info sysfs_backing_dev_info = {
+ .name = "sysfs",
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
static const struct inode_operations sysfs_inode_operations ={
.setattr = sysfs_setattr,
+ .setxattr = sysfs_setxattr,
};
int __init sysfs_inode_init(void)
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void)
return bdi_init(&sysfs_backing_dev_info);
}
+struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+{
+ struct sysfs_inode_attrs *attrs;
+ struct iattr *iattrs;
+
+ attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+ if (!attrs)
+ return NULL;
+ iattrs = &attrs->ia_iattr;
+
+ /* assign default attributes */
+ iattrs->ia_mode = sd->s_mode;
+ iattrs->ia_uid = 0;
+ iattrs->ia_gid = 0;
+ iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
+
+ return attrs;
+}
int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
{
struct inode * inode = dentry->d_inode;
struct sysfs_dirent * sd = dentry->d_fsdata;
- struct iattr * sd_iattr;
+ struct sysfs_inode_attrs *sd_attrs;
+ struct iattr *iattrs;
unsigned int ia_valid = iattr->ia_valid;
int error;
if (!sd)
return -EINVAL;
- sd_iattr = sd->s_iattr;
+ sd_attrs = sd->s_iattr;
error = inode_change_ok(inode, iattr);
if (error)
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (error)
return error;
- if (!sd_iattr) {
+ if (!sd_attrs) {
/* setting attributes for the first time, allocate now */
- sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
- if (!sd_iattr)
+ sd_attrs = sysfs_init_inode_attrs(sd);
+ if (!sd_attrs)
return -ENOMEM;
- /* assign default attributes */
- sd_iattr->ia_mode = sd->s_mode;
- sd_iattr->ia_uid = 0;
- sd_iattr->ia_gid = 0;
- sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
- sd->s_iattr = sd_iattr;
+ sd->s_iattr = sd_attrs;
+ } else {
+ /* attributes were changed at least once in past */
+ iattrs = &sd_attrs->ia_iattr;
+
+ if (ia_valid & ATTR_UID)
+ iattrs->ia_uid = iattr->ia_uid;
+ if (ia_valid & ATTR_GID)
+ iattrs->ia_gid = iattr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+ iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MTIME)
+ iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_CTIME)
+ iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+ iattrs->ia_mode = sd->s_mode = mode;
+ }
}
+ return error;
+}
- /* attributes were changed atleast once in past */
-
- if (ia_valid & ATTR_UID)
- sd_iattr->ia_uid = iattr->ia_uid;
- if (ia_valid & ATTR_GID)
- sd_iattr->ia_gid = iattr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_MTIME)
- sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_CTIME)
- sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
- sd_iattr->ia_mode = sd->s_mode = mode;
- }
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ struct sysfs_inode_attrs *iattrs;
+ void *secdata;
+ int error;
+ u32 secdata_len = 0;
+
+ if (!sd)
+ return -EINVAL;
+ if (!sd->s_iattr)
+ sd->s_iattr = sysfs_init_inode_attrs(sd);
+ if (!sd->s_iattr)
+ return -ENOMEM;
+
+ iattrs = sd->s_iattr;
+
+ if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+ error = security_inode_setsecurity(dentry->d_inode, suffix,
+ value, size, flags);
+ if (error)
+ goto out;
+ error = security_inode_getsecctx(dentry->d_inode,
+ &secdata, &secdata_len);
+ if (error)
+ goto out;
+ if (iattrs->ia_secdata)
+ security_release_secctx(iattrs->ia_secdata,
+ iattrs->ia_secdata_len);
+ iattrs->ia_secdata = secdata;
+ iattrs->ia_secdata_len = secdata_len;
+ } else
+ return -EINVAL;
+out:
return error;
}
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
+ struct sysfs_inode_attrs *iattrs;
inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
inode->i_ino = sd->s_ino;
lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
- if (sd->s_iattr) {
+ iattrs = sd->s_iattr;
+ if (iattrs) {
/* sysfs_dirent has non-default attributes
* get them for the new inode from persistent copy
* in sysfs_dirent
*/
- set_inode_attr(inode, sd->s_iattr);
+ set_inode_attr(inode, &iattrs->ia_iattr);
+ if (iattrs->ia_secdata)
+ security_inode_notifysecctx(inode,
+ iattrs->ia_secdata,
+ iattrs->ia_secdata_len);
} else
set_default_inode_attr(inode, sd->s_mode);
-
/* initialize inode according to type */
switch (sysfs_type(sd)) {
case SYSFS_DIR:
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1d897ad808e0..c5081ad77026 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -16,6 +16,7 @@
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/mutex.h>
+#include <linux/security.h>
#include "sysfs.h"
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
}
const struct inode_operations sysfs_symlink_inode_operations = {
+ .setxattr = sysfs_setxattr,
.readlink = generic_readlink,
.follow_link = sysfs_follow_link,
.put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3fa0d98481e2..af4c4e7482ac 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,8 @@
* This file is released under the GPLv2.
*/
+#include <linux/fs.h>
+
struct sysfs_open_dirent;
/* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr {
struct hlist_head buffers;
};
+struct sysfs_inode_attrs {
+ struct iattr ia_iattr;
+ void *ia_secdata;
+ u32 ia_secdata_len;
+};
+
/*
* sysfs_dirent - the building block of sysfs hierarchy. Each and
* every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@ struct sysfs_dirent {
unsigned int s_flags;
ino_t s_ino;
umode_t s_mode;
- struct iattr *s_iattr;
+ struct sysfs_inode_attrs *s_iattr;
};
#define SD_DEACTIVATED_BIAS INT_MIN
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
void sysfs_delete_inode(struct inode *inode);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags);
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
int sysfs_inode_init(void);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d891d46f..1c8991b0db13 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,26 +65,14 @@
static int shrink_liability(struct ubifs_info *c, int nr_to_write)
{
int nr_written;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
- .range_end = LLONG_MAX,
- .nr_to_write = nr_to_write,
- };
-
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
+ nr_written = writeback_inodes_sb(c->vfs_sb);
if (!nr_written) {
/*
* Re-try again but wait on pages/inodes which are being
* written-back concurrently (e.g., by pdflush).
*/
- memset(&wbc, 0, sizeof(struct writeback_control));
- wbc.sync_mode = WB_SYNC_ALL;
- wbc.range_end = LLONG_MAX;
- wbc.nr_to_write = nr_to_write;
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
+ nr_written = sync_inodes_sb(c->vfs_sb);
}
dbg_budg("%d pages were written back", nr_written);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 26d2e0d80465..51763aa8f4de 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
{
int i, err;
struct ubifs_info *c = sb->s_fs_info;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .range_start = 0,
- .range_end = LLONG_MAX,
- .nr_to_write = LONG_MAX,
- };
/*
* Zero @wait is just an advisory thing to help the file system shove
@@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
* the user be able to get more accurate results of 'statfs()' after
* they synchronize the file system.
*/
- generic_sync_sb_inodes(sb, &wbc);
+ sync_inodes_sb(sb);
/*
* Synchronize write buffers, because 'ubifs_run_commit()' does not
@@ -1971,6 +1965,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
*
* Read-ahead will be disabled because @c->bdi.ra_pages is 0.
*/
+ c->bdi.name = "ubifs",
c->bdi.capabilities = BDI_CAP_MAP_COPY;
c->bdi.unplug_io_fn = default_unplug_io_fn;
err = bdi_init(&c->bdi);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 1d2c570704c8..2ffdb6733af1 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -18,59 +18,6 @@
#include <linux/string.h>
#include <linux/buffer_head.h>
-#if 0
-static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
- uint8_t ad_size, struct kernel_lb_addr fe_loc,
- int *pos, int *offset, struct buffer_head **bh,
- int *error)
-{
- int loffset = *offset;
- int block;
- uint8_t *ad;
- int remainder;
-
- *error = 0;
-
- ad = (uint8_t *)(*bh)->b_data + *offset;
- *offset += ad_size;
-
- if (!ad) {
- brelse(*bh);
- *error = 1;
- return NULL;
- }
-
- if (*offset == dir->i_sb->s_blocksize) {
- brelse(*bh);
- block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
- if (!block)
- return NULL;
- *bh = udf_tread(dir->i_sb, block);
- if (!*bh)
- return NULL;
- } else if (*offset > dir->i_sb->s_blocksize) {
- ad = tmpad;
-
- remainder = dir->i_sb->s_blocksize - loffset;
- memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder);
-
- brelse(*bh);
- block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
- if (!block)
- return NULL;
- (*bh) = udf_tread(dir->i_sb, block);
- if (!*bh)
- return NULL;
-
- memcpy((uint8_t *)ad + remainder, (*bh)->b_data,
- ad_size - remainder);
- *offset = ad_size - remainder;
- }
-
- return ad;
-}
-#endif
-
struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
struct udf_fileident_bh *fibh,
struct fileIdentDesc *cfi,
@@ -248,39 +195,6 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
return fi;
}
-#if 0
-static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
-{
- struct extent_ad *ext;
- struct fileEntry *fe;
- uint8_t *ptr;
-
- if ((!buffer) || (!offset)) {
- printk(KERN_ERR "udf: udf_get_fileextent() invalidparms\n");
- return NULL;
- }
-
- fe = (struct fileEntry *)buffer;
-
- if (fe->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FE)) {
- udf_debug("0x%x != TAG_IDENT_FE\n",
- le16_to_cpu(fe->descTag.tagIdent));
- return NULL;
- }
-
- ptr = (uint8_t *)(fe->extendedAttr) +
- le32_to_cpu(fe->lengthExtendedAttr);
-
- if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
- ptr += *offset;
-
- ext = (struct extent_ad *)ptr;
-
- *offset = *offset + sizeof(struct extent_ad);
- return ext;
-}
-#endif
-
struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
int inc)
{
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7464305382b5..b80cbd78833c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -193,9 +193,11 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
static int udf_release_file(struct inode *inode, struct file *filp)
{
if (filp->f_mode & FMODE_WRITE) {
+ mutex_lock(&inode->i_mutex);
lock_kernel();
udf_discard_prealloc(inode);
unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
}
return 0;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7533f785636..6d24c2c63f93 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -90,19 +90,16 @@ no_delete:
}
/*
- * If we are going to release inode from memory, we discard preallocation and
- * truncate last inode extent to proper length. We could use drop_inode() but
- * it's called under inode_lock and thus we cannot mark inode dirty there. We
- * use clear_inode() but we have to make sure to write inode as it's not written
- * automatically.
+ * If we are going to release inode from memory, we truncate last inode extent
+ * to proper length. We could use drop_inode() but it's called under inode_lock
+ * and thus we cannot mark inode dirty there. We use clear_inode() but we have
+ * to make sure to write inode as it's not written automatically.
*/
void udf_clear_inode(struct inode *inode)
{
struct udf_inode_info *iinfo;
if (!(inode->i_sb->s_flags & MS_RDONLY)) {
lock_kernel();
- /* Discard preallocation for directories, symlinks, etc. */
- udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
unlock_kernel();
write_inode_now(inode, 0);
@@ -664,8 +661,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum);
#ifdef UDF_PREALLOCATE
- /* preallocate blocks */
- udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
+ /* We preallocate blocks only for regular files. It also makes sense
+ * for directories but there's a problem when to drop the
+ * preallocation. We might use some delayed work for that but I feel
+ * it's overengineering for a filesystem like UDF. */
+ if (S_ISREG(inode->i_mode))
+ udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
#endif
/* merge any continuous blocks in laarr */
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 1b88fd5df05d..43e24a3b8e10 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -36,14 +36,10 @@ unsigned int udf_get_last_session(struct super_block *sb)
ms_info.addr_format = CDROM_LBA;
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
-#define WE_OBEY_THE_WRITTEN_STANDARDS 1
-
if (i == 0) {
udf_debug("XA disk: %s, vol_desc_start=%d\n",
(ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba);
-#if WE_OBEY_THE_WRITTEN_STANDARDS
if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
-#endif
vol_desc_start = ms_info.addr.lba;
} else {
udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6a29fa34c478..21dad8c608f9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -943,7 +943,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
pc->componentType = 1;
pc->lengthComponentIdent = 0;
pc->componentFileVersionNum = 0;
- pc += sizeof(struct pathComponent);
elen += sizeof(struct pathComponent);
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 1c3d0af59ddf..6d4f6d3449fb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask)
return inode_permission(inode, mask);
}
-int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
+/**
+ * __vfs_setxattr_noperm - perform setxattr operation without performing
+ * permission checks.
+ *
+ * @dentry - object to perform setxattr on
+ * @name - xattr name to set
+ * @value - value to set @name to
+ * @size - size of @value
+ * @flags - flags to pass into filesystem operations
+ *
+ * returns the result of the internal setxattr or setsecurity operations.
+ *
+ * This function requires the caller to lock the inode's i_mutex before it
+ * is executed. It also assumes that the caller will make the appropriate
+ * permission checks.
+ */
+int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
- int error;
-
- error = xattr_permission(inode, name, MAY_WRITE);
- if (error)
- return error;
+ int error = -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
- error = security_inode_setxattr(dentry, name, value, size, flags);
- if (error)
- goto out;
- error = -EOPNOTSUPP;
if (inode->i_op->setxattr) {
error = inode->i_op->setxattr(dentry, name, value, size, flags);
if (!error) {
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (!error)
fsnotify_xattr(dentry);
}
+
+ return error;
+}
+
+
+int
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ error = xattr_permission(inode, name, MAY_WRITE);
+ if (error)
+ return error;
+
+ mutex_lock(&inode->i_mutex);
+ error = security_inode_setxattr(dentry, name, value, size, flags);
+ if (error)
+ goto out;
+
+ error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+
out:
mutex_unlock(&inode->i_mutex);
return error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0882d166239a..eafcc7c18706 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_GETVERSION_32:
cmd = _NATIVE_IOC(cmd, long);
return xfs_file_ioctl(filp, cmd, p);
- case XFS_IOC_SWAPEXT: {
+ case XFS_IOC_SWAPEXT_32: {
struct xfs_swapext sxp;
struct compat_xfs_swapext __user *sxu = arg;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 8070b34cc287..6c32f1d63d8c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -485,14 +485,6 @@ xfs_vn_put_link(
}
STATIC int
-xfs_vn_permission(
- struct inode *inode,
- int mask)
-{
- return generic_permission(inode, mask, xfs_check_acl);
-}
-
-STATIC int
xfs_vn_getattr(
struct vfsmount *mnt,
struct dentry *dentry,
@@ -696,7 +688,7 @@ xfs_vn_fiemap(
}
static const struct inode_operations xfs_inode_operations = {
- .permission = xfs_vn_permission,
+ .check_acl = xfs_check_acl,
.truncate = xfs_vn_truncate,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
@@ -724,7 +716,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .permission = xfs_vn_permission,
+ .check_acl = xfs_check_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -749,7 +741,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .permission = xfs_vn_permission,
+ .check_acl = xfs_check_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -762,7 +754,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
.put_link = xfs_vn_put_link,
- .permission = xfs_vn_permission,
+ .check_acl = xfs_check_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7078974a6eee..fde63a3c4ecc 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -817,7 +817,8 @@ write_retry:
xfs_iunlock(xip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
- error2 = sync_page_range(inode, mapping, pos, ret);
+ error2 = filemap_write_and_wait_range(mapping, pos,
+ pos + ret - 1);
if (!error)
error = error2;
if (need_i_mutex)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
return 0;
}
+void
+__xfs_inode_set_reclaim_tag(
+ struct xfs_perag *pag,
+ struct xfs_inode *ip)
+{
+ radix_tree_tag_set(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+}
+
/*
* We set the inode flag atomically with the radix tree tag.
* Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
read_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
- radix_tree_tag_set(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 34ec86923f7e..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -191,80 +191,82 @@ xfs_iget_cache_hit(
int flags,
int lock_flags) __releases(pag->pag_ici_lock)
{
+ struct inode *inode = VFS_I(ip);
struct xfs_mount *mp = ip->i_mount;
- int error = EAGAIN;
+ int error;
+
+ spin_lock(&ip->i_flags_lock);
/*
- * If INEW is set this inode is being set up
- * If IRECLAIM is set this inode is being torn down
- * Pause and try again.
+ * If we are racing with another cache hit that is currently
+ * instantiating this inode or currently recycling it out of
+ * reclaimabe state, wait for the initialisation to complete
+ * before continuing.
+ *
+ * XXX(hch): eventually we should do something equivalent to
+ * wait_on_inode to wait for these flags to be cleared
+ * instead of polling for it.
*/
- if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+ if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
XFS_STATS_INC(xs_ig_frecycle);
+ error = EAGAIN;
goto out_error;
}
- /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
- if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-
- /*
- * If lookup is racing with unlink, then we should return an
- * error immediately so we don't remove it from the reclaim
- * list and potentially leak the inode.
- */
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
- goto out_error;
- }
+ /*
+ * If lookup is racing with unlink return an error immediately.
+ */
+ if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ goto out_error;
+ }
+ /*
+ * If IRECLAIMABLE is set, we've torn down the VFS inode already.
+ * Need to carefully get it back into useable state.
+ */
+ if (ip->i_flags & XFS_IRECLAIMABLE) {
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
/*
- * We need to re-initialise the VFS inode as it has been
- * 'freed' by the VFS. Do this here so we can deal with
- * errors cleanly, then tag it so it can be set up correctly
- * later.
+ * We need to set XFS_INEW atomically with clearing the
+ * reclaimable tag so that we do have an indicator of the
+ * inode still being initialized.
*/
- if (inode_init_always(mp->m_super, VFS_I(ip))) {
- error = ENOMEM;
- goto out_error;
- }
+ ip->i_flags |= XFS_INEW;
+ ip->i_flags &= ~XFS_IRECLAIMABLE;
+ __xfs_inode_clear_reclaim_tag(mp, pag, ip);
- /*
- * We must set the XFS_INEW flag before clearing the
- * XFS_IRECLAIMABLE flag so that if a racing lookup does
- * not find the XFS_IRECLAIMABLE above but has the igrab()
- * below succeed we can safely check XFS_INEW to detect
- * that this inode is still being initialised.
- */
- xfs_iflags_set(ip, XFS_INEW);
- xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
- /* clear the radix tree reclaim flag as well. */
- __xfs_inode_clear_reclaim_tag(mp, pag, ip);
- } else if (!igrab(VFS_I(ip))) {
+ error = -inode_init_always(mp->m_super, inode);
+ if (error) {
+ /*
+ * Re-initializing the inode failed, and we are in deep
+ * trouble. Try to re-add it to the reclaim list.
+ */
+ read_lock(&pag->pag_ici_lock);
+ spin_lock(&ip->i_flags_lock);
+
+ ip->i_flags &= ~XFS_INEW;
+ ip->i_flags |= XFS_IRECLAIMABLE;
+ __xfs_inode_set_reclaim_tag(pag, ip);
+ goto out_error;
+ }
+ inode->i_state = I_LOCK|I_NEW;
+ } else {
/* If the VFS inode is being torn down, pause and try again. */
- XFS_STATS_INC(xs_ig_frecycle);
- goto out_error;
- } else if (xfs_iflags_test(ip, XFS_INEW)) {
- /*
- * We are racing with another cache hit that is
- * currently recycling this inode out of the XFS_IRECLAIMABLE
- * state. Wait for the initialisation to complete before
- * continuing.
- */
- wait_on_inode(VFS_I(ip));
- }
+ if (!igrab(inode)) {
+ error = EAGAIN;
+ goto out_error;
+ }
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- error = ENOENT;
- iput(VFS_I(ip));
- goto out_error;
+ /* We've got a live one. */
+ spin_unlock(&ip->i_flags_lock);
+ read_unlock(&pag->pag_ici_lock);
}
- /* We've got a live one. */
- read_unlock(&pag->pag_ici_lock);
-
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
@@ -274,6 +276,7 @@ xfs_iget_cache_hit(
return 0;
out_error:
+ spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
return error;
}