diff options
Diffstat (limited to 'fs')
238 files changed, 5718 insertions, 2859 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 12d602351dbe..6e58c4ca1e6e 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -int v9fs_check_acl(struct inode *inode, int mask) +int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; struct v9fs_session_info *v9ses; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + v9ses = v9fs_inode2v9ses(inode); if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { /* diff --git a/fs/9p/acl.h b/fs/9p/acl.h index 59e18c2e8c7e..7ef3ac9f6d95 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -16,7 +16,7 @@ #ifdef CONFIG_9P_FS_POSIX_ACL extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern int v9fs_check_acl(struct inode *inode, int mask); +extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, struct posix_acl *, struct posix_acl *); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index cbf4e50f3933..466d2a4fc5cb 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -51,7 +51,7 @@ * */ -static int v9fs_dentry_delete(struct dentry *dentry) +static int v9fs_dentry_delete(const struct dentry *dentry) { P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, dentry); @@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry) * */ -static int v9fs_cached_dentry_delete(struct dentry *dentry) +static int v9fs_cached_dentry_delete(const struct dentry *dentry) { struct inode *inode = dentry->d_inode; P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 34bf71b56542..59782981b225 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) * */ -void v9fs_destroy_inode(struct inode *inode) +static void v9fs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); } + +void v9fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, v9fs_i_callback); +} #endif /** @@ -270,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); /* Directory should have only one entry. */ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); return dentry; } @@ -628,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); @@ -742,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -760,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -949,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -966,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ @@ -1034,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, inst_out: if (v9ses->cache) - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); else - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_add(dentry, inode); return NULL; @@ -1702,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1715,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } @@ -1849,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, ihold(old_dentry->d_inode); } - dentry->d_op = old_dentry->d_op; + d_set_d_op(dentry, old_dentry->d_op); d_instantiate(dentry, old_dentry->d_inode); return err; @@ -1973,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + d_set_d_op(dentry, &v9fs_cached_dentry_operations); d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1989,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, err = PTR_ERR(inode); goto error; } - dentry->d_op = &v9fs_dentry_operations; + d_set_d_op(dentry, &v9fs_dentry_operations); d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index f4287e4de744..bf7693c384f9 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = { }; static int -adfs_hash(struct dentry *parent, struct qstr *qstr) +adfs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr) { const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; const unsigned char *name; @@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr) * requirements of the underlying filesystem. */ static int -adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name) +adfs_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i; - if (entry->len != name->len) + if (len != name->len) return 1; for (i = 0; i < name->len; i++) { char a, b; - a = entry->name[i]; + a = str[i]; b = name->name[i]; if (a >= 'A' && a <= 'Z') @@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - dentry->d_op = &adfs_dentry_operations; + d_set_d_op(dentry, &adfs_dentry_operations); lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 959dbff2d42d..a4041b52fbca 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void adfs_destroy_inode(struct inode *inode) +static void adfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } +static void adfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, adfs_i_callback); +} + static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; @@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } else - sb->s_root->d_op = &adfs_dentry_operations; + d_set_d_op(sb->s_root, &adfs_dentry_operations); unlock_kernel(); return 0; diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 7d0f0a30f7a3..3a4557e8325c 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); head = &inode->i_dentry; next = head->next; while (next != head) { @@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) } next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 914d1c0bc07a..944a4042fb65 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -13,11 +13,19 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); -static int affs_hash_dentry(struct dentry *, struct qstr *); -static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); +static int affs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(struct dentry *, struct qstr *); -static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int affs_intl_hash_dentry(const struct dentry *, + const struct inode *, struct qstr *); +static int affs_intl_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); const struct dentry_operations affs_dentry_operations = { .d_hash = affs_hash_dentry, @@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb) * Note: the dentry argument is the parent dentry. */ static inline int -__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) +__affs_hash_dentry(struct qstr *qstr, toupper_t toupper) { const u8 *name = qstr->name; unsigned long hash; int i; - i = affs_check_name(qstr->name,qstr->len); + i = affs_check_name(qstr->name, qstr->len); if (i) return i; @@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper) } static int -affs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_toupper); + return __affs_hash_dentry(qstr, affs_toupper); } static int -affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr) +affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - return __affs_hash_dentry(dentry, qstr, affs_intl_toupper); + return __affs_hash_dentry(qstr, affs_intl_toupper); } -static inline int -__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper) +static inline int __affs_compare_dentry(unsigned int len, + const char *str, const struct qstr *name, toupper_t toupper) { - const u8 *aname = a->name; - const u8 *bname = b->name; - int len; + const u8 *aname = str; + const u8 *bname = name->name; - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + /* + * 'str' is the name of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (affs_check_name(b->name,b->len)) + if (affs_check_name(name->name, name->len)) return 1; - /* If the names are longer than the allowed 30 chars, + /* + * If the names are longer than the allowed 30 chars, * the excess is ignored, so their length may differ. */ - len = a->len; if (len >= 30) { - if (b->len < 30) + if (name->len < 30) return 1; len = 30; - } else if (len != b->len) + } else if (len != name->len) return 1; for (; len > 0; len--) @@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou } static int -affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_toupper); + return __affs_compare_dentry(len, str, name, affs_toupper); } static int -affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return __affs_compare_dentry(dentry, a, b, affs_intl_toupper); + return __affs_compare_dentry(len, str, name, affs_intl_toupper); } /* @@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (IS_ERR(inode)) return ERR_CAST(inode); } - dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; + d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/affs/super.c b/fs/affs/super.c index 0cf7f4384cbd..d39081bbe7ce 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb) return &i->vfs_inode; } -static void affs_destroy_inode(struct inode *inode) +static void affs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } +static void affs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, affs_i_callback); +} + static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; @@ -475,7 +482,7 @@ got_root: printk(KERN_ERR "AFFS: Get root inode failed\n"); goto out_error; } - sb->s_root->d_op = &affs_dentry_operations; + d_set_d_op(sb->s_root, &affs_dentry_operations); pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); return 0; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5439e1bc9a86..34a3263d60a4 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/namei.h> #include <linux/pagemap.h> #include <linux/ctype.h> #include <linux/sched.h> @@ -23,7 +24,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); -static int afs_d_delete(struct dentry *dentry); +static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); @@ -581,7 +582,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } success: - dentry->d_op = &afs_fs_dentry_operations; + d_set_d_op(dentry, &afs_fs_dentry_operations); d_add(dentry, inode); _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", @@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) void *dir_version; int ret; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + vnode = AFS_FS_I(dentry->d_inode); if (dentry->d_inode) @@ -730,7 +734,7 @@ out_bad: * - called from dput() when d_count is going to 0. * - return 1 to request dentry be unhashed, 0 otherwise */ -static int afs_d_delete(struct dentry *dentry) +static int afs_d_delete(const struct dentry *dentry) { _enter("%s", dentry->d_name.name); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index cca8eef736fc..6d4bc1c8ff60 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); -extern int afs_permission(struct inode *, int); +extern int afs_permission(struct inode *, int, unsigned int); /* * server.c diff --git a/fs/afs/security.c b/fs/afs/security.c index bb4ed144d0e4..f44b9d355377 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask) +int afs_permission(struct inode *inode, int mask, unsigned int flags) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t uninitialized_var(access); struct key *key; int ret; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + _enter("{{%x:%u},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); @@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask) } key_put(key); - ret = generic_permission(inode, mask, NULL); + ret = generic_permission(inode, mask, flags, NULL); _leave(" = %d", ret); return ret; diff --git a/fs/afs/super.c b/fs/afs/super.c index 27201cffece4..f901a9d7c111 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb) return &vnode->vfs_inode; } +static void afs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct afs_vnode *vnode = AFS_FS_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(afs_inode_cachep, vnode); +} + /* * destroy an AFS inode struct */ @@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode) ASSERTCMP(vnode->server, ==, NULL); - kmem_cache_free(afs_inode_cachep, vnode); + call_rcu(&inode->i_rcu, afs_i_callback); atomic_dec(&afs_count_active_inodes); } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 57ce55b2564c..5fd38112a6ca 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name, this.name = name; this.len = strlen(name); this.hash = 0; - path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); if (!path.dentry) goto err_module; @@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name, */ ihold(anon_inode_inode); - path.dentry->d_op = &anon_inodefs_dentry_operations; + d_set_d_op(path.dentry, &anon_inodefs_dentry_operations); d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; @@ -232,7 +232,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + mntput_long(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3d283abf67d7..0fffe1c24cec 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -16,6 +16,7 @@ #include <linux/auto_fs4.h> #include <linux/auto_dev-ioctl.h> #include <linux/mutex.h> +#include <linux/spinlock.h> #include <linux/list.h> /* This is the range of ioctl() numbers we claim as ours */ @@ -60,6 +61,8 @@ do { \ current->pid, __func__, ##args); \ } while (0) +extern spinlock_t autofs4_lock; + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never @@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry) return dentry->d_inode && !d_unhashed(dentry); } -static inline int __simple_empty(struct dentry *dentry) +static inline void __autofs4_add_expiring(struct dentry *dentry) { - struct dentry *child; - int ret = 0; - - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) - goto out; - ret = 1; -out: - return ret; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + } + return; } static inline void autofs4_add_expiring(struct dentry *dentry) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a796c9417fb1..cc1d01365905 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -91,24 +91,64 @@ done: } /* - * Calculate next entry in top down tree traversal. - * From next_mnt in namespace.c - elegant. + * Calculate and dget next entry in top down tree traversal. */ -static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +static struct dentry *get_next_positive_dentry(struct dentry *prev, + struct dentry *root) { - struct list_head *next = p->d_subdirs.next; + struct list_head *next; + struct dentry *p, *ret; + + if (prev == NULL) + return dget(prev); + spin_lock(&autofs4_lock); +relock: + p = prev; + spin_lock(&p->d_lock); +again: + next = p->d_subdirs.next; if (next == &p->d_subdirs) { while (1) { - if (p == root) + struct dentry *parent; + + if (p == root) { + spin_unlock(&p->d_lock); + spin_unlock(&autofs4_lock); + dput(prev); return NULL; + } + + parent = p->d_parent; + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&p->d_lock); + cpu_relax(); + goto relock; + } + spin_unlock(&p->d_lock); next = p->d_u.d_child.next; - if (next != &p->d_parent->d_subdirs) + p = parent; + if (next != &parent->d_subdirs) break; - p = p->d_parent; } } - return list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_u.d_child); + + spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); + /* Negative dentry - try next */ + if (!simple_positive(ret)) { + spin_unlock(&ret->d_lock); + p = ret; + goto again; + } + dget_dlock(ret); + spin_unlock(&ret->d_lock); + spin_unlock(&p->d_lock); + spin_unlock(&autofs4_lock); + + dput(prev); + + return ret; } /* @@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - spin_lock(&dcache_lock); - for (p = top; p; p = next_dentry(p, top)) { - /* Negative dentry - give up */ - if (!simple_positive(p)) - continue; - + p = NULL; + while ((p = get_next_positive_dentry(p, top))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); - spin_unlock(&dcache_lock); - /* * Is someone visiting anywhere in the subtree ? * If there's no mount we need to check the usage @@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt, else ino_count++; - if (atomic_read(&p->d_count) > ino_count) { + if (p->d_count > ino_count) { top_ino->last_used = jiffies; dput(p); return 1; } } - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); /* Timeout of a tree mount is ultimately determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) @@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); - spin_lock(&dcache_lock); - for (p = parent; p; p = next_dentry(p, parent)) { - /* Negative dentry - give up */ - if (!simple_positive(p)) - continue; - + p = NULL; + while ((p = get_next_positive_dentry(p, parent))) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); - spin_unlock(&dcache_lock); - if (d_mountpoint(p)) { /* Can we umount this guy */ if (autofs4_mount_busy(mnt, p)) - goto cont; + continue; /* Can we expire this guy */ if (autofs4_can_expire(p, timeout, do_now)) return p; } -cont: - dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); return NULL; } @@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, struct autofs_info *ino = autofs4_dentry_ino(root); if (d_mountpoint(root)) { ino->flags |= AUTOFS_INF_MOUNTPOINT; - root->d_mounted--; + spin_lock(&root->d_lock); + root->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&root->d_lock); } ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); @@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, { unsigned long timeout; struct dentry *root = sb->s_root; + struct dentry *dentry; struct dentry *expired = NULL; - struct list_head *next; int do_now = how & AUTOFS_EXP_IMMEDIATE; int exp_leaves = how & AUTOFS_EXP_LEAVES; struct autofs_info *ino; @@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, now = jiffies; timeout = sbi->exp_timeout; - spin_lock(&dcache_lock); - next = root->d_subdirs.next; - - /* On exit from the loop expire is set to a dgot dentry - * to expire or it's NULL */ - while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); - - /* Negative dentry - give up */ - if (!simple_positive(dentry)) { - next = next->next; - continue; - } - - dentry = dget(dentry); - spin_unlock(&dcache_lock); - + dentry = NULL; + while ((dentry = get_next_positive_dentry(dentry, root))) { spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); @@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 2; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; /* Can we umount this guy */ @@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, if (!exp_leaves) { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { @@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } else { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); @@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, } next: spin_unlock(&sbi->fs_lock); - dput(dentry); - spin_lock(&dcache_lock); - next = next->next; } - spin_unlock(&dcache_lock); return NULL; found: @@ -408,9 +410,13 @@ found: ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&expired->d_parent->d_lock); + spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); - spin_unlock(&dcache_lock); + spin_unlock(&expired->d_lock); + spin_unlock(&expired->d_parent->d_lock); + spin_unlock(&autofs4_lock); return expired; } @@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - sb->s_root->d_mounted++; + spin_lock(&sb->s_root->d_lock); + /* + * If we haven't been expired away, then reset + * mounted status. + */ + if (mnt->mnt_parent != mnt) + sb->s_root->d_flags |= DCACHE_MOUNTED; + spin_unlock(&sb->s_root->d_lock); ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index ac87e49fa706..a7bdb9dcac84 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_iput; pipe = NULL; - root->d_op = &autofs4_sb_dentry_operations; + d_set_d_op(root, &autofs4_sb_dentry_operations); root->d_fsdata = ino; /* Can this call block? */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d34896cfb19f..651e4ef563b1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -23,6 +23,8 @@ #include "autofs_i.h" +DEFINE_SPINLOCK(autofs4_lock); + static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * autofs file system so just let the libfs routines handle * it. */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); return -ENOENT; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); out: return dcache_dir_open(inode, file); @@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) /* We trigger a mount for almost all flags */ lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&dentry->d_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); goto follow; } @@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) */ if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); status = try_to_fill_dentry(dentry, nd->flags); @@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); spin_unlock(&sbi->fs_lock); follow: /* @@ -306,12 +315,19 @@ out_error: */ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *dir = dentry->d_parent->d_inode; - struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); - int oz_mode = autofs4_oz_mode(sbi); + struct inode *dir; + struct autofs_sb_info *sbi; + int oz_mode; int flags = nd ? nd->flags : 0; int status = 1; + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dentry->d_parent->d_inode; + sbi = autofs4_sbi(dir->i_sb); + oz_mode = autofs4_oz_mode(sbi); + /* Pending dentry */ spin_lock(&sbi->fs_lock); if (autofs4_ispending(dentry)) { @@ -346,12 +362,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; /* Check for a non-mountpoint directory with no contents */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&dentry->d_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); /* The daemon never causes a mount to trigger */ if (oz_mode) @@ -367,7 +385,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); return 1; } @@ -422,7 +441,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -436,7 +455,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (atomic_read(&active->d_count) == 0) + if (active->d_count == 0) goto next; qstr = &active->d_name; @@ -452,17 +471,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) goto next; if (d_unhashed(active)) { - dget(active); + dget_dlock(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return active; } next: spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -477,7 +496,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -507,17 +526,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) goto next; if (d_unhashed(expiring)) { - dget(expiring); + dget_dlock(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return expiring; } next: spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -559,7 +578,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * we check for the hashed dentry and return the newly * hashed dentry. */ - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); /* * And we need to ensure that the same dentry is used for @@ -698,9 +717,9 @@ static int autofs4_dir_symlink(struct inode *dir, d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); @@ -753,12 +772,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 0; } @@ -775,16 +794,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&sbi->lookup_lock); + spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&autofs4_lock); return -ENOTEMPTY; } - autofs4_add_expiring(dentry); - spin_lock(&dentry->d_lock); + __autofs4_add_expiring(dentry); + spin_unlock(&sbi->lookup_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); @@ -829,9 +852,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) - dentry->d_op = &autofs4_root_dentry_operations; + d_set_d_op(dentry, &autofs4_root_dentry_operations); else - dentry->d_op = &autofs4_dentry_operations; + d_set_d_op(dentry, &autofs4_dentry_operations); dentry->d_fsdata = ino; ino->dentry = dget(dentry); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 2341375386f8..c5f8459c905e 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, { struct dentry *root = sbi->sb->s_root; struct dentry *tmp; - char *buf = *name; + char *buf; char *p; - int len = 0; + int len; + unsigned seq; - spin_lock(&dcache_lock); +rename_retry: + buf = *name; + len = 0; + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); + spin_lock(&autofs4_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; } @@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return len; } diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f024d8aaddef..9ad2369d9e35 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct inode *inode, int mask) +static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + return -EIO; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index aa4e7c7ae3c6..de93581b79a2 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void -befs_destroy_inode(struct inode *inode) +static void befs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } +static void befs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, befs_i_callback); +} + static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 76db6d7d49bb..a8e37f81d097 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb) return &bi->vfs_inode; } -static void bfs_destroy_inode(struct inode *inode) +static void bfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } +static void bfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bfs_i_callback); +} + static void init_once(void *foo) { struct bfs_inode_info *bi = foo; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4230252fd689..771f23527010 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void bdev_destroy_inode(struct inode *inode) +static void bdev_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } +static void bdev_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bdev_i_callback); +} + static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2222d161c7b6..6ae2c8cac9d5 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, return ret; } -int btrfs_check_acl(struct inode *inode, int mask) +int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl; int error = -EAGAIN; - acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + error = -ECHILD; - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); + } else { + struct posix_acl *acl; + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + } } return error; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af52f6d7a4d8..a142d204b526 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait); /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -int btrfs_check_acl(struct inode *inode, int mask); +int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); #else #define btrfs_check_acl NULL #endif diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 6f0444473594..0ccf9a8afcdf 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, static struct dentry *btrfs_get_parent(struct dentry *child) { struct inode *dir = child->d_inode; - static struct dentry *dentry; + struct dentry *dentry; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct extent_buffer *leaf; @@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = 0; dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); return dentry; fail: btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 72f31ecb5c90..a0ff46a47895 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) int index; int ret; - dentry->d_op = &btrfs_dentry_operations; + d_set_d_op(dentry, &btrfs_dentry_operations); if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return inode; } -static int btrfs_dentry_delete(struct dentry *dentry) +static int btrfs_dentry_delete(const struct dentry *dentry) { struct btrfs_root *root; @@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return inode; } +static void btrfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + void btrfs_destroy_inode(struct inode *inode) { struct btrfs_ordered_extent *ordered; @@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode) inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); free: - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); + call_rcu(&inode->i_rcu, btrfs_i_callback); } int btrfs_drop_inode(struct inode *inode) @@ -7204,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -static int btrfs_permission(struct inode *inode, int mask) +static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) { if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) return -EACCES; - return generic_permission(inode, mask, btrfs_check_acl); + return generic_permission(inode, mask, flags, btrfs_check_acl); } static const struct inode_operations btrfs_dir_inode_operations = { diff --git a/fs/buffer.c b/fs/buffer.c index 5930e382959b..2219a76e2caf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1270,12 +1270,10 @@ static inline void check_irqs_on(void) static void bh_lru_install(struct buffer_head *bh) { struct buffer_head *evictee = NULL; - struct bh_lru *lru; check_irqs_on(); bh_lru_lock(); - lru = &__get_cpu_var(bh_lrus); - if (lru->bhs[0] != bh) { + if (__this_cpu_read(bh_lrus.bhs[0]) != bh) { struct buffer_head *bhs[BH_LRU_SIZE]; int in; int out = 0; @@ -1283,7 +1281,8 @@ static void bh_lru_install(struct buffer_head *bh) get_bh(bh); bhs[out++] = bh; for (in = 0; in < BH_LRU_SIZE; in++) { - struct buffer_head *bh2 = lru->bhs[in]; + struct buffer_head *bh2 = + __this_cpu_read(bh_lrus.bhs[in]); if (bh2 == bh) { __brelse(bh2); @@ -1298,7 +1297,7 @@ static void bh_lru_install(struct buffer_head *bh) } while (out < BH_LRU_SIZE) bhs[out++] = NULL; - memcpy(lru->bhs, bhs, sizeof(bhs)); + memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs)); } bh_lru_unlock(); @@ -1313,23 +1312,22 @@ static struct buffer_head * lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *ret = NULL; - struct bh_lru *lru; unsigned int i; check_irqs_on(); bh_lru_lock(); - lru = &__get_cpu_var(bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { - struct buffer_head *bh = lru->bhs[i]; + struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); if (bh && bh->b_bdev == bdev && bh->b_blocknr == block && bh->b_size == size) { if (i) { while (i) { - lru->bhs[i] = lru->bhs[i - 1]; + __this_cpu_write(bh_lrus.bhs[i], + __this_cpu_read(bh_lrus.bhs[i - 1])); i--; } - lru->bhs[0] = bh; + __this_cpu_write(bh_lrus.bhs[0], bh); } get_bh(bh); ret = bh; @@ -3203,22 +3201,23 @@ static void recalc_bh_state(void) int i; int tot = 0; - if (__get_cpu_var(bh_accounting).ratelimit++ < 4096) + if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096) return; - __get_cpu_var(bh_accounting).ratelimit = 0; + __this_cpu_write(bh_accounting.ratelimit, 0); for_each_online_cpu(i) tot += per_cpu(bh_accounting, i).nr; buffer_heads_over_limit = (tot > max_buffer_heads); } - + struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); - get_cpu_var(bh_accounting).nr++; + preempt_disable(); + __this_cpu_inc(bh_accounting.nr); recalc_bh_state(); - put_cpu_var(bh_accounting); + preempt_enable(); } return ret; } @@ -3228,9 +3227,10 @@ void free_buffer_head(struct buffer_head *bh) { BUG_ON(!list_empty(&bh->b_assoc_buffers)); kmem_cache_free(bh_cachep, bh); - get_cpu_var(bh_accounting).nr--; + preempt_disable(); + __this_cpu_dec(bh_accounting.nr); recalc_bh_state(); - put_cpu_var(bh_accounting); + preempt_enable(); } EXPORT_SYMBOL(free_buffer_head); @@ -3243,9 +3243,8 @@ static void buffer_exit_cpu(int cpu) brelse(b->bhs[i]); b->bhs[i] = NULL; } - get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr; + this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; - put_cpu_var(bh_accounting); } static int buffer_cpu_notify(struct notifier_block *self, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 158c700fdca5..fa7ca04ee816 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -40,12 +40,13 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - dentry->d_op = &ceph_dentry_ops; + if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ + ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + d_set_d_op(dentry, &ceph_dentry_ops); else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - dentry->d_op = &ceph_snapdir_dentry_ops; + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); else - dentry->d_op = &ceph_snap_dentry_ops; + d_set_d_op(dentry, &ceph_snap_dentry_ops); di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) @@ -111,7 +112,7 @@ static int __dcache_readdir(struct file *filp, dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, last); - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); /* start at beginning? */ if (filp->f_pos == 2 || last == NULL || @@ -135,6 +136,7 @@ more: fi->at_end = 1; goto out_unlock; } + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (!d_unhashed(dentry) && dentry->d_inode && ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && @@ -144,13 +146,15 @@ more: dentry->d_name.len, dentry->d_name.name, di->offset, filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", !dentry->d_inode ? " null" : ""); + spin_unlock(&dentry->d_lock); p = p->prev; dentry = list_entry(p, struct dentry, d_u.d_child); di = ceph_dentry(dentry); } - atomic_inc(&dentry->d_count); - spin_unlock(&dcache_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); @@ -176,19 +180,19 @@ more: filp->f_pos++; - /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ + /* make sure a dentry wasn't dropped while we didn't have parent lock */ if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); err = -EAGAIN; goto out; } - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; out_unlock: - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); out: if (last) dput(last); @@ -986,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) */ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *dir = dentry->d_parent->d_inode; + struct inode *dir; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + dir = dentry->d_parent->d_inode; dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode, diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8d79b8912e31..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file) static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, - int *checkeof, bool align_to_pages) + int *checkeof, bool align_to_pages, + unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); @@ -307,7 +308,7 @@ static int striped_read(struct inode *inode, more: if (align_to_pages) - page_align = (pos - io_align) & ~PAGE_MASK; + page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; this_len = left; @@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, struct inode *inode = file->f_dentry->d_inode; struct page **pages; u64 off = *poff; - int num_pages = calc_pages_for(off, len); - int ret; + int num_pages, ret; dout("sync_read on file %p %llu~%u %s\n", file, off, len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); - if (file->f_flags & O_DIRECT) - pages = ceph_get_direct_page_vector(data, num_pages); - else + if (file->f_flags & O_DIRECT) { + num_pages = calc_pages_for((unsigned long)data, len); + pages = ceph_get_direct_page_vector(data, num_pages, true); + } else { + num_pages = calc_pages_for(off, len); pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + } if (IS_ERR(pages)) return PTR_ERR(pages); @@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, goto done; ret = striped_read(inode, off, len, pages, num_pages, checkeof, - file->f_flags & O_DIRECT); + file->f_flags & O_DIRECT, + (unsigned long)data & ~PAGE_MASK); if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) ret = ceph_copy_page_vector_to_user(pages, data, off, ret); @@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, done: if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, true); else ceph_release_page_vector(pages, num_pages); dout("sync_read result %d\n", ret); @@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, int do_sync = 0; int check_caps = 0; int page_align, io_align; + unsigned long buf_align; int ret; struct timespec mtime = CURRENT_TIME; @@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, pos = *offset; io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) @@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, */ more: len = left; - if (file->f_flags & O_DIRECT) + if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of io alignment */ - page_align = (pos - io_align) & ~PAGE_MASK; - else + page_align = (pos - io_align + buf_align) & ~PAGE_MASK; + num_pages = calc_pages_for((unsigned long)data, len); + } else { page_align = pos & ~PAGE_MASK; + num_pages = calc_pages_for(pos, len); + } req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), pos, &len, CEPH_OSD_OP_WRITE, flags, @@ -512,10 +521,8 @@ more: if (!req) return -ENOMEM; - num_pages = calc_pages_for(pos, len); - if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, false); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -565,7 +572,7 @@ more: } if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, false); else if (file->f_flags & O_SYNC) ceph_release_page_vector(pages, num_pages); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bf1286588f26..e61de4f7b99d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb) return &ci->vfs_inode; } +static void ceph_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ceph_inode_info *ci = ceph_inode(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ceph_inode_cachep, ci); +} + void ceph_destroy_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); @@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode) if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); - kmem_cache_free(ceph_inode_cachep, ci); + call_rcu(&inode->i_rcu, ceph_i_callback); } @@ -841,13 +850,13 @@ static void ceph_set_dentry_offset(struct dentry *dn) di->offset = ceph_inode(inode)->i_max_offset++; spin_unlock(&inode->i_lock); - spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&dir->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&dir->d_lock); } /* @@ -879,8 +888,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } else if (realdn) { dout("dn %p (%d) spliced with %p (%d) " "inode %p ino %llx.%llx\n", - dn, atomic_read(&dn->d_count), - realdn, atomic_read(&realdn->d_count), + dn, dn->d_count, + realdn, realdn->d_count, realdn->d_inode, ceph_vinop(realdn->d_inode)); dput(dn); dn = realdn; @@ -1231,11 +1240,11 @@ retry_lookup: goto retry_lookup; } else { /* reorder parent's d_subdirs */ - spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); + spin_lock(&parent->d_lock); + spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); list_move(&dn->d_u.d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } di = dn->d_fsdata; @@ -1772,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask) * Check inode permissions. We verify we have a valid value for * the AUTH cap, then call the generic handler. */ -int ceph_permission(struct inode *inode, int mask) +int ceph_permission(struct inode *inode, int mask, unsigned int flags) { - int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); + int err; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); if (!err) - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, flags, NULL); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 38800eaa81d0..a50fca1e03be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1486,7 +1486,7 @@ retry: *base = ceph_ino(temp->d_inode); *plen = len; dout("build_path on %p %d built %llx '%.*s'\n", - dentry, atomic_read(&dentry->d_count), *base, len, path); + dentry, dentry->d_count, *base, len, path); return path; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7f01728a4657..4553d8829edb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); extern int ceph_do_getattr(struct inode *inode, int mask); -extern int ceph_permission(struct inode *inode, int mask); +extern int ceph_permission(struct inode *inode, int mask, unsigned int flags); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 224d7bbd1fcc..e654dfd092c3 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -64,7 +64,9 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data, void *buffer, uint16_t maxbuf) { const struct TCP_Server_Info *server = cookie_netfs_data; - const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr; + const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr; + const struct sockaddr_in *addr = (struct sockaddr_in *) sa; + const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa; struct cifs_server_key *key = buffer; uint16_t key_len = sizeof(struct cifs_server_key); @@ -76,16 +78,16 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data, */ switch (sa->sa_family) { case AF_INET: - key->family = server->addr.sockAddr.sin_family; - key->port = server->addr.sockAddr.sin_port; - key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr; + key->family = sa->sa_family; + key->port = addr->sin_port; + key->addr[0].ipv4_addr = addr->sin_addr; key_len += sizeof(key->addr[0].ipv4_addr); break; case AF_INET6: - key->family = server->addr.sockAddr6.sin6_family; - key->port = server->addr.sockAddr6.sin6_port; - key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr; + key->family = sa->sa_family; + key->port = addr6->sin6_port; + key->addr[0].ipv6_addr = addr6->sin6_addr; key_len += sizeof(key->addr[0].ipv6_addr); break; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 103ab8b605b0..ede98300a8cd 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -119,29 +119,27 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) "Display Internal CIFS Data Structures for Debugging\n" "---------------------------------------------------\n"); seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); - seq_printf(m, "Features: "); + seq_printf(m, "Features:"); #ifdef CONFIG_CIFS_DFS_UPCALL - seq_printf(m, "dfs"); - seq_putc(m, ' '); + seq_printf(m, " dfs"); #endif #ifdef CONFIG_CIFS_FSCACHE - seq_printf(m, "fscache"); - seq_putc(m, ' '); + seq_printf(m, " fscache"); #endif #ifdef CONFIG_CIFS_WEAK_PW_HASH - seq_printf(m, "lanman"); - seq_putc(m, ' '); + seq_printf(m, " lanman"); #endif #ifdef CONFIG_CIFS_POSIX - seq_printf(m, "posix"); - seq_putc(m, ' '); + seq_printf(m, " posix"); #endif #ifdef CONFIG_CIFS_UPCALL - seq_printf(m, "spnego"); - seq_putc(m, ' '); + seq_printf(m, " spnego"); #endif #ifdef CONFIG_CIFS_XATTR - seq_printf(m, "xattr"); + seq_printf(m, " xattr"); +#endif +#ifdef CONFIG_CIFS_ACL + seq_printf(m, " acl"); #endif seq_putc(m, '\n'); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 87044906cd1f..4dfba8283165 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -98,6 +98,8 @@ struct key * cifs_get_spnego_key(struct cifsSesInfo *sesInfo) { struct TCP_Server_Info *server = sesInfo->server; + struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr; char *description, *dp; size_t desc_len; struct key *spnego_key; @@ -127,10 +129,10 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); /* add the server address */ - if (server->addr.sockAddr.sin_family == AF_INET) - sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr); - else if (server->addr.sockAddr.sin_family == AF_INET6) - sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr); + if (server->dstaddr.ss_family == AF_INET) + sprintf(dp, "ip4=%pI4", &sa->sin_addr); + else if (server->dstaddr.ss_family == AF_INET6) + sprintf(dp, "ip6=%pI6", &sa6->sin6_addr); else goto out; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index f856732161ab..66f3d50d0676 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -72,6 +72,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, return 0; } +/* must be called with server->srv_mutex held */ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -84,14 +85,12 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) return rc; - spin_lock(&GlobalMid_Lock); cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(server->sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; *pexpected_response_sequence_number = server->sequence_number++; server->sequence_number++; - spin_unlock(&GlobalMid_Lock); rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); if (rc) @@ -149,6 +148,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, return rc; } +/* must be called with server->srv_mutex held */ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -162,14 +162,12 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) return rc; - spin_lock(&GlobalMid_Lock); cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(server->sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; *pexpected_response_sequence_number = server->sequence_number++; server->sequence_number++; - spin_unlock(&GlobalMid_Lock); rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); if (rc) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3936aa7f2c22..5e7075d5f139 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask) +static int cifs_permission(struct inode *inode, int mask, unsigned int flags) { struct cifs_sb_info *cifs_sb; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + cifs_sb = CIFS_SB(inode->i_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { @@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask) on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } static struct kmem_cache *cifs_inode_cachep; @@ -326,6 +329,8 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->invalid_mapping = false; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ cifs_inode->server_eof = 0; + cifs_inode->uniqueid = 0; + cifs_inode->createtime = 0; /* Can not set i_flags here - they get immediately overwritten to zero by the VFS */ @@ -334,10 +339,17 @@ cifs_alloc_inode(struct super_block *sb) return &cifs_inode->vfs_inode; } +static void cifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); +} + static void cifs_destroy_inode(struct inode *inode) { - kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); + call_rcu(&inode->i_rcu, cifs_i_callback); } static void @@ -351,18 +363,19 @@ cifs_evict_inode(struct inode *inode) static void cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) { + struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr; + seq_printf(s, ",addr="); - switch (server->addr.sockAddr.sin_family) { + switch (server->dstaddr.ss_family) { case AF_INET: - seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr); + seq_printf(s, "%pI4", &sa->sin_addr.s_addr); break; case AF_INET6: - seq_printf(s, "%pI6", - &server->addr.sockAddr6.sin6_addr.s6_addr); - if (server->addr.sockAddr6.sin6_scope_id) - seq_printf(s, "%%%u", - server->addr.sockAddr6.sin6_scope_id); + seq_printf(s, "%pI6", &sa6->sin6_addr.s6_addr); + if (sa6->sin6_scope_id) + seq_printf(s, "%%%u", sa6->sin6_scope_id); break; default: seq_printf(s, "(unknown)"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7136c0c3e2f9..606ca8bb7102 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -163,10 +163,7 @@ struct TCP_Server_Info { char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; char *hostname; /* hostname portion of UNC string */ struct socket *ssocket; - union { - struct sockaddr_in sockAddr; - struct sockaddr_in6 sockAddr6; - } addr; + struct sockaddr_storage dstaddr; struct sockaddr_storage srcaddr; /* locally bind to this IP */ wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ @@ -210,7 +207,7 @@ struct TCP_Server_Info { char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; - __u32 sequence_number; /* needed for CIFS PDU signature */ + __u32 sequence_number; /* for signing, protected by srv_mutex */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ u16 dialect; /* dialect index that server chose */ @@ -456,6 +453,7 @@ struct cifsInodeInfo { bool invalid_mapping:1; /* pagecache is invalid */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ + u64 createtime; /* creation time on server */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; #endif @@ -576,6 +574,7 @@ struct cifs_fattr { u64 cf_uniqueid; u64 cf_eof; u64 cf_bytes; + u64 cf_createtime; uid_t cf_uid; gid_t cf_gid; umode_t cf_mode; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 67acfb3acad2..2f6795e524d3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -401,15 +401,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { cFYI(1, "Kerberos only mechanism, enable extended security"); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - } -#ifdef CONFIG_CIFS_EXPERIMENTAL - else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) + } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { cFYI(1, "NTLMSSP only mechanism, enable extended security"); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; } -#endif count = 0; for (i = 0; i < CIFS_NUM_PROT; i++) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index cc1a8604a790..a65d311d163a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -64,8 +64,8 @@ struct smb_vol { char *UNC; char *UNCip; char *iocharset; /* local code page for mapping to and from Unicode */ - char source_rfc1001_name[16]; /* netbios name of client */ - char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ + char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ + char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ uid_t cred_uid; uid_t linux_uid; gid_t linux_gid; @@ -115,8 +115,8 @@ struct smb_vol { #define TLINK_ERROR_EXPIRE (1 * HZ) #define TLINK_IDLE_EXPIRE (600 * HZ) -static int ipv4_connect(struct TCP_Server_Info *server); -static int ipv6_connect(struct TCP_Server_Info *server); +static int ip_connect(struct TCP_Server_Info *server); +static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); static void cifs_prune_tlinks(struct work_struct *work); @@ -200,10 +200,9 @@ cifs_reconnect(struct TCP_Server_Info *server) while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { try_to_freeze(); - if (server->addr.sockAddr6.sin6_family == AF_INET6) - rc = ipv6_connect(server); - else - rc = ipv4_connect(server); + + /* we should try only the port we connected to before */ + rc = generic_ip_connect(server); if (rc) { cFYI(1, "reconnect error %d", rc); msleep(3000); @@ -477,7 +476,7 @@ incomplete_rcv: * initialize frame) */ cifs_set_port((struct sockaddr *) - &server->addr.sockAddr, CIFS_PORT); + &server->dstaddr, CIFS_PORT); cifs_reconnect(server); csocket = server->ssocket; wake_up(&server->response_q); @@ -817,11 +816,11 @@ cifs_parse_mount_options(char *options, const char *devname, * informational, only used for servers that do not support * port 445 and it can be overridden at mount time */ - memset(vol->source_rfc1001_name, 0x20, 15); - for (i = 0; i < strnlen(nodename, 15); i++) + memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN); + for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++) vol->source_rfc1001_name[i] = toupper(nodename[i]); - vol->source_rfc1001_name[15] = 0; + vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0; /* null target name indicates to use *SMBSERVR default called name if we end up sending RFC1001 session initialize */ vol->target_rfc1001_name[0] = 0; @@ -985,13 +984,11 @@ cifs_parse_mount_options(char *options, const char *devname, return 1; } else if (strnicmp(value, "krb5", 4) == 0) { vol->secFlg |= CIFSSEC_MAY_KRB5; -#ifdef CONFIG_CIFS_EXPERIMENTAL } else if (strnicmp(value, "ntlmsspi", 8) == 0) { vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN; } else if (strnicmp(value, "ntlmssp", 7) == 0) { vol->secFlg |= CIFSSEC_MAY_NTLMSSP; -#endif } else if (strnicmp(value, "ntlmv2i", 7) == 0) { vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN; @@ -1168,22 +1165,22 @@ cifs_parse_mount_options(char *options, const char *devname, if (!value || !*value || (*value == ' ')) { cFYI(1, "invalid (empty) netbiosname"); } else { - memset(vol->source_rfc1001_name, 0x20, 15); - for (i = 0; i < 15; i++) { - /* BB are there cases in which a comma can be - valid in this workstation netbios name (and need - special handling)? */ - - /* We do not uppercase netbiosname for user */ + memset(vol->source_rfc1001_name, 0x20, + RFC1001_NAME_LEN); + /* + * FIXME: are there cases in which a comma can + * be valid in workstation netbios name (and + * need special handling)? + */ + for (i = 0; i < RFC1001_NAME_LEN; i++) { + /* don't ucase netbiosname for user */ if (value[i] == 0) break; - else - vol->source_rfc1001_name[i] = - value[i]; + vol->source_rfc1001_name[i] = value[i]; } /* The string has 16th byte zero still from set at top of the function */ - if ((i == 15) && (value[i] != 0)) + if (i == RFC1001_NAME_LEN && value[i] != 0) printk(KERN_WARNING "CIFS: netbiosname" " longer than 15 truncated.\n"); } @@ -1193,7 +1190,8 @@ cifs_parse_mount_options(char *options, const char *devname, cFYI(1, "empty server netbiosname specified"); } else { /* last byte, type, is 0x20 for servr type */ - memset(vol->target_rfc1001_name, 0x20, 16); + memset(vol->target_rfc1001_name, 0x20, + RFC1001_NAME_LEN_WITH_NULL); for (i = 0; i < 15; i++) { /* BB are there cases in which a comma can be @@ -1210,7 +1208,7 @@ cifs_parse_mount_options(char *options, const char *devname, } /* The string has 16th byte zero still from set at top of the function */ - if ((i == 15) && (value[i] != 0)) + if (i == RFC1001_NAME_LEN && value[i] != 0) printk(KERN_WARNING "CIFS: server net" "biosname longer than 15 truncated.\n"); } @@ -1341,10 +1339,8 @@ cifs_parse_mount_options(char *options, const char *devname, vol->no_psx_acl = 0; } else if (strnicmp(data, "noacl", 5) == 0) { vol->no_psx_acl = 1; -#ifdef CONFIG_CIFS_EXPERIMENTAL } else if (strnicmp(data, "locallease", 6) == 0) { vol->local_lease = 1; -#endif } else if (strnicmp(data, "sign", 4) == 0) { vol->secFlg |= CIFSSEC_MUST_SIGN; } else if (strnicmp(data, "seal", 4) == 0) { @@ -1454,35 +1450,71 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs) } } +/* + * If no port is specified in addr structure, we try to match with 445 port + * and if it fails - with 139 ports. It should be called only if address + * families of server and addr are equal. + */ +static bool +match_port(struct TCP_Server_Info *server, struct sockaddr *addr) +{ + unsigned short int port, *sport; + + switch (addr->sa_family) { + case AF_INET: + sport = &((struct sockaddr_in *) &server->dstaddr)->sin_port; + port = ((struct sockaddr_in *) addr)->sin_port; + break; + case AF_INET6: + sport = &((struct sockaddr_in6 *) &server->dstaddr)->sin6_port; + port = ((struct sockaddr_in6 *) addr)->sin6_port; + break; + default: + WARN_ON(1); + return false; + } + + if (!port) { + port = htons(CIFS_PORT); + if (port == *sport) + return true; + + port = htons(RFC1001_PORT); + } + + return port == *sport; +} static bool match_address(struct TCP_Server_Info *server, struct sockaddr *addr, struct sockaddr *srcaddr) { - struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; - struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; - switch (addr->sa_family) { - case AF_INET: - if (addr4->sin_addr.s_addr != - server->addr.sockAddr.sin_addr.s_addr) - return false; - if (addr4->sin_port && - addr4->sin_port != server->addr.sockAddr.sin_port) + case AF_INET: { + struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; + struct sockaddr_in *srv_addr4 = + (struct sockaddr_in *)&server->dstaddr; + + if (addr4->sin_addr.s_addr != srv_addr4->sin_addr.s_addr) return false; break; - case AF_INET6: + } + case AF_INET6: { + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; + struct sockaddr_in6 *srv_addr6 = + (struct sockaddr_in6 *)&server->dstaddr; + if (!ipv6_addr_equal(&addr6->sin6_addr, - &server->addr.sockAddr6.sin6_addr)) + &srv_addr6->sin6_addr)) return false; - if (addr6->sin6_scope_id != - server->addr.sockAddr6.sin6_scope_id) - return false; - if (addr6->sin6_port && - addr6->sin6_port != server->addr.sockAddr6.sin6_port) + if (addr6->sin6_scope_id != srv_addr6->sin6_scope_id) return false; break; } + default: + WARN_ON(1); + return false; /* don't expect to be here */ + } if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr)) return false; @@ -1549,6 +1581,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) (struct sockaddr *)&vol->srcaddr)) continue; + if (!match_port(server, addr)) + continue; + if (!match_security(server, vol)) continue; @@ -1681,14 +1716,13 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, "attempting ipv6 connect"); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - memcpy(&tcp_ses->addr.sockAddr6, sin_server6, - sizeof(struct sockaddr_in6)); - rc = ipv6_connect(tcp_ses); - } else { - memcpy(&tcp_ses->addr.sockAddr, sin_server, - sizeof(struct sockaddr_in)); - rc = ipv4_connect(tcp_ses); - } + memcpy(&tcp_ses->dstaddr, sin_server6, + sizeof(struct sockaddr_in6)); + } else + memcpy(&tcp_ses->dstaddr, sin_server, + sizeof(struct sockaddr_in)); + + rc = ip_connect(tcp_ses); if (rc < 0) { cERROR(1, "Error connecting to socket. Aborting operation"); goto out_err_crypto_release; @@ -1793,6 +1827,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) { int rc = -ENOMEM, xid; struct cifsSesInfo *ses; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; xid = GetXid(); @@ -1836,12 +1872,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) /* new SMB session uses our server ref */ ses->server = server; - if (server->addr.sockAddr6.sin6_family == AF_INET6) - sprintf(ses->serverName, "%pI6", - &server->addr.sockAddr6.sin6_addr); + if (server->dstaddr.ss_family == AF_INET6) + sprintf(ses->serverName, "%pI6", &addr6->sin6_addr); else - sprintf(ses->serverName, "%pI4", - &server->addr.sockAddr.sin_addr.s_addr); + sprintf(ses->serverName, "%pI4", &addr->sin_addr); if (volume_info->username) strncpy(ses->userName, volume_info->username, @@ -2136,19 +2170,106 @@ bind_socket(struct TCP_Server_Info *server) } static int -ipv4_connect(struct TCP_Server_Info *server) +ip_rfc1001_connect(struct TCP_Server_Info *server) +{ + int rc = 0; + /* + * some servers require RFC1001 sessinit before sending + * negprot - BB check reconnection in case where second + * sessinit is sent but no second negprot + */ + struct rfc1002_session_packet *ses_init_buf; + struct smb_hdr *smb_buf; + ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), + GFP_KERNEL); + if (ses_init_buf) { + ses_init_buf->trailer.session_req.called_len = 32; + + if (server->server_RFC1001_name && + server->server_RFC1001_name[0] != 0) + rfc1002mangle(ses_init_buf->trailer. + session_req.called_name, + server->server_RFC1001_name, + RFC1001_NAME_LEN_WITH_NULL); + else + rfc1002mangle(ses_init_buf->trailer. + session_req.called_name, + DEFAULT_CIFS_CALLED_NAME, + RFC1001_NAME_LEN_WITH_NULL); + + ses_init_buf->trailer.session_req.calling_len = 32; + + /* + * calling name ends in null (byte 16) from old smb + * convention. + */ + if (server->workstation_RFC1001_name && + server->workstation_RFC1001_name[0] != 0) + rfc1002mangle(ses_init_buf->trailer. + session_req.calling_name, + server->workstation_RFC1001_name, + RFC1001_NAME_LEN_WITH_NULL); + else + rfc1002mangle(ses_init_buf->trailer. + session_req.calling_name, + "LINUX_CIFS_CLNT", + RFC1001_NAME_LEN_WITH_NULL); + + ses_init_buf->trailer.session_req.scope1 = 0; + ses_init_buf->trailer.session_req.scope2 = 0; + smb_buf = (struct smb_hdr *)ses_init_buf; + + /* sizeof RFC1002_SESSION_REQUEST with no scope */ + smb_buf->smb_buf_length = 0x81000044; + rc = smb_send(server, smb_buf, 0x44); + kfree(ses_init_buf); + /* + * RFC1001 layer in at least one server + * requires very short break before negprot + * presumably because not expecting negprot + * to follow so fast. This is a simple + * solution that works without + * complicating the code and causes no + * significant slowing down on mount + * for everyone else + */ + usleep_range(1000, 2000); + } + /* + * else the negprot may still work without this + * even though malloc failed + */ + + return rc; +} + +static int +generic_ip_connect(struct TCP_Server_Info *server) { int rc = 0; - int val; - bool connected = false; - __be16 orig_port = 0; + unsigned short int sport; + int slen, sfamily; struct socket *socket = server->ssocket; + struct sockaddr *saddr; + + saddr = (struct sockaddr *) &server->dstaddr; + + if (server->dstaddr.ss_family == AF_INET6) { + sport = ((struct sockaddr_in6 *) saddr)->sin6_port; + slen = sizeof(struct sockaddr_in6); + sfamily = AF_INET6; + } else { + sport = ((struct sockaddr_in *) saddr)->sin_port; + slen = sizeof(struct sockaddr_in); + sfamily = AF_INET; + } if (socket == NULL) { - rc = sock_create_kern(PF_INET, SOCK_STREAM, + rc = sock_create_kern(sfamily, SOCK_STREAM, IPPROTO_TCP, &socket); if (rc < 0) { cERROR(1, "Error %d creating socket", rc); + server->ssocket = NULL; return rc; } @@ -2156,63 +2277,28 @@ ipv4_connect(struct TCP_Server_Info *server) cFYI(1, "Socket created"); server->ssocket = socket; socket->sk->sk_allocation = GFP_NOFS; - cifs_reclassify_socket4(socket); + if (sfamily == AF_INET6) + cifs_reclassify_socket6(socket); + else + cifs_reclassify_socket4(socket); } rc = bind_socket(server); if (rc < 0) return rc; - /* user overrode default port */ - if (server->addr.sockAddr.sin_port) { - rc = socket->ops->connect(socket, (struct sockaddr *) - &server->addr.sockAddr, - sizeof(struct sockaddr_in), 0); - if (rc >= 0) - connected = true; - } - - if (!connected) { - /* save original port so we can retry user specified port - later if fall back ports fail this time */ - orig_port = server->addr.sockAddr.sin_port; - - /* do not retry on the same port we just failed on */ - if (server->addr.sockAddr.sin_port != htons(CIFS_PORT)) { - server->addr.sockAddr.sin_port = htons(CIFS_PORT); - rc = socket->ops->connect(socket, - (struct sockaddr *) - &server->addr.sockAddr, - sizeof(struct sockaddr_in), 0); - if (rc >= 0) - connected = true; - } - } - if (!connected) { - server->addr.sockAddr.sin_port = htons(RFC1001_PORT); - rc = socket->ops->connect(socket, (struct sockaddr *) - &server->addr.sockAddr, - sizeof(struct sockaddr_in), 0); - if (rc >= 0) - connected = true; - } - - /* give up here - unless we want to retry on different - protocol families some day */ - if (!connected) { - if (orig_port) - server->addr.sockAddr.sin_port = orig_port; - cFYI(1, "Error %d connecting to server via ipv4", rc); + rc = socket->ops->connect(socket, saddr, slen, 0); + if (rc < 0) { + cFYI(1, "Error %d connecting to server", rc); sock_release(socket); server->ssocket = NULL; return rc; } - /* * Eventually check for other socket options to change from - * the default. sock_setsockopt not used because it expects - * user space buffer + * the default. sock_setsockopt not used because it expects + * user space buffer */ socket->sk->sk_rcvtimeo = 7 * HZ; socket->sk->sk_sndtimeo = 5 * HZ; @@ -2226,7 +2312,7 @@ ipv4_connect(struct TCP_Server_Info *server) } if (server->tcp_nodelay) { - val = 1; + int val = 1; rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); if (rc) @@ -2237,161 +2323,39 @@ ipv4_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); - /* send RFC1001 sessinit */ - if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) { - /* some servers require RFC1001 sessinit before sending - negprot - BB check reconnection in case where second - sessinit is sent but no second negprot */ - struct rfc1002_session_packet *ses_init_buf; - struct smb_hdr *smb_buf; - ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), - GFP_KERNEL); - if (ses_init_buf) { - ses_init_buf->trailer.session_req.called_len = 32; - if (server->server_RFC1001_name && - server->server_RFC1001_name[0] != 0) - rfc1002mangle(ses_init_buf->trailer. - session_req.called_name, - server->server_RFC1001_name, - RFC1001_NAME_LEN_WITH_NULL); - else - rfc1002mangle(ses_init_buf->trailer. - session_req.called_name, - DEFAULT_CIFS_CALLED_NAME, - RFC1001_NAME_LEN_WITH_NULL); - - ses_init_buf->trailer.session_req.calling_len = 32; - - /* calling name ends in null (byte 16) from old smb - convention. */ - if (server->workstation_RFC1001_name && - server->workstation_RFC1001_name[0] != 0) - rfc1002mangle(ses_init_buf->trailer. - session_req.calling_name, - server->workstation_RFC1001_name, - RFC1001_NAME_LEN_WITH_NULL); - else - rfc1002mangle(ses_init_buf->trailer. - session_req.calling_name, - "LINUX_CIFS_CLNT", - RFC1001_NAME_LEN_WITH_NULL); - - ses_init_buf->trailer.session_req.scope1 = 0; - ses_init_buf->trailer.session_req.scope2 = 0; - smb_buf = (struct smb_hdr *)ses_init_buf; - /* sizeof RFC1002_SESSION_REQUEST with no scope */ - smb_buf->smb_buf_length = 0x81000044; - rc = smb_send(server, smb_buf, 0x44); - kfree(ses_init_buf); - msleep(1); /* RFC1001 layer in at least one server - requires very short break before negprot - presumably because not expecting negprot - to follow so fast. This is a simple - solution that works without - complicating the code and causes no - significant slowing down on mount - for everyone else */ - } - /* else the negprot may still work without this - even though malloc failed */ - - } + if (sport == htons(RFC1001_PORT)) + rc = ip_rfc1001_connect(server); return rc; } static int -ipv6_connect(struct TCP_Server_Info *server) +ip_connect(struct TCP_Server_Info *server) { - int rc = 0; - int val; - bool connected = false; - __be16 orig_port = 0; - struct socket *socket = server->ssocket; + unsigned short int *sport; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; - if (socket == NULL) { - rc = sock_create_kern(PF_INET6, SOCK_STREAM, - IPPROTO_TCP, &socket); - if (rc < 0) { - cERROR(1, "Error %d creating ipv6 socket", rc); - socket = NULL; - return rc; - } + if (server->dstaddr.ss_family == AF_INET6) + sport = &addr6->sin6_port; + else + sport = &addr->sin_port; - /* BB other socket options to set KEEPALIVE, NODELAY? */ - cFYI(1, "ipv6 Socket created"); - server->ssocket = socket; - socket->sk->sk_allocation = GFP_NOFS; - cifs_reclassify_socket6(socket); - } + if (*sport == 0) { + int rc; - rc = bind_socket(server); - if (rc < 0) - return rc; + /* try with 445 port at first */ + *sport = htons(CIFS_PORT); - /* user overrode default port */ - if (server->addr.sockAddr6.sin6_port) { - rc = socket->ops->connect(socket, - (struct sockaddr *) &server->addr.sockAddr6, - sizeof(struct sockaddr_in6), 0); - if (rc >= 0) - connected = true; - } - - if (!connected) { - /* save original port so we can retry user specified port - later if fall back ports fail this time */ - - orig_port = server->addr.sockAddr6.sin6_port; - /* do not retry on the same port we just failed on */ - if (server->addr.sockAddr6.sin6_port != htons(CIFS_PORT)) { - server->addr.sockAddr6.sin6_port = htons(CIFS_PORT); - rc = socket->ops->connect(socket, (struct sockaddr *) - &server->addr.sockAddr6, - sizeof(struct sockaddr_in6), 0); - if (rc >= 0) - connected = true; - } - } - if (!connected) { - server->addr.sockAddr6.sin6_port = htons(RFC1001_PORT); - rc = socket->ops->connect(socket, (struct sockaddr *) - &server->addr.sockAddr6, - sizeof(struct sockaddr_in6), 0); + rc = generic_ip_connect(server); if (rc >= 0) - connected = true; - } - - /* give up here - unless we want to retry on different - protocol families some day */ - if (!connected) { - if (orig_port) - server->addr.sockAddr6.sin6_port = orig_port; - cFYI(1, "Error %d connecting to server via ipv6", rc); - sock_release(socket); - server->ssocket = NULL; - return rc; - } - - /* - * Eventually check for other socket options to change from - * the default. sock_setsockopt not used because it expects - * user space buffer - */ - socket->sk->sk_rcvtimeo = 7 * HZ; - socket->sk->sk_sndtimeo = 5 * HZ; + return rc; - if (server->tcp_nodelay) { - val = 1; - rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rc) - cFYI(1, "set TCP_NODELAY socket option error %d", rc); + /* if it failed, try with 139 port */ + *sport = htons(RFC1001_PORT); } - server->ssocket = socket; - - return rc; + return generic_ip_connect(server); } void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3840eddbfb7a..2e773825835e 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -293,10 +293,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle, + current->tgid); } else { /* BB implement mode setting via Windows security descriptors e.g. */ @@ -421,9 +419,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +602,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +632,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ @@ -656,22 +654,37 @@ lookup_out: static int cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { - int isValid = 1; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; if (direntry->d_inode) { if (cifs_revalidate_dentry(direntry)) return 0; - } else { - cFYI(1, "neg dentry 0x%p name = %s", - direntry, direntry->d_name.name); - if (time_after(jiffies, direntry->d_time + HZ) || - !lookupCacheEnabled) { - d_drop(direntry); - isValid = 0; - } + else + return 1; + } + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; } - return isValid; + if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) + return 0; + + return 1; } /* static int cifs_d_delete(struct dentry *direntry) @@ -688,9 +701,10 @@ const struct dentry_operations cifs_dentry_ops = { /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; -static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) +static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *q) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; int i; @@ -703,21 +717,16 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) return 0; } -static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, - struct qstr *b) +static int cifs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; - - if ((a->len == b->len) && - (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) { - /* - * To preserve case, don't let an existing negative dentry's - * case take precedence. If a is not a negative dentry, this - * should have no side effects - */ - memcpy((void *)a->name, b->name, a->len); + struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; + + if ((name->len == len) && + (nls_strnicmp(codepage, name->name, str, len) == 0)) return 0; - } return 1; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5a28660ca2b5..d843631c028d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -104,53 +104,6 @@ static inline int cifs_get_disposition(unsigned int flags) return FILE_OPEN; } -static inline int cifs_open_inode_helper(struct inode *inode, - struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf, - char *full_path, int xid) -{ - struct cifsInodeInfo *pCifsInode = CIFS_I(inode); - struct timespec temp; - int rc; - - if (pCifsInode->clientCanCacheRead) { - /* we have the inode open somewhere else - no need to discard cache data */ - goto client_can_cache; - } - - /* BB need same check in cifs_create too? */ - /* if not oplocked, invalidate inode pages if mtime or file - size changed */ - temp = cifs_NTtimeToUnix(buf->LastWriteTime); - if (timespec_equal(&inode->i_mtime, &temp) && - (inode->i_size == - (loff_t)le64_to_cpu(buf->EndOfFile))) { - cFYI(1, "inode unchanged on server"); - } else { - if (inode->i_mapping) { - /* BB no need to lock inode until after invalidate - since namei code should already have it locked? */ - rc = filemap_write_and_wait(inode->i_mapping); - mapping_set_error(inode->i_mapping, rc); - } - cFYI(1, "invalidating remote inode since open detected it " - "changed"); - invalidate_remote_inode(inode); - } - -client_can_cache: - if (pTcon->unix_ext) - rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, - xid); - else - rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, - xid, NULL); - - cifs_set_oplock_level(pCifsInode, oplock); - - return rc; -} - int cifs_posix_open(char *full_path, struct inode **pinode, struct super_block *sb, int mode, unsigned int f_flags, __u32 *poplock, __u16 *pnetfid, int xid) @@ -213,6 +166,76 @@ posix_open_ret: return rc; } +static int +cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, + struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock, + __u16 *pnetfid, int xid) +{ + int rc; + int desiredAccess; + int disposition; + FILE_ALL_INFO *buf; + + desiredAccess = cifs_convert_flags(f_flags); + +/********************************************************************* + * open flag mapping table: + * + * POSIX Flag CIFS Disposition + * ---------- ---------------- + * O_CREAT FILE_OPEN_IF + * O_CREAT | O_EXCL FILE_CREATE + * O_CREAT | O_TRUNC FILE_OVERWRITE_IF + * O_TRUNC FILE_OVERWRITE + * none of the above FILE_OPEN + * + * Note that there is not a direct match between disposition + * FILE_SUPERSEDE (ie create whether or not file exists although + * O_CREAT | O_TRUNC is similar but truncates the existing + * file rather than creating a new file as FILE_SUPERSEDE does + * (which uses the attributes / metadata passed in on open call) + *? + *? O_SYNC is a reasonable match to CIFS writethrough flag + *? and the read write flags match reasonably. O_LARGEFILE + *? is irrelevant because largefile support is always used + *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, + * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation + *********************************************************************/ + + disposition = cifs_get_disposition(f_flags); + + /* BB pass O_SYNC flag through on file attributes .. BB */ + + buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBOpen(xid, tcon, full_path, disposition, + desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags + & CIFS_MOUNT_MAP_SPECIAL_CHR); + else + rc = SMBLegacyOpen(xid, tcon, full_path, disposition, + desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags + & CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc) + goto out; + + if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, + xid); + else + rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, + xid, pnetfid); + +out: + kfree(buf); + return rc; +} + struct cifsFileInfo * cifs_new_fileinfo(__u16 fileHandle, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -317,10 +340,8 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsFileInfo *pCifsFile = NULL; struct cifsInodeInfo *pCifsInode; char *full_path = NULL; - int desiredAccess; - int disposition; + bool posix_open_ok = false; __u16 netfid; - FILE_ALL_INFO *buf = NULL; xid = GetXid(); @@ -358,17 +379,7 @@ int cifs_open(struct inode *inode, struct file *file) file->f_flags, &oplock, &netfid, xid); if (rc == 0) { cFYI(1, "posix open succeeded"); - - pCifsFile = cifs_new_fileinfo(netfid, file, tlink, - oplock); - if (pCifsFile == NULL) { - CIFSSMBClose(xid, tcon, netfid); - rc = -ENOMEM; - } - - cifs_fscache_set_inode_cookie(inode, file); - - goto out; + posix_open_ok = true; } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { if (tcon->ses->serverNOS) cERROR(1, "server %s of type %s returned" @@ -385,103 +396,39 @@ int cifs_open(struct inode *inode, struct file *file) or DFS errors */ } - desiredAccess = cifs_convert_flags(file->f_flags); - -/********************************************************************* - * open flag mapping table: - * - * POSIX Flag CIFS Disposition - * ---------- ---------------- - * O_CREAT FILE_OPEN_IF - * O_CREAT | O_EXCL FILE_CREATE - * O_CREAT | O_TRUNC FILE_OVERWRITE_IF - * O_TRUNC FILE_OVERWRITE - * none of the above FILE_OPEN - * - * Note that there is not a direct match between disposition - * FILE_SUPERSEDE (ie create whether or not file exists although - * O_CREAT | O_TRUNC is similar but truncates the existing - * file rather than creating a new file as FILE_SUPERSEDE does - * (which uses the attributes / metadata passed in on open call) - *? - *? O_SYNC is a reasonable match to CIFS writethrough flag - *? and the read write flags match reasonably. O_LARGEFILE - *? is irrelevant because largefile support is always used - *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, - * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation - *********************************************************************/ - - disposition = cifs_get_disposition(file->f_flags); - - /* BB pass O_SYNC flag through on file attributes .. BB */ - - /* Also refresh inode by passing in file_info buf returned by SMBOpen - and calling get_inode_info with returned buf (at least helps - non-Unix server case) */ - - /* BB we can not do this if this is the second open of a file - and the first handle has writebehind data, we might be - able to simply do a filemap_fdatawrite/filemap_fdatawait first */ - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (!buf) { - rc = -ENOMEM; - goto out; - } - - if (tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, tcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = -EIO; /* no NT SMB support fall into legacy open below */ - - if (rc == -EIO) { - /* Old server, try legacy style OpenX */ - rc = SMBLegacyOpen(xid, tcon, full_path, disposition, - desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags - & CIFS_MOUNT_MAP_SPECIAL_CHR); - } - if (rc) { - cFYI(1, "cifs_open returned 0x%x", rc); - goto out; + if (!posix_open_ok) { + rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, + file->f_flags, &oplock, &netfid, xid); + if (rc) + goto out; } - rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid); - if (rc != 0) - goto out; - pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); if (pCifsFile == NULL) { + CIFSSMBClose(xid, tcon, netfid); rc = -ENOMEM; goto out; } cifs_fscache_set_inode_cookie(inode, file); - if (oplock & CIFS_CREATE_ACTION) { + if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { /* time to set mode which we can not set earlier due to problems creating new read-only files */ - if (tcon->unix_ext) { - struct cifs_unix_set_info_args args = { - .mode = inode->i_mode, - .uid = NO_CHANGE_64, - .gid = NO_CHANGE_64, - .ctime = NO_CHANGE_64, - .atime = NO_CHANGE_64, - .mtime = NO_CHANGE_64, - .device = 0, - }; - CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - } + struct cifs_unix_set_info_args args = { + .mode = inode->i_mode, + .uid = NO_CHANGE_64, + .gid = NO_CHANGE_64, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid, + pCifsFile->pid); } out: - kfree(buf); kfree(full_path); FreeXid(xid); cifs_put_tlink(tlink); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 589f3e3f6e00..0c7e36910e31 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -518,6 +518,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_eof = le64_to_cpu(info->EndOfFile); fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + fattr->cf_createtime = le64_to_cpu(info->CreationTime); if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; @@ -779,6 +780,10 @@ cifs_find_inode(struct inode *inode, void *opaque) if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; + /* use createtime like an i_generation field */ + if (CIFS_I(inode)->createtime != fattr->cf_createtime) + return 0; + /* don't match inode of different type */ if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) return 0; @@ -796,6 +801,7 @@ cifs_init_inode(struct inode *inode, void *opaque) struct cifs_fattr *fattr = (struct cifs_fattr *) opaque; CIFS_I(inode)->uniqueid = fattr->cf_uniqueid; + CIFS_I(inode)->createtime = fattr->cf_createtime; return 0; } @@ -809,14 +815,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); return true; } } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); return false; } @@ -1319,9 +1325,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1369,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a73eb9f4bdaf..76b1b37c9e6b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, cFYI(1, "For %s", name->name); if (parent->d_op && parent->d_op->d_hash) - parent->d_op->d_hash(parent, name); + parent->d_op->d_hash(parent, parent->d_inode, name); else name->hash = full_name_hash(name->name, name->len); @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != NULL) { @@ -160,6 +160,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); fattr->cf_eof = le64_to_cpu(info->EndOfFile); fattr->cf_bytes = le64_to_cpu(info->AllocationSize); + fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7b01d3f6eed6..eb746486e49e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -420,7 +420,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, return 0; } -#ifdef CONFIG_CIFS_EXPERIMENTAL /* BB Move to ntlmssp.c eventually */ /* We do not malloc the blob, it is passed in pbuffer, because @@ -431,13 +430,14 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer; __u32 flags; + memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE)); memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmNegotiate; /* BB is NTLMV2 session security format easier to use here? */ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM; + NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { flags |= NTLMSSP_NEGOTIATE_SIGN; @@ -446,7 +446,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_EXTENDED_SEC; } - sec_blob->NegotiateFlags |= cpu_to_le32(flags); + sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->WorkstationName.BufferOffset = 0; sec_blob->WorkstationName.Length = 0; @@ -477,7 +477,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM; + NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) flags |= NTLMSSP_NEGOTIATE_SIGN; @@ -485,7 +485,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); - sec_blob->NegotiateFlags |= cpu_to_le32(flags); + sec_blob->NegotiateFlags = cpu_to_le32(flags); sec_blob->LmChallengeResponse.BufferOffset = cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE)); @@ -544,8 +544,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; - if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && - !calc_seckey(ses)) { + if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) || + (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) + && !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); @@ -563,17 +564,6 @@ setup_ntlmv2_ret: return rc; } - -static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, - struct cifsSesInfo *ses) -{ - build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses); - pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); - - return; -} -#endif - int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const struct nls_table *nls_cp) @@ -814,71 +804,70 @@ ssetup_ntlmssp_authenticate: rc = -ENOSYS; goto ssetup_exit; #endif /* CONFIG_CIFS_UPCALL */ - } else { -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (type == RawNTLMSSP) { - if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { - cERROR(1, "NTLMSSP requires Unicode support"); - rc = -ENOSYS; + } else if (type == RawNTLMSSP) { + if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { + cERROR(1, "NTLMSSP requires Unicode support"); + rc = -ENOSYS; + goto ssetup_exit; + } + + cFYI(1, "ntlmssp session setup phase %d", phase); + pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; + capabilities |= CAP_EXTENDED_SECURITY; + pSMB->req.Capabilities |= cpu_to_le32(capabilities); + switch(phase) { + case NtLmNegotiate: + build_ntlmssp_negotiate_blob( + pSMB->req.SecurityBlob, ses); + iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); + iov[1].iov_base = pSMB->req.SecurityBlob; + pSMB->req.SecurityBlobLength = + cpu_to_le16(sizeof(NEGOTIATE_MESSAGE)); + break; + case NtLmAuthenticate: + /* + * 5 is an empirical value, large enough to hold + * authenticate message plus max 10 of av paris, + * domain, user, workstation names, flags, etc. + */ + ntlmsspblob = kzalloc( + 5*sizeof(struct _AUTHENTICATE_MESSAGE), + GFP_KERNEL); + if (!ntlmsspblob) { + cERROR(1, "Can't allocate NTLMSSP blob"); + rc = -ENOMEM; goto ssetup_exit; } - cFYI(1, "ntlmssp session setup phase %d", phase); - pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; - capabilities |= CAP_EXTENDED_SECURITY; - pSMB->req.Capabilities |= cpu_to_le32(capabilities); - if (phase == NtLmNegotiate) { - setup_ntlmssp_neg_req(pSMB, ses); - iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); - iov[1].iov_base = &pSMB->req.SecurityBlob[0]; - } else if (phase == NtLmAuthenticate) { - /* 5 is an empirical value, large enought to - * hold authenticate message, max 10 of - * av paris, doamin,user,workstation mames, - * flags etc.. - */ - ntlmsspblob = kmalloc( - 5*sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - cERROR(1, "Can't allocate NTLMSSP"); - rc = -ENOMEM; - goto ssetup_exit; - } - - rc = build_ntlmssp_auth_blob(ntlmsspblob, - &blob_len, ses, nls_cp); - if (rc) - goto ssetup_exit; - iov[1].iov_len = blob_len; - iov[1].iov_base = ntlmsspblob; - pSMB->req.SecurityBlobLength = - cpu_to_le16(blob_len); - /* Make sure that we tell the server that we - are using the uid that it just gave us back - on the response (challenge) */ - smb_buf->Uid = ses->Suid; - } else { - cERROR(1, "invalid phase %d", phase); - rc = -ENOSYS; + rc = build_ntlmssp_auth_blob(ntlmsspblob, + &blob_len, ses, nls_cp); + if (rc) goto ssetup_exit; - } - /* unicode strings must be word aligned */ - if ((iov[0].iov_len + iov[1].iov_len) % 2) { - *bcc_ptr = 0; - bcc_ptr++; - } - unicode_oslm_strings(&bcc_ptr, nls_cp); - } else { - cERROR(1, "secType %d not supported!", type); + iov[1].iov_len = blob_len; + iov[1].iov_base = ntlmsspblob; + pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); + /* + * Make sure that we tell the server that we are using + * the uid that it just gave us back on the response + * (challenge) + */ + smb_buf->Uid = ses->Suid; + break; + default: + cERROR(1, "invalid phase %d", phase); rc = -ENOSYS; goto ssetup_exit; } -#else + /* unicode strings must be word aligned */ + if ((iov[0].iov_len + iov[1].iov_len) % 2) { + *bcc_ptr = 0; + bcc_ptr++; + } + unicode_oslm_strings(&bcc_ptr, nls_cp); + } else { cERROR(1, "secType %d not supported!", type); rc = -ENOSYS; goto ssetup_exit; -#endif } iov[2].iov_base = str_area; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e0588cdf4cc5..59ca81b16919 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -119,7 +119,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ - smb_msg.msg_name = (struct sockaddr *) &server->addr.sockAddr; + smb_msg.msg_name = (struct sockaddr *) &server->dstaddr; smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 9060f08e70cf..5525e1c660fd 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct list_head *child; struct dentry *de; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { de = list_entry(child, struct dentry, d_u.d_child); @@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag) continue; coda_flag_inode(de->d_inode, flag); } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); return; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 5d8b35539601..29badd91360f 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -18,6 +18,7 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/spinlock.h> +#include <linux/namei.h> #include <asm/uaccess.h> @@ -47,7 +48,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir); /* dentry ops */ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); -static int coda_dentry_delete(struct dentry *); +static int coda_dentry_delete(const struct dentry *); /* support routines */ static int coda_venus_readdir(struct file *coda_file, void *buf, @@ -125,7 +126,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - entry->d_op = &coda_dentry_operations; + d_set_d_op(entry, &coda_dentry_operations); if (inode && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); @@ -134,10 +135,13 @@ exit: } -int coda_permission(struct inode *inode, int mask) +int coda_permission(struct inode *inode, int mask, unsigned int flags) { int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (!mask) @@ -541,9 +545,13 @@ out: /* called when a cache lookup succeeds */ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) { - struct inode *inode = de->d_inode; + struct inode *inode; struct coda_inode_info *cii; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = de->d_inode; if (!inode || coda_isroot(inode)) goto out; if (is_bad_inode(inode)) @@ -559,7 +567,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) if (cii->c_flags & C_FLUSH) coda_flag_inode_children(inode, C_FLUSH); - if (atomic_read(&de->d_count) > 1) + if (de->d_count > 1) /* pretend it's valid, but don't change the flags */ goto out; @@ -577,7 +585,7 @@ out: * This is the callback from dput() when d_count is going to 0. * We use this to unhash dentries with bad inodes. */ -static int coda_dentry_delete(struct dentry * dentry) +static int coda_dentry_delete(const struct dentry * dentry) { int flags; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 5ea57c8c7f97..50dc7d189f56 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void coda_destroy_inode(struct inode *inode) +static void coda_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(coda_inode_cachep, ITOC(inode)); } +static void coda_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, coda_i_callback); +} + static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 2fd89b5c5c7b..741f0bd03918 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,7 +24,7 @@ #include <linux/coda_psdev.h> /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask); +static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags); static long coda_pioctl(struct file *filp, unsigned int cmd, unsigned long user_data); @@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask) +static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; return (mask & MAY_EXEC) ? -EACCES : 0; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a60579b007b0..61abb638b4bf 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -42,7 +42,7 @@ #include <linux/tty.h> #include <linux/vt_kern.h> #include <linux/fb.h> -#include <linux/videodev.h> +#include <linux/videodev2.h> #include <linux/netdevice.h> #include <linux/raw.h> #include <linux/blkdev.h> @@ -836,6 +836,7 @@ COMPATIBLE_IOCTL(TCSETSW) COMPATIBLE_IOCTL(TCSETSF) COMPATIBLE_IOCTL(TIOCLINUX) COMPATIBLE_IOCTL(TIOCSBRK) +COMPATIBLE_IOCTL(TIOCGDEV) COMPATIBLE_IOCTL(TIOCCBRK) COMPATIBLE_IOCTL(TIOCGSID) COMPATIBLE_IOCTL(TIOCGICOUNT) diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da6061a6df40..026cf68553a4 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry { struct config_item * item = NULL; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_ITEM_LINK) { @@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry } else item = config_item_get(sd->s_element); } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return item; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 0b502f80c691..36637a8c1ed3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry, * We _must_ delete our dentries on last dput, as the chain-to-parent * behavior is required to clear the parents of default_groups. */ -static int configfs_d_delete(struct dentry *dentry) +static int configfs_d_delete(const struct dentry *dentry) { return 1; } @@ -232,10 +232,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, sd->s_mode = mode; sd->s_dentry = dentry; - if (dentry) { + if (dentry) dentry->d_fsdata = configfs_get(sd); - dentry->d_op = &configfs_dentry_ops; - } return 0; } @@ -278,7 +276,6 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_create(d, mode, init_dir); if (!error) { inc_nlink(p->d_inode); - (d)->d_op = &configfs_dentry_ops; } else { struct configfs_dirent *sd = d->d_fsdata; if (sd) { @@ -371,9 +368,7 @@ int configfs_create_link(struct configfs_symlink *sl, CONFIGFS_ITEM_LINK); if (!err) { err = configfs_create(dentry, mode, init_symlink); - if (!err) - dentry->d_op = &configfs_dentry_ops; - else { + if (err) { struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { spin_lock(&configfs_dirent_lock); @@ -399,8 +394,7 @@ static void remove_dir(struct dentry * d) if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); dput(parent); } @@ -448,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den return error; } - dentry->d_op = &configfs_dentry_ops; + d_set_d_op(dentry, &configfs_dentry_ops); d_rehash(dentry); return 0; @@ -493,7 +487,11 @@ static struct dentry * configfs_lookup(struct inode *dir, * If it doesn't exist and it isn't a NOT_PINNED item, * it must be negative. */ - return simple_lookup(dir, dentry, nd); + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_set_d_op(dentry, &configfs_dentry_ops); + d_add(dentry, NULL); + return NULL; } out: @@ -685,6 +683,7 @@ static int create_default_group(struct config_group *parent_group, ret = -ENOMEM; child = d_alloc(parent, &name); if (child) { + d_set_d_op(child, &configfs_dentry_ops); d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, @@ -1682,6 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); if (dentry) { + d_set_d_op(dentry, &configfs_dentry_ops); d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 253476d78ed8..c83f4768eeaa 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) struct dentry * dentry = sd->s_dentry; if (dentry) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); - } else { + } else spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } } } diff --git a/fs/dcache.c b/fs/dcache.c index 23702a9d4e6d..5699d4c027cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -33,20 +33,58 @@ #include <linux/bootmem.h> #include <linux/fs_struct.h> #include <linux/hardirq.h> +#include <linux/bit_spinlock.h> +#include <linux/rculist_bl.h> #include "internal.h" +/* + * Usage: + * dcache->d_inode->i_lock protects: + * - i_dentry, d_alias, d_inode of aliases + * dcache_hash_bucket lock protects: + * - the dcache hash table + * s_anon bl list spinlock protects: + * - the s_anon list (see __d_drop) + * dcache_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru + * - d_count + * - d_unhashed() + * - d_parent and d_subdirs + * - childrens' d_child and d_parent + * - d_alias, d_inode + * + * Ordering: + * dentry->d_inode->i_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_bucket lock + * s_anon lock + * + * If there is an ancestor relationship: + * dentry->d_parent->...->d_parent->d_lock + * ... + * dentry->d_parent->d_lock + * dentry->d_lock + * + * If no ancestor relationship: + * if (dentry1 < dentry2) + * dentry1->d_lock + * dentry2->d_lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -EXPORT_SYMBOL(dcache_lock); +EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +struct dcache_hash_bucket { + struct hlist_bl_head head; +}; +static struct dcache_hash_bucket *dentry_hashtable __read_mostly; + +static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + +static inline void spin_lock_bucket(struct dcache_hash_bucket *b) +{ + bit_spin_lock(0, (unsigned long *)&b->head.first); +} + +static inline void spin_unlock_bucket(struct dcache_hash_bucket *b) +{ + __bit_spin_unlock(0, (unsigned long *)&b->head.first); +} /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; -static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_dentry); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +static int get_nr_dentry(void) +{ + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); - dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); + dentry_stat.nr_dentry = get_nr_dentry(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head) } /* - * no dcache_lock, please. + * no locks, please. */ static void d_free(struct dentry *dentry) { - percpu_counter_dec(&nr_dentry); + BUG_ON(dentry->d_count); + this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_unhashed(&dentry->d_hash)) + if (hlist_bl_unhashed(&dentry->d_hash)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); } +/** + * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * After this call, in-progress rcu-walk path lookup will fail. This + * should be called after unhashing, and after changing d_inode (if + * the dentry has not already been unhashed). + */ +static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +{ + assert_spin_locked(&dentry->d_lock); + /* Go through a barrier */ + write_seqcount_barrier(&dentry->d_seq); +} + /* * Release the dentry's inode, using the filesystem - * d_iput() operation if defined. + * d_iput() operation if defined. Dentry has no refcount + * and is unhashed. */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_lock) + __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } /* - * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. + * Release the dentry's inode, using the filesystem + * d_iput() operation if defined. dentry remains in-use. + */ +static void dentry_unlink_inode(struct dentry * dentry) + __releases(dentry->d_lock) + __releases(dentry->d_inode->i_lock) +{ + struct inode *inode = dentry->d_inode; + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); +} + +/* + * dentry_lru_(add|del|move_tail) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { if (list_empty(&dentry->d_lru)) { + spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); } } +static void __dentry_lru_del(struct dentry *dentry) +{ + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; +} + static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - percpu_counter_dec(&nr_dentry_unused); + spin_lock(&dcache_lru_lock); + __dentry_lru_del(dentry); + spin_unlock(&dcache_lru_lock); } } static void dentry_lru_move_tail(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; - percpu_counter_inc(&nr_dentry_unused); + dentry_stat.nr_unused++; } else { list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); } + spin_unlock(&dcache_lru_lock); } /** @@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry) * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. + * + * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by + * d_kill. */ -static struct dentry *d_kill(struct dentry *dentry) +static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(dentry->d_lock) - __releases(dcache_lock) + __releases(parent->d_lock) + __releases(dentry->d_inode->i_lock) { - struct dentry *parent; - + dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); - /*drops the locks, at that point nobody can reach this dentry */ + if (parent) + spin_unlock(&parent->d_lock); dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + d_free(dentry); + return parent; +} + +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent dentry hashes, so that it won't + * be found through a VFS lookup any more. Note that this is different from + * deleting the dentry - d_delete will try to mark the dentry negative if + * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants to invalidate a dentry for some + * reason (NFS timeouts or autofs deletes). + * + * __d_drop requires dentry->d_lock. + */ +void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + bit_spin_lock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_init(&dentry->d_hash); + __bit_spin_unlock(0, + (unsigned long *)&dentry->d_sb->s_anon.first); + } else { + struct dcache_hash_bucket *b; + b = d_hash(dentry->d_parent, dentry->d_name.hash); + spin_lock_bucket(b); + /* + * We may not actually need to put DCACHE_UNHASHED + * manipulations under the hash lock, but follow + * the principle of least surprise. + */ + dentry->d_flags |= DCACHE_UNHASHED; + hlist_bl_del_rcu(&dentry->d_hash); + spin_unlock_bucket(b); + dentry_rcuwalk_barrier(dentry); + } + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_drop); + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) + __releases(dentry->d_lock) +{ + struct inode *inode; + struct dentry *parent; + + inode = dentry->d_inode; + if (inode && !spin_trylock(&inode->i_lock)) { +relock: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ + } if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; - d_free(dentry); - return parent; + if (parent && !spin_trylock(&parent->d_lock)) { + if (inode) + spin_unlock(&inode->i_lock); + goto relock; + } + + if (ref) + dentry->d_count--; + /* if dentry was on the d_lru list delete it from there */ + dentry_lru_del(dentry); + /* if it was on the hash then remove it */ + __d_drop(dentry); + return d_kill(dentry, parent); } /* @@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry) * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. - * - * no dcache lock, please. */ - void dput(struct dentry *dentry) { if (!dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + BUG_ON(!dentry->d_count); + if (dentry->d_count > 1) { + dentry->d_count--; spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return; } - /* - * AV: ->d_delete() is _NOT_ allowed to block now. - */ - if (dentry->d_op && dentry->d_op->d_delete) { + if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + goto kill_it; } /* Unreachable? Get rid of it */ @@ -252,16 +451,12 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; -unhash_it: - __d_drop(dentry); kill_it: - /* if dentry was on the d_lru list delete it from there */ - dentry_lru_del(dentry); - dentry = d_kill(dentry); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return 0; } /* @@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } EXPORT_SYMBOL(d_invalidate); -/* This should be called _only_ with dcache_lock held */ -static inline struct dentry * __dget_locked(struct dentry *dentry) +/* This must be called with d_lock held */ +static inline void __dget_dlock(struct dentry *dentry) { - atomic_inc(&dentry->d_count); - dentry_lru_del(dentry); - return dentry; + dentry->d_count++; } -struct dentry * dget_locked(struct dentry *dentry) +static inline void __dget(struct dentry *dentry) { - return __dget_locked(dentry); + spin_lock(&dentry->d_lock); + __dget_dlock(dentry); + spin_unlock(&dentry->d_lock); +} + +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + /* + * Don't need rcu_dereference because we re-check it was correct under + * the lock. + */ + rcu_read_lock(); + ret = dentry->d_parent; + if (!ret) { + rcu_read_unlock(); + goto out; + } + spin_lock(&ret->d_lock); + if (unlikely(ret != dentry->d_parent)) { + spin_unlock(&ret->d_lock); + rcu_read_unlock(); + goto repeat; + } + rcu_read_unlock(); + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + return ret; } -EXPORT_SYMBOL(dget_locked); +EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode @@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked); * any other hashed alias over that one unless @want_discon is set, * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ - -static struct dentry * __d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { - struct list_head *head, *next, *tmp; - struct dentry *alias, *discon_alias=NULL; + struct dentry *alias, *discon_alias; - head = &inode->i_dentry; - next = inode->i_dentry.next; - while (next != head) { - tmp = next; - next = tmp->next; - prefetch(next); - alias = list_entry(tmp, struct dentry, d_alias); +again: + discon_alias = NULL; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && - (alias->d_flags & DCACHE_DISCONNECTED)) + (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - else if (!want_discon) { - __dget_locked(alias); + } else if (!want_discon) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + } + spin_unlock(&alias->d_lock); + } + if (discon_alias) { + alias = discon_alias; + spin_lock(&alias->d_lock); + if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) { + __dget_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); + goto again; } - if (discon_alias) - __dget_locked(discon_alias); - return discon_alias; + return NULL; } -struct dentry * d_find_alias(struct inode *inode) +struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } return de; } @@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + if (!dentry->d_count) { + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_prune_aliases); /* - * Throw away a dentry - free the inode, dput the parent. This requires that - * the LRU list has already been removed. + * Try to throw away a dentry - free the inode, dput the parent. + * Requires dentry->d_lock is held, and dentry->d_count == 0. + * Releases dentry->d_lock. * - * Try to prune ancestors as well. This is necessary to prevent - * quadratic behavior of shrink_dcache_parent(), but is also expected - * to be beneficial in reducing dentry cache fragmentation. + * This may fail if locks cannot be acquired no problem, just try again. */ -static void prune_one_dentry(struct dentry * dentry) +static void try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) - __releases(dcache_lock) - __acquires(dcache_lock) { - __d_drop(dentry); - dentry = d_kill(dentry); + struct dentry *parent; + parent = dentry_kill(dentry, 0); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * If dentry_kill returns NULL, we have nothing more to do. + * if it returns the same dentry, trylocks failed. In either + * case, just loop again. + * + * Otherwise, we need to prune ancestors too. This is necessary + * to prevent quadratic behavior of shrink_dcache_parent(), but + * is also expected to be beneficial in reducing dentry cache + * fragmentation. */ - spin_lock(&dcache_lock); + if (!parent) + return; + if (parent == dentry) + return; + + /* Prune ancestors. */ + dentry = parent; while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + spin_lock(&dentry->d_lock); + if (dentry->d_count > 1) { + dentry->d_count--; + spin_unlock(&dentry->d_lock); return; - - if (dentry->d_op && dentry->d_op->d_delete) - dentry->d_op->d_delete(dentry); - dentry_lru_del(dentry); - __d_drop(dentry); - dentry = d_kill(dentry); - spin_lock(&dcache_lock); + } + dentry = dentry_kill(dentry, 1); } } @@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list) { struct dentry *dentry; - while (!list_empty(list)) { - dentry = list_entry(list->prev, struct dentry, d_lru); - dentry_lru_del(dentry); + rcu_read_lock(); + for (;;) { + dentry = list_entry_rcu(list->prev, struct dentry, d_lru); + if (&dentry->d_lru == list) + break; /* empty */ + spin_lock(&dentry->d_lock); + if (dentry != list_entry(list->prev, struct dentry, d_lru)) { + spin_unlock(&dentry->d_lock); + continue; + } /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { + dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } - prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); + + rcu_read_unlock(); + + try_prune_one_dentry(dentry); + + rcu_read_lock(); } + rcu_read_unlock(); } /** @@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) LIST_HEAD(tmp); int cnt = *count; - spin_lock(&dcache_lock); +relock: + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { dentry = list_entry(sb->s_dentry_lru.prev, struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + cpu_relax(); + goto relock; + } + /* * If we are honouring the DCACHE_REFERENCED flag and the * dentry has this flag set, don't free it. Clear the flag * and put it back on the LRU. */ - if (flags & DCACHE_REFERENCED) { - spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - cond_resched_lock(&dcache_lock); - continue; - } + if (flags & DCACHE_REFERENCED && + dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); + } else { + list_move_tail(&dentry->d_lru, &tmp); + spin_unlock(&dentry->d_lock); + if (!--cnt) + break; } - - list_move_tail(&dentry->d_lru, &tmp); - if (!--cnt) - break; - cond_resched_lock(&dcache_lock); + cond_resched_lock(&dcache_lru_lock); } - - *count = cnt; - shrink_dentry_list(&tmp); - if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lock); + spin_unlock(&dcache_lru_lock); + shrink_dentry_list(&tmp); + + *count = cnt; } /** @@ -546,13 +796,12 @@ static void prune_dcache(int count) { struct super_block *sb, *p = NULL; int w_count; - int unused = percpu_counter_sum_positive(&nr_dentry_unused); + int unused = dentry_stat.nr_unused; int prune_ratio; int pruned; if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); if (count >= unused) prune_ratio = 1; else @@ -589,11 +838,9 @@ static void prune_dcache(int count) if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -609,7 +856,6 @@ static void prune_dcache(int count) if (p) __put_super(p); spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb) { LIST_HEAD(tmp); - spin_lock(&dcache_lock); + spin_lock(&dcache_lru_lock); while (!list_empty(&sb->s_dentry_lru)) { list_splice_init(&sb->s_dentry_lru, &tmp); + spin_unlock(&dcache_lru_lock); shrink_dentry_list(&tmp); + spin_lock(&dcache_lru_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&dcache_lru_lock); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); dentry_lru_del(dentry); __d_drop(dentry); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&loop->d_lock, + DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); __d_drop(loop); - cond_resched_lock(&dcache_lock); + spin_unlock(&loop->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + list_del(&dentry->d_u.d_child); + spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); - while (!hlist_empty(&sb->s_anon)) { - dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); shrink_dcache_for_umount_subtree(dentry); } } @@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + int locked = 0; + + seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; - spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -784,27 +1043,65 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return 1; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } EXPORT_SYMBOL(have_submounts); @@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts); */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; int found = 0; + int locked = 0; - spin_lock(&dcache_lock); + seq = read_seqbegin(&rename_lock); +again: + this_parent = parent; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -837,11 +1139,13 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + /* * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_move_tail(dentry); found++; } else { @@ -853,28 +1157,63 @@ resume: * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); return found; + +rename_retry: + if (found) + return found; + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** @@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent); */ static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) { - int nr_unused; - if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; prune_dcache(nr); } - nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); - return (nr_unused / 100) * sysctl_vfs_cache_pressure; + return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } static struct shrinker dcache_shrinker = { @@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); + dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - dentry->d_mounted = 0; - INIT_HLIST_NODE(&dentry->d_hash); + INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - dentry->d_parent = dget(parent); + spin_lock(&parent->d_lock); + /* + * don't need child lock because it is not subject + * to concurrency here + */ + __dget_dlock(parent); + dentry->d_parent = parent; dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } - - spin_lock(&dcache_lock); - if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); + } - percpu_counter_inc(&nr_dentry); + this_cpu_inc(nr_dentry); return dentry; } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) +{ + struct dentry *dentry = d_alloc(NULL, name); + if (dentry) { + dentry->d_sb = sb; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; + } + return dentry; +} +EXPORT_SYMBOL(d_alloc_pseudo); + struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; @@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); -/* the caller must hold dcache_lock */ +void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) +{ + BUG_ON(dentry->d_op); + BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | + DCACHE_OP_COMPARE | + DCACHE_OP_REVALIDATE | + DCACHE_OP_DELETE )); + dentry->d_op = op; + if (!op) + return; + if (op->d_hash) + dentry->d_flags |= DCACHE_OP_HASH; + if (op->d_compare) + dentry->d_flags |= DCACHE_OP_COMPARE; + if (op->d_revalidate) + dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_delete) + dentry->d_flags |= DCACHE_OP_DELETE; + +} +EXPORT_SYMBOL(d_set_d_op); + static void __d_instantiate(struct dentry *dentry, struct inode *inode) { + spin_lock(&dentry->d_lock); if (inode) list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + dentry_rcuwalk_barrier(dentry); + spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } @@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); + if (inode) + spin_lock(&inode->i_lock); __d_instantiate(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ if (qstr->hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; - if (qstr->len != len) + if (dentry_cmp(qstr->name, qstr->len, name, len)) continue; - if (memcmp(qstr->name, name, len)) - continue; - dget_locked(alias); + __dget(alias); return alias; } @@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); + if (inode) + spin_lock(&inode->i_lock); result = __d_instantiate_unique(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode) } EXPORT_SYMBOL(d_alloc_root); -static inline struct hlist_head *d_hash(struct dentry *parent, - unsigned long hash) -{ - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); -} - /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode) } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); + + spin_lock(&inode->i_lock); res = __d_find_alias(inode, 0); if (res) { - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(tmp); goto out_iput; } @@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_sb = inode->i_sb; tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - tmp->d_flags &= ~DCACHE_UNHASHED; list_add(&tmp->d_alias, &inode->i_dentry); - hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + tmp->d_flags &= ~DCACHE_UNHASHED; + hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); + __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); spin_unlock(&tmp->d_lock); + spin_unlock(&inode->i_lock); - spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taking inode->i_lock, so d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); return found; } @@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); - spin_unlock(&dcache_lock); + __dget(new); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1342,6 +1718,112 @@ err_out: EXPORT_SYMBOL(d_add_ci); /** + * __d_lookup_rcu - search for a dentry (racy, store-free) + * @parent: parent dentry + * @name: qstr of name we wish to find + * @seq: returns d_seq value at the point where the dentry was found + * @inode: returns dentry->d_inode when the inode was found valid. + * Returns: dentry, or NULL + * + * __d_lookup_rcu is the dcache lookup function for rcu-walk name + * resolution (store-free path walking) design described in + * Documentation/filesystems/path-lookup.txt. + * + * This is not to be used outside core vfs. + * + * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock + * held, and rcu_read_lock held. The returned dentry must not be stored into + * without taking d_lock and checking d_seq sequence count against @seq + * returned here. + * + * A refcount may be taken on the found dentry with the __d_rcu_to_refcount + * function. + * + * Alternatively, __d_lookup_rcu may be called again to look up the child of + * the returned dentry, so long as its parent's seqlock is checked after the + * child is looked up. Thus, an interlocking stepping of sequence lock checks + * is formed, giving integrity down the path walk. + */ +struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, + unsigned *seq, struct inode **inode) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; + struct dentry *dentry; + + /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + + /* + * The hash list is protected using RCU. + * + * Carefully use d_seq when comparing a candidate dentry, to avoid + * races with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { + struct inode *i; + const char *tname; + int tlen; + + if (dentry->d_name.hash != hash) + continue; + +seqretry: + *seq = read_seqcount_begin(&dentry->d_seq); + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + i = dentry->d_inode; + prefetch(tname); + if (i) + prefetch(i); + /* + * This seqcount check is required to ensure name and + * len are loaded atomically, so as not to walk off the + * edge of memory when walking. If we could load this + * atomically some other way, we could drop this check. + */ + if (read_seqcount_retry(&dentry->d_seq, *seq)) + goto seqretry; + if (parent->d_flags & DCACHE_OP_COMPARE) { + if (parent->d_op->d_compare(parent, *inode, + dentry, i, + tlen, tname, name)) + continue; + } else { + if (dentry_cmp(tname, tlen, str, len)) + continue; + } + /* + * No extra seqcount check is required after the name + * compare. The caller must perform a seqcount check in + * order to do anything useful with the returned dentry + * anyway. + */ + *inode = i; + return dentry; + } + return NULL; +} + +/** * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find @@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci); * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *d_lookup(struct dentry *parent, struct qstr *name) { - struct dentry * dentry = NULL; - unsigned long seq; + struct dentry *dentry; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); -/* +/** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find @@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup); * * __d_lookup callers must be commented. */ -struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) +struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent,hash); + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_bl_node *node; struct dentry *found = NULL; - struct hlist_node *node; struct dentry *dentry; /* + * Note: There is significant duplication with __d_lookup_rcu which is + * required to prevent single threaded performance regressions + * especially on architectures where smp_rmb (in seqcounts) are costly. + * Keep the two functions in sync. + */ + + /* * The hash list is protected using RCU. * * Take d_lock when comparing a candidate dentry, to avoid races @@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) */ rcu_read_lock(); - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { - struct qstr *qstr; + hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { + const char *tname; + int tlen; if (dentry->d_name.hash != hash) continue; - if (dentry->d_parent != parent) - continue; spin_lock(&dentry->d_lock); - - /* - * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because - * we're about to compare the whole name anyway. - */ if (dentry->d_parent != parent) goto next; - - /* non-existing due to RCU? */ if (d_unhashed(dentry)) goto next; @@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). */ - qstr = &dentry->d_name; - if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) + tlen = dentry->d_name.len; + tname = dentry->d_name.name; + if (parent->d_flags & DCACHE_OP_COMPARE) { + if (parent->d_op->d_compare(parent, parent->d_inode, + dentry, dentry->d_inode, + tlen, tname, name)) goto next; } else { - if (qstr->len != len) - goto next; - if (memcmp(qstr->name, str, len)) + if (dentry_cmp(tname, tlen, str, len)) goto next; } - atomic_inc(&dentry->d_count); + dentry->d_count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) * routine may choose to leave the hash value unchanged. */ name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) { - if (dir->d_op->d_hash(dir, name) < 0) + if (dir->d_flags & DCACHE_OP_HASH) { + if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) goto out; } dentry = d_lookup(dir, name); @@ -1483,34 +1964,32 @@ out: } /** - * d_validate - verify dentry provided from insecure source + * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. * Zero is returned in the dentry is invalid. + * + * This function is slow for big directories, and deprecated, do not use it. */ -int d_validate(struct dentry *dentry, struct dentry *parent) +int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct hlist_head *head = d_hash(parent, dentry->d_name.hash); - struct hlist_node *node; - struct dentry *d; - - /* Check whether the ptr might be valid at all.. */ - if (!kmem_ptr_validate(dentry_cache, dentry)) - return 0; - if (dentry->d_parent != parent) - return 0; + struct dentry *child; - rcu_read_lock(); - hlist_for_each_entry_rcu(d, node, head, d_hash) { - if (d == dentry) { - dget(dentry); + spin_lock(&dparent->d_lock); + list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + if (dentry == child) { + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + __dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); return 1; } } - rcu_read_unlock(); + spin_unlock(&dparent->d_lock); + return 0; } EXPORT_SYMBOL(d_validate); @@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate); void d_delete(struct dentry * dentry) { + struct inode *inode; int isdir = 0; /* * Are we the only user? */ - spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); - isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { + inode = dentry->d_inode; + isdir = S_ISDIR(inode->i_mode); + if (dentry->d_count == 1) { + if (inode && !spin_trylock(&inode->i_lock)) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + goto again; + } dentry->d_flags &= ~DCACHE_CANT_MOUNT; - dentry_iput(dentry); + dentry_unlink_inode(dentry); fsnotify_nameremove(dentry, isdir); return; } @@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); -static void __d_rehash(struct dentry * entry, struct hlist_head *list) +static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) { - + BUG_ON(!d_unhashed(entry)); + spin_lock_bucket(b); entry->d_flags &= ~DCACHE_UNHASHED; - hlist_add_head_rcu(&entry->d_hash, list); + hlist_bl_add_head_rcu(&entry->d_hash, &b->head); + spin_unlock_bucket(b); } static void _d_rehash(struct dentry * entry) @@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); _d_rehash(entry); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } EXPORT_SYMBOL(d_rehash); -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch. +/** + * dentry_update_name_case - update case insensitive dentry with a new name + * @dentry: dentry to be updated + * @name: new name * - * Note that we have to be a lot more careful about getting the hash - * switched - we have to switch the hash value properly even if it - * then no longer matches the actual (corrupted) string of the target. - * The hash value has to match the hash queue that the dentry is on.. + * Update a case insensitive dentry with new case of name. + * + * dentry must have been returned by d_lookup with name @name. Old and new + * name lengths must match (ie. no d_compare which allows mismatched name + * lengths). + * + * Parent inode i_mutex must be held over d_lookup and into this call (to + * keep renames and concurrent inserts, and readdir(2) away). */ +void dentry_update_name_case(struct dentry *dentry, struct qstr *name) +{ + BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ + + spin_lock(&dentry->d_lock); + write_seqcount_begin(&dentry->d_seq); + memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); + write_seqcount_end(&dentry->d_seq); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(dentry_update_name_case); + static void switch_names(struct dentry *dentry, struct dentry *target) { if (dname_external(target)) { @@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target) swap(dentry->d_name.len, target->d_name.len); } +static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +{ + /* + * XXXX: do we really need to take target->d_lock? + */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { + if (d_ancestor(dentry->d_parent, target->d_parent)) { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } + } + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } +} + +static void dentry_unlock_parents_for_move(struct dentry *dentry, + struct dentry *target) +{ + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); +} + /* - * We cannibalize "target" when moving dentry on top of it, - * because it's going to be thrown away anyway. We could be more - * polite about it, though. - * - * This forceful removal will result in ugly /proc output if - * somebody holds a file open that got deleted due to a rename. - * We could be nicer about the deleted file, and let it show - * up under the name it had before it was deleted rather than - * under the original name of the file that was moved on top of it. + * When switching names, the actual string doesn't strictly have to + * be preserved in the target - because we're dropping the target + * anyway. As such, we can just do a simple memcpy() to copy over + * the new name before we switch. + * + * Note that we have to be a lot more careful about getting the hash + * switched - we have to switch the hash value properly even if it + * then no longer matches the actual (corrupted) string of the target. + * The hash value has to match the hash queue that the dentry is on.. */ - /* - * d_move_locked - move a dentry + * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. */ -static void d_move_locked(struct dentry * dentry, struct dentry * target) +void d_move(struct dentry * dentry, struct dentry * target) { - struct hlist_head *list; - if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + BUG_ON(d_ancestor(dentry, target)); + BUG_ON(d_ancestor(target, dentry)); + write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? - */ - if (target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); - } - /* Move the dentry to the target hash queue, if on different bucket */ - if (d_unhashed(dentry)) - goto already_unhashed; + dentry_lock_for_move(dentry, target); - hlist_del_rcu(&dentry->d_hash); + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&target->d_seq); -already_unhashed: - list = d_hash(target->d_parent, target->d_name.hash); - __d_rehash(dentry, list); + /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ + + /* + * Move the dentry to the target hash queue. Don't bother checking + * for the same hash queue because of how unlikely it is. + */ + __d_drop(dentry); + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); @@ -1715,27 +2246,16 @@ already_unhashed: } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + + dentry_unlock_parents_for_move(dentry, target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); } - -/** - * d_move - move a dentry - * @dentry: entry to move - * @target: new dentry - * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. - */ - -void d_move(struct dentry * dentry, struct dentry * target) -{ - spin_lock(&dcache_lock); - d_move_locked(dentry, target); - spin_unlock(&dcache_lock); -} EXPORT_SYMBOL(d_move); /** @@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex and the inode->i_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ -static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) +static struct dentry *__d_unalias(struct inode *inode, + struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; @@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move_locked(alias, dentry); + d_move(alias, dentry); ret = alias; out_err: - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (m2) mutex_unlock(m2); if (m1) @@ -1804,17 +2324,23 @@ out_err: /* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. + * returns with anon->d_lock held! */ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) { struct dentry *dparent, *aparent; - switch_names(dentry, anon); - swap(dentry->d_name.hash, anon->d_name.hash); + dentry_lock_for_move(anon, dentry); + + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&anon->d_seq); dparent = dentry->d_parent; aparent = anon->d_parent; + switch_names(dentry, anon); + swap(dentry->d_name.hash, anon->d_name.hash); + dentry->d_parent = (aparent == anon) ? dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); + write_seqcount_end(&dentry->d_seq); + write_seqcount_end(&anon->d_seq); + + dentry_unlock_parents_for_move(anon, dentry); + spin_unlock(&dentry->d_lock); + + /* anon->d_lock still locked, returns locked */ anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); - if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - goto found_lock; + d_rehash(actual); + goto out_nolock; } + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) /* Is this an anonymous mountpoint that we could splice * into our tree? */ if (IS_ROOT(alias)) { - spin_lock(&alias->d_lock); __d_materialise_dentry(dentry, alias); __d_drop(alias); goto found; } /* Nope, but we must(!) avoid directory aliasing */ - actual = __d_unalias(dentry, alias); + actual = __d_unalias(inode, dentry, alias); if (IS_ERR(actual)) dput(alias); goto out_nolock; @@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else if (unlikely(!d_unhashed(actual))) - goto shouldnt_be_hashed; + else + BUG_ON(!d_unhashed(actual)); -found_lock: spin_lock(&actual->d_lock); found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -1898,10 +2430,6 @@ out_nolock: iput(inode); return actual; - -shouldnt_be_hashed: - spin_unlock(&dcache_lock); - BUG(); } EXPORT_SYMBOL_GPL(d_materialise_unique); @@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the dcache_lock. + * Caller holds the rename_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). @@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root, } parent = dentry->d_parent; prefetch(parent); + spin_lock(&dentry->d_lock); error = prepend_name(buffer, buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); if (!error) error = prepend(buffer, buflen, "/", 1); if (error) @@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root, int error; prepend(&res, &buflen, "\0", 1); - spin_lock(&dcache_lock); + write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); - spin_unlock(&dcache_lock); + write_sequnlock(&rename_lock); if (error) return ERR_PTR(error); @@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (error) res = ERR_PTR(error); - spin_unlock(&dcache_lock); + write_sequnlock(&rename_lock); path_put(&root); return res; } @@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - spin_lock(&dcache_lock); + write_seqlock(&rename_lock); tmp = root; error = path_with_deleted(path, &tmp, &res, &buflen); if (!error && !path_equal(&tmp, &root)) error = prepend_unreachable(&res, &buflen); - spin_unlock(&dcache_lock); + write_sequnlock(&rename_lock); path_put(&root); if (error) res = ERR_PTR(error); @@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, /* * Write full pathname from the root of the filesystem into the buffer. */ -char *__dentry_path(struct dentry *dentry, char *buf, int buflen) +static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { char *end = buf + buflen; char *retval; @@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen) while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; + int error; prefetch(parent); - if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || - (prepend(&end, &buflen, "/", 1) != 0)) + spin_lock(&dentry->d_lock); + error = prepend_name(&end, &buflen, &dentry->d_name); + spin_unlock(&dentry->d_lock); + if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) goto Elong; retval = end; @@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen) Elong: return ERR_PTR(-ENAMETOOLONG); } -EXPORT_SYMBOL(__dentry_path); + +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + char *retval; + + write_seqlock(&rename_lock); + retval = __dentry_path(dentry, buf, buflen); + write_sequnlock(&rename_lock); + + return retval; +} +EXPORT_SYMBOL(dentry_path_raw); char *dentry_path(struct dentry *dentry, char *buf, int buflen) { char *p = NULL; char *retval; - spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); - spin_unlock(&dcache_lock); + write_sequnlock(&rename_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; Elong: - spin_unlock(&dcache_lock); return ERR_PTR(-ENAMETOOLONG); } @@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; - spin_lock(&dcache_lock); + write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; @@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &tmp, &cwd, &buflen); - spin_unlock(&dcache_lock); + write_sequnlock(&rename_lock); if (error) goto out; @@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else - spin_unlock(&dcache_lock); + } else { + write_sequnlock(&rename_lock); + } out: path_put(&pwd); @@ -2282,25 +2826,25 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } @@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under); void d_genocide(struct dentry *root) { - struct dentry *this_parent = root; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + int locked = 0; - spin_lock(&dcache_lock); + seq = read_seqbegin(&rename_lock); +again: + this_parent = root; + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2343,21 +2892,62 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (d_unhashed(dentry) || !dentry->d_inode) { + spin_unlock(&dentry->d_lock); continue; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - atomic_dec(&dentry->d_count); + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_count--; + } + spin_unlock(&dentry->d_lock); } if (this_parent != root) { - next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { + this_parent->d_flags |= DCACHE_GENOCIDE; + this_parent->d_count--; + } + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&rename_lock, seq)) + goto rename_retry; + if (locked) + write_sequnlock(&rename_lock); + return; + +rename_retry: + locked = 1; + write_seqlock(&rename_lock); + goto again; } /** @@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, HASH_EARLY, @@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } static void __init dcache_init(void) { int loop; - percpu_counter_init(&nr_dentry, 0); - percpu_counter_init(&nr_dentry_unused, 0); - /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature @@ -2446,7 +3033,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, 0, @@ -2455,7 +3042,7 @@ static void __init dcache_init(void) 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); } /* SLAB cache for __getname() consumers */ diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 37a34c2c622a..9c64ae9e4c1a 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -63,6 +63,9 @@ #define NEEDED_RMEM (4*1024*1024) #define CONN_HASH_SIZE 32 +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 + struct cbuf { unsigned int base; unsigned int len; @@ -108,6 +111,7 @@ struct connection { #define CF_INIT_PENDING 4 #define CF_IS_OTHERCON 5 #define CF_CLOSE 6 +#define CF_APP_LIMITED 7 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; int (*rx_action) (struct connection *); /* What to do when active */ @@ -295,7 +299,17 @@ static void lowcomms_write_space(struct sock *sk) { struct connection *con = sock2con(sk); - if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + if (!con) + return; + + clear_bit(SOCK_NOSPACE, &con->sock->flags); + + if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { + con->sock->sk->sk_write_pending--; + clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + } + + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) queue_work(send_workqueue, &con->swork); } @@ -915,6 +929,7 @@ static void tcp_connect_to_sock(struct connection *con) struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; + int one = 1; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -960,6 +975,11 @@ static void tcp_connect_to_sock(struct connection *con) make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); log_print("connecting to %d", con->nodeid); + + /* Turn off Nagle's algorithm */ + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, + sizeof(one)); + result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); @@ -1011,6 +1031,10 @@ static struct socket *tcp_create_listen_sock(struct connection *con, goto create_out; } + /* Turn off Nagle's algorithm */ + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, + sizeof(one)); + result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); @@ -1297,6 +1321,7 @@ static void send_to_sock(struct connection *con) const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; struct writequeue_entry *e; int len, offset; + int count = 0; mutex_lock(&con->sock_mutex); if (con->sock == NULL) @@ -1319,14 +1344,27 @@ static void send_to_sock(struct connection *con) ret = kernel_sendpage(con->sock, e->page, offset, len, msg_flags); if (ret == -EAGAIN || ret == 0) { + if (ret == -EAGAIN && + test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { + /* Notify TCP that we're limited by the + * application window size. + */ + set_bit(SOCK_NOSPACE, &con->sock->flags); + con->sock->sk->sk_write_pending++; + } cond_resched(); goto out; } if (ret <= 0) goto send_error; } - /* Don't starve people filling buffers */ + + /* Don't starve people filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); + count = 0; + } spin_lock(&con->writequeue_lock); e->offset += ret; @@ -1430,20 +1468,19 @@ static void work_stop(void) static int work_start(void) { - int error; - recv_workqueue = create_workqueue("dlm_recv"); - error = IS_ERR(recv_workqueue); - if (error) { - log_print("can't start dlm_recv %d", error); - return error; + recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | + WQ_HIGHPRI | WQ_FREEZEABLE, 0); + if (!recv_workqueue) { + log_print("can't start dlm_recv"); + return -ENOMEM; } - send_workqueue = create_singlethread_workqueue("dlm_send"); - error = IS_ERR(send_workqueue); - if (error) { - log_print("can't start dlm_send %d", error); + send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | + WQ_HIGHPRI | WQ_FREEZEABLE, 0); + if (!send_workqueue) { + log_print("can't start dlm_send"); destroy_workqueue(recv_workqueue); - return error; + return -ENOMEM; } return 0; diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 906e803f7f79..6fc4f319b550 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -44,12 +44,17 @@ */ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct dentry *dentry_save; struct vfsmount *vfsmount_save; int rc = 1; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; dentry_save = nd->path.dentry; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 9d1a22d62765..337352a94751 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, ecryptfs_dentry->d_parent)); lower_inode = lower_dentry->d_inode; fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); - BUG_ON(!atomic_read(&lower_dentry->d_count)); + BUG_ON(!lower_dentry->d_count); ecryptfs_set_dentry_private(ecryptfs_dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL)); @@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct qstr lower_name; int rc = 0; - ecryptfs_dentry->d_op = &ecryptfs_dops; + d_set_d_op(ecryptfs_dentry, &ecryptfs_dops); if ((ecryptfs_dentry->d_name.len == 1 && !strcmp(ecryptfs_dentry->d_name.name, ".")) || (ecryptfs_dentry->d_name.len == 2 @@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = ecryptfs_dentry->d_name.hash; if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } @@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, lower_name.hash = full_name_hash(lower_name.name, lower_name.len); if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - &lower_name); + lower_dir_dentry->d_inode, &lower_name); if (rc < 0) goto out_d_drop; } @@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) } static int -ecryptfs_permission(struct inode *inode, int mask) +ecryptfs_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; return inode_permission(ecryptfs_inode_to_lower(inode), mask); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a9dbd62518e6..351038675376 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (special_file(lower_inode->i_mode)) init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev); - dentry->d_op = &ecryptfs_dops; + d_set_d_op(dentry, &ecryptfs_dops); fsstack_copy_attr_all(inode, lower_inode); /* This size will be overwritten for real files w/ headers and * other metadata */ @@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags deactivate_locked_super(s); goto out; } - s->s_root->d_op = &ecryptfs_dops; + d_set_d_op(s->s_root, &ecryptfs_dops); s->s_root->d_sb = s; s->s_root->d_parent = s->s_root; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 2720178b7718..3042fe123a34 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -62,6 +62,16 @@ out: return inode; } +static void ecryptfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ecryptfs_inode_info *inode_info; + inode_info = ecryptfs_inode_to_private(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ecryptfs_inode_info_cache, inode_info); +} + /** * ecryptfs_destroy_inode * @inode: The ecryptfs inode @@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode) } } ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); - kmem_cache_free(ecryptfs_inode_info_cache, inode_info); + call_rcu(&inode->i_rcu, ecryptfs_i_callback); } /** diff --git a/fs/efs/super.c b/fs/efs/super.c index 5073a07652cc..0f31acb0131c 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void efs_destroy_inode(struct inode *inode) +static void efs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } +static void efs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, efs_i_callback); +} + static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 79c3ae6e0456..8c6c4669b381 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } +static void exofs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); +} + /* * Remove an inode from the cache */ static void exofs_destroy_inode(struct inode *inode) { - kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); + call_rcu(&inode->i_rcu, exofs_i_callback); } /* diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 51b304056f10..4b6825740dd5 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result, void *context) { struct dentry *dentry, *toput = NULL; + struct inode *inode; if (acceptable(context, result)) return result; - spin_lock(&dcache_lock); - list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { - dget_locked(dentry); - spin_unlock(&dcache_lock); + inode = result->d_inode; + spin_lock(&inode->i_lock); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + dget(dentry); + spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); toput = dentry; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (toput) dput(toput); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 2bcc0431bada..7b4180554a62 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) } int -ext2_check_acl(struct inode *inode, int mask) +ext2_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 3ff6cbb9ac44..c939b7b12099 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern int ext2_check_acl (struct inode *, int); +extern int ext2_check_acl (struct inode *, int, unsigned int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d89e0b6a2d78..e0c6380ff992 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ext2_destroy_inode(struct inode *inode) +static void ext2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } +static void ext2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ext2_i_callback); +} + static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 8a11fe212183..e4fa49e6c539 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext3_check_acl(struct inode *inode, int mask) +ext3_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 597334626de9..5faf8048e906 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL /* acl.c */ -extern int ext3_check_acl (struct inode *, int); +extern int ext3_check_acl (struct inode *, int, unsigned int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index acf8695fa8f0..77ce1616f725 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } +static void ext3_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); +} + static void ext3_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT3_I(inode)->i_orphan))) { @@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode) false); dump_stack(); } - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); + call_rcu(&inode->i_rcu, ext3_i_callback); } static void init_once(void *foo) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 5e2ed4504ead..e0270d1f8d82 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, } int -ext4_check_acl(struct inode *inode, int mask) +ext4_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } + acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 9d843d5deac4..dec821168fd4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -extern int ext4_check_acl(struct inode *, int); +extern int ext4_check_acl(struct inode *, int, unsigned int); extern int ext4_acl_chmod(struct inode *); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index dc963929de65..981c8477adab 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb, GFP_NOFS); if (err) goto exit_bh; + for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) + ext4_set_bit(bit, bh->b_data); ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, input->block_bitmap - start); @@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); if (err) goto exit_bh; + for (i = 0, bit = input->inode_table - start; + i < sbi->s_itb_per_group; i++, bit++) + ext4_set_bit(bit, bh->b_data); if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb15c9c0be74..cd37f9d5e447 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode) return drop; } +static void ext4_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +} + static void ext4_destroy_inode(struct inode *inode) { ext4_ioend_wait(inode); @@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode) true); dump_stack(); } - kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); + call_rcu(&inode->i_rcu, ext4_i_callback); } static void init_once(void *foo) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ad6998a92c30..206351af7c58 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void fat_destroy_inode(struct inode *inode) +static void fat_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } +static void fat_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, fat_i_callback); +} + static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; @@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, */ result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = sb->s_root->d_op; + d_set_d_op(result, sb->s_root->d_op); return result; } @@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent)) - parent->d_op = sb->s_root->d_op; + d_set_d_op(parent, sb->s_root->d_op); out: unlock_super(sb); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3345aabd1dd7..35ffe43afa4b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, * that the existing dentry can be used. The msdos fs routines will * return ENOENT or EINVAL as appropriate. */ -static int msdos_hash(struct dentry *dentry, struct qstr *qstr) +static int msdos_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; unsigned char msdos_name[MSDOS_NAME]; @@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr) * Compare two msdos names. If either of the names are invalid, * we fall back to doing the standard name comparison. */ -static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; + struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; int error; - error = msdos_format_name(a->name, a->len, a_msdos_name, options); + error = msdos_format_name(name->name, name->len, a_msdos_name, options); if (error) goto old_compare; - error = msdos_format_name(b->name, b->len, b_msdos_name, options); + error = msdos_format_name(str, len, b_msdos_name, options); if (error) goto old_compare; error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); @@ -182,8 +185,8 @@ out: old_compare: error = 1; - if (a->len == b->len) - error = memcmp(a->name, b->name, a->len); + if (name->len == len) + error = memcmp(name->name, str, len); goto out; } @@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, } out: unlock_super(sb); - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); dentry = d_splice_alias(inode, dentry); if (dentry) - dentry->d_op = &msdos_dentry_operations; + d_set_d_op(dentry, &msdos_dentry_operations); return dentry; error: @@ -670,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) } sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; + d_set_d_op(sb->s_root, &msdos_dentry_operations); unlock_super(sb); return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b936703b8924..e3ffc5e12332 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry) static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + /* This is not negative dentry. Always valid. */ if (dentry->d_inode) return 1; @@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + /* * This is not negative dentry. Always valid. * @@ -85,22 +91,26 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) } /* returns the length of a struct qstr, ignoring trailing dots */ -static unsigned int vfat_striptail_len(struct qstr *qstr) +static unsigned int __vfat_striptail_len(unsigned int len, const char *name) { - unsigned int len = qstr->len; - - while (len && qstr->name[len - 1] == '.') + while (len && name[len - 1] == '.') len--; return len; } +static unsigned int vfat_striptail_len(const struct qstr *qstr) +{ + return __vfat_striptail_len(qstr->len, qstr->name); +} + /* * Compute the hash for the vfat name corresponding to the dentry. * Note: if the name is invalid, we leave the hash code unchanged so * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hash(struct dentry *dentry, struct qstr *qstr) +static int vfat_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); return 0; @@ -112,9 +122,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) +static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; const unsigned char *name; unsigned int len; unsigned long hash; @@ -133,16 +144,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) /* * Case insensitive compare of two vfat names. */ -static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (nls_strnicmp(t, a->name, b->name, alen) == 0) + if (nls_strnicmp(t, name->name, str, alen) == 0) return 0; } return 1; @@ -151,15 +164,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) /* * Case sensitive compare of two vfat names. */ -static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { unsigned int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) + if (strncmp(name->name, str, alen) == 0) return 0; } return 1; @@ -757,11 +772,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: unlock_super(sb); - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = sb->s_root->d_op; + d_set_d_op(dentry, sb->s_root->d_op); dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1063,9 +1078,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) } if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_ci_dentry_ops; + d_set_d_op(sb->s_root, &vfat_ci_dentry_ops); else - sb->s_root->d_op = &vfat_dentry_ops; + d_set_d_op(sb->s_root, &vfat_dentry_ops); unlock_super(sb); return 0; diff --git a/fs/filesystems.c b/fs/filesystems.c index 68ba492d8eef..751d6b255a12 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs) tmp = &(*tmp)->next; } write_unlock(&file_systems_lock); + + synchronize_rcu(); + return -EINVAL; } diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 8c04eac5079d..2ba6719ac612 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino) return ip; } +static void vxfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(vxfs_inode_cachep, inode->i_private); +} + /** * vxfs_evict_inode - remove inode from main memory * @ip: inode to discard. @@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip) { truncate_inode_pages(&ip->i_data, 0); end_writeback(ip); - kmem_cache_free(vxfs_inode_cachep, ip->i_private); + call_rcu(&ip->i_rcu, vxfs_i_callback); } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index ed45a9cf5f3d..68ca487bedb1 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path) struct path old_root; spin_lock(&fs->lock); + write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; - path_get(path); + path_get_long(path); + write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put(&old_root); + path_put_long(&old_root); } /* @@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) struct path old_pwd; spin_lock(&fs->lock); + write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; - path_get(path); + path_get_long(path); + write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put(&old_pwd); + path_put_long(&old_pwd); } void chroot_fs_refs(struct path *old_root, struct path *new_root) @@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs = p->fs; if (fs) { spin_lock(&fs->lock); + write_seqcount_begin(&fs->seq); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->root = *new_root; count++; } if (fs->pwd.dentry == old_root->dentry && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); + path_get_long(new_root); fs->pwd = *new_root; count++; } + write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put(old_root); + path_put_long(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put(&fs->root); - path_put(&fs->pwd); + path_put_long(&fs->root); + path_put_long(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk) int kill; task_lock(tsk); spin_lock(&fs->lock); + write_seqcount_begin(&fs->seq); tsk->fs = NULL; kill = !--fs->users; + write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); task_unlock(tsk); if (kill) @@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) fs->users = 1; fs->in_exec = 0; spin_lock_init(&fs->lock); + seqcount_init(&fs->seq); fs->umask = old->umask; - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); + + spin_lock(&old->lock); + fs->root = old->root; + path_get_long(&fs->root); + fs->pwd = old->pwd; + path_get_long(&fs->pwd); + spin_unlock(&old->lock); } return fs; } @@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), + .seq = SEQCNT_ZERO, .umask = 0022, }; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6e07696308dc..cf8d28d1fbad 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } +void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup) +{ + forget->forget_one.nodeid = nodeid; + forget->forget_one.nlookup = nlookup; + + spin_lock(&fc->lock); + fc->forget_list_tail->next = forget; + fc->forget_list_tail = forget; + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_unlock(&fc->lock); +} + static void flush_bg_queue(struct fuse_conn *fc) { while (fc->active_background < fc->max_background && @@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) } } -void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) -{ - req->isreply = 0; - fuse_request_send_nowait(fc, req); -} - void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } +static int forget_pending(struct fuse_conn *fc) +{ + return fc->forget_list_head.next != NULL; +} + static int request_pending(struct fuse_conn *fc) { - return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); + return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) || + forget_pending(fc); } /* Wait until a request is available on the pending list */ @@ -960,6 +974,120 @@ __releases(fc->lock) return err ? err : reqsize; } +static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, + unsigned max, + unsigned *countp) +{ + struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link **newhead = &head; + unsigned count; + + for (count = 0; *newhead != NULL && count < max; count++) + newhead = &(*newhead)->next; + + fc->forget_list_head.next = *newhead; + *newhead = NULL; + if (fc->forget_list_head.next == NULL) + fc->forget_list_tail = &fc->forget_list_head; + + if (countp != NULL) + *countp = count; + + return head; +} + +static int fuse_read_single_forget(struct fuse_conn *fc, + struct fuse_copy_state *cs, + size_t nbytes) +__releases(fc->lock) +{ + int err; + struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); + struct fuse_forget_in arg = { + .nlookup = forget->forget_one.nlookup, + }; + struct fuse_in_header ih = { + .opcode = FUSE_FORGET, + .nodeid = forget->forget_one.nodeid, + .unique = fuse_get_unique(fc), + .len = sizeof(ih) + sizeof(arg), + }; + + spin_unlock(&fc->lock); + kfree(forget); + if (nbytes < ih.len) + return -EINVAL; + + err = fuse_copy_one(cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(cs, &arg, sizeof(arg)); + fuse_copy_finish(cs); + + if (err) + return err; + + return ih.len; +} + +static int fuse_read_batch_forget(struct fuse_conn *fc, + struct fuse_copy_state *cs, size_t nbytes) +__releases(fc->lock) +{ + int err; + unsigned max_forgets; + unsigned count; + struct fuse_forget_link *head; + struct fuse_batch_forget_in arg = { .count = 0 }; + struct fuse_in_header ih = { + .opcode = FUSE_BATCH_FORGET, + .unique = fuse_get_unique(fc), + .len = sizeof(ih) + sizeof(arg), + }; + + if (nbytes < ih.len) { + spin_unlock(&fc->lock); + return -EINVAL; + } + + max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); + head = dequeue_forget(fc, max_forgets, &count); + spin_unlock(&fc->lock); + + arg.count = count; + ih.len += count * sizeof(struct fuse_forget_one); + err = fuse_copy_one(cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(cs, &arg, sizeof(arg)); + + while (head) { + struct fuse_forget_link *forget = head; + + if (!err) { + err = fuse_copy_one(cs, &forget->forget_one, + sizeof(forget->forget_one)); + } + head = forget->next; + kfree(forget); + } + + fuse_copy_finish(cs); + + if (err) + return err; + + return ih.len; +} + +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, + size_t nbytes) +__releases(fc->lock) +{ + if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fc, cs, nbytes); + else + return fuse_read_batch_forget(fc, cs, nbytes); +} + /* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from @@ -998,6 +1126,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return fuse_read_interrupt(fc, cs, nbytes, req); } + if (forget_pending(fc)) { + if (list_empty(&fc->pending) || fc->forget_batch-- > 0) + return fuse_read_forget(fc, cs, nbytes); + + if (fc->forget_batch <= -8) + fc->forget_batch = 16; + } + req = list_entry(fc->pending.next, struct fuse_req, list); req->state = FUSE_REQ_READING; list_move(&req->list, &fc->io); @@ -1090,7 +1226,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (!fc) return -EPERM; - bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; @@ -1626,7 +1762,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (!fc) return -EPERM; - bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL); + bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; @@ -1770,6 +1906,8 @@ __acquires(fc->lock) flush_bg_queue(fc); end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); + while (forget_pending(fc)) + kfree(dequeue_forget(fc, 1, NULL)); } /* diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c9627c95482d..042af7346ec1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -10,9 +10,9 @@ #include <linux/pagemap.h> #include <linux/file.h> -#include <linux/gfp.h> #include <linux/sched.h> #include <linux/namei.h> +#include <linux/slab.h> #if BITS_PER_LONG >= 64 static inline void fuse_dentry_settime(struct dentry *entry, u64 time) @@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) */ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) { - struct inode *inode = entry->d_inode; + struct inode *inode; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = entry->d_inode; if (inode && is_bad_inode(inode)) return 0; else if (fuse_dentry_time(entry) < get_jiffies_64()) { @@ -165,7 +169,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) struct fuse_entry_out outarg; struct fuse_conn *fc; struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; struct dentry *parent; u64 attr_version; @@ -178,8 +182,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (IS_ERR(req)) return 0; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + if (!forget) { fuse_put_request(fc, req); return 0; } @@ -199,15 +203,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!err) { struct fuse_inode *fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { - fuse_send_forget(fc, forget_req, - outarg.nodeid, 1); + fuse_queue_forget(fc, forget, outarg.nodeid, 1); return 0; } spin_lock(&fc->lock); fi->nlookup++; spin_unlock(&fc->lock); } - fuse_put_request(fc, forget_req); + kfree(forget); if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) return 0; @@ -259,7 +262,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, { struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; u64 attr_version; int err; @@ -273,9 +276,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, if (IS_ERR(req)) goto out; - forget_req = fuse_get_req(fc); - err = PTR_ERR(forget_req); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) { fuse_put_request(fc, req); goto out; } @@ -301,13 +304,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, attr_version); err = -ENOMEM; if (!*inode) { - fuse_send_forget(fc, forget_req, outarg->nodeid, 1); + fuse_queue_forget(fc, forget, outarg->nodeid, 1); goto out; } err = 0; out_put_forget: - fuse_put_request(fc, forget_req); + kfree(forget); out: return err; } @@ -347,7 +350,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } entry = newent ? newent : entry; - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); if (outarg_valid) fuse_change_entry_timeout(entry, &outarg); else @@ -374,7 +377,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct inode *inode; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; @@ -388,9 +391,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (flags & O_DIRECT) return -EINVAL; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) - return PTR_ERR(forget_req); + forget = fuse_alloc_forget(); + if (!forget) + return -ENOMEM; req = fuse_get_req(fc); err = PTR_ERR(req); @@ -448,10 +451,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(ff, flags); - fuse_send_forget(fc, forget_req, outentry.nodeid, 1); + fuse_queue_forget(fc, forget, outentry.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, forget_req); + kfree(forget); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); @@ -469,7 +472,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, out_put_request: fuse_put_request(fc, req); out_put_forget_req: - fuse_put_request(fc, forget_req); + kfree(forget); return err; } @@ -483,12 +486,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct fuse_entry_out outarg; struct inode *inode; int err; - struct fuse_req *forget_req; + struct fuse_forget_link *forget; - forget_req = fuse_get_req(fc); - if (IS_ERR(forget_req)) { + forget = fuse_alloc_forget(); + if (!forget) { fuse_put_request(fc, req); - return PTR_ERR(forget_req); + return -ENOMEM; } memset(&outarg, 0, sizeof(outarg)); @@ -515,10 +518,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr, entry_attr_timeout(&outarg), 0); if (!inode) { - fuse_send_forget(fc, forget_req, outarg.nodeid, 1); + fuse_queue_forget(fc, forget, outarg.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, forget_req); + kfree(forget); if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -541,7 +544,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, return 0; out_put_forget_req: - fuse_put_request(fc, forget_req); + kfree(forget); return err; } @@ -981,12 +984,15 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask) +static int fuse_permission(struct inode *inode, int mask, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; int err = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + if (!fuse_allow_task(fc, current)) return -EACCES; @@ -1001,7 +1007,7 @@ static int fuse_permission(struct inode *inode, int mask) } if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, flags, NULL); /* If permission is denied, try to refresh file attributes. This is also needed, because the root @@ -1009,7 +1015,8 @@ static int fuse_permission(struct inode *inode, int mask) if (err == -EACCES && !refreshed) { err = fuse_do_getattr(inode, NULL, NULL); if (!err) - err = generic_permission(inode, mask, NULL); + err = generic_permission(inode, mask, + flags, NULL); } /* Note: the opposite of the above test does not diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8b984a2cebbd..95da1bc1c826 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1634,9 +1634,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * and 64bit. Fortunately we can determine which structure the server * used from the size of the reply. */ -static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src, - size_t transferred, unsigned count, - bool is_compat) +static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, + size_t transferred, unsigned count, + bool is_compat) { #ifdef CONFIG_COMPAT if (count * sizeof(struct compat_iovec) == transferred) { @@ -1680,6 +1680,42 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) return 0; } +static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, + void *src, size_t transferred, unsigned count, + bool is_compat) +{ + unsigned i; + struct fuse_ioctl_iovec *fiov = src; + + if (fc->minor < 16) { + return fuse_copy_ioctl_iovec_old(dst, src, transferred, + count, is_compat); + } + + if (count * sizeof(struct fuse_ioctl_iovec) != transferred) + return -EIO; + + for (i = 0; i < count; i++) { + /* Did the server supply an inappropriate value? */ + if (fiov[i].base != (unsigned long) fiov[i].base || + fiov[i].len != (unsigned long) fiov[i].len) + return -EIO; + + dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; + dst[i].iov_len = (size_t) fiov[i].len; + +#ifdef CONFIG_COMPAT + if (is_compat && + (ptr_to_compat(dst[i].iov_base) != fiov[i].base || + (compat_size_t) dst[i].iov_len != fiov[i].len)) + return -EIO; +#endif + } + + return 0; +} + + /* * For ioctls, there is no generic way to determine how much memory * needs to be read and/or written. Furthermore, ioctls are allowed @@ -1740,18 +1776,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, struct fuse_ioctl_out outarg; struct fuse_req *req = NULL; struct page **pages = NULL; - struct page *iov_page = NULL; + struct iovec *iov_page = NULL; struct iovec *in_iov = NULL, *out_iov = NULL; unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; size_t in_size, out_size, transferred; int err; +#if BITS_PER_LONG == 32 + inarg.flags |= FUSE_IOCTL_32BIT; +#else + if (flags & FUSE_IOCTL_COMPAT) + inarg.flags |= FUSE_IOCTL_32BIT; +#endif + /* assume all the iovs returned by client always fits in a page */ - BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); - iov_page = alloc_page(GFP_KERNEL); + iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); if (!pages || !iov_page) goto out; @@ -1760,7 +1803,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, * RETRY from server is not allowed. */ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { - struct iovec *iov = page_address(iov_page); + struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; iov->iov_len = _IOC_SIZE(cmd); @@ -1841,7 +1884,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, /* did it ask for retry? */ if (outarg.flags & FUSE_IOCTL_RETRY) { - char *vaddr; + void *vaddr; /* no retry if in restricted mode */ err = -EIO; @@ -1862,14 +1905,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, goto out; vaddr = kmap_atomic(pages[0], KM_USER0); - err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr, + err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); kunmap_atomic(vaddr, KM_USER0); if (err) goto out; - in_iov = page_address(iov_page); + in_iov = iov_page; out_iov = in_iov + in_iovs; err = fuse_verify_ioctl_iov(in_iov, in_iovs); @@ -1891,8 +1934,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, out: if (req) fuse_put_request(fc, req); - if (iov_page) - __free_page(iov_page); + free_page((unsigned long) iov_page); while (num_pages) __free_page(pages[--num_pages]); kfree(pages); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 57d4a3a0f102..ae5744a2f9e9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -53,6 +53,12 @@ extern struct mutex fuse_mutex; extern unsigned max_user_bgreq; extern unsigned max_user_congthresh; +/* One forget request */ +struct fuse_forget_link { + struct fuse_forget_one forget_one; + struct fuse_forget_link *next; +}; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -66,7 +72,7 @@ struct fuse_inode { u64 nlookup; /** The request used for sending the FORGET message */ - struct fuse_req *forget_req; + struct fuse_forget_link *forget; /** Time in jiffies until the file attributes are valid */ u64 i_time; @@ -255,7 +261,6 @@ struct fuse_req { /** Data for asynchronous requests */ union { - struct fuse_forget_in forget_in; struct { struct fuse_release_in in; struct path path; @@ -369,6 +374,13 @@ struct fuse_conn { /** Pending interrupts */ struct list_head interrupts; + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ @@ -543,8 +555,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, /** * Send FORGET command */ -void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - u64 nodeid, u64 nlookup); +void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup); + +struct fuse_forget_link *fuse_alloc_forget(void); /** * Initialize READ or READDIR request @@ -656,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); /** - * Send a request with no reply - */ -void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); - -/** * Send a request in the background */ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cfce3ad86a92..f62b32cffea9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -71,6 +71,11 @@ struct fuse_mount_data { unsigned blksize; }; +struct fuse_forget_link *fuse_alloc_forget() +{ + return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct inode *inode; @@ -90,8 +95,8 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); - fi->forget_req = fuse_request_alloc(); - if (!fi->forget_req) { + fi->forget = fuse_alloc_forget(); + if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); return NULL; } @@ -99,27 +104,20 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) return inode; } -static void fuse_destroy_inode(struct inode *inode) +static void fuse_i_callback(struct rcu_head *head) { - struct fuse_inode *fi = get_fuse_inode(inode); - BUG_ON(!list_empty(&fi->write_files)); - BUG_ON(!list_empty(&fi->queued_writes)); - if (fi->forget_req) - fuse_request_free(fi->forget_req); + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(fuse_inode_cachep, inode); } -void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, - u64 nodeid, u64 nlookup) +static void fuse_destroy_inode(struct inode *inode) { - struct fuse_forget_in *inarg = &req->misc.forget_in; - inarg->nlookup = nlookup; - req->in.h.opcode = FUSE_FORGET; - req->in.h.nodeid = nodeid; - req->in.numargs = 1; - req->in.args[0].size = sizeof(struct fuse_forget_in); - req->in.args[0].value = inarg; - fuse_request_send_noreply(fc, req); + struct fuse_inode *fi = get_fuse_inode(inode); + BUG_ON(!list_empty(&fi->write_files)); + BUG_ON(!list_empty(&fi->queued_writes)); + kfree(fi->forget); + call_rcu(&inode->i_rcu, fuse_i_callback); } static void fuse_evict_inode(struct inode *inode) @@ -129,8 +127,8 @@ static void fuse_evict_inode(struct inode *inode) if (inode->i_sb->s_flags & MS_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); - fi->forget_req = NULL; + fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); + fi->forget = NULL; } } @@ -534,6 +532,7 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->interrupts); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); + fc->forget_list_tail = &fc->forget_list_head; atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; @@ -619,7 +618,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, entry = d_obtain_alias(inode); if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { - entry->d_op = &fuse_dentry_operations; + d_set_d_op(entry, &fuse_dentry_operations); fuse_invalidate_entry_cache(entry); } @@ -721,7 +720,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { - parent->d_op = &fuse_dentry_operations; + d_set_d_op(parent, &fuse_dentry_operations); fuse_invalidate_entry_cache(parent); } diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 6bc9e3a5a693..06c48a891832 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -190,14 +190,20 @@ generic_acl_chmod(struct inode *inode) } int -generic_check_acl(struct inode *inode, int mask) +generic_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); - - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + } else { + struct posix_acl *acl; + + acl = get_cached_acl(inode, ACL_TYPE_ACCESS); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } } return -EAGAIN; } diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 48171f4c943d..7118f1a780a9 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) * Returns: errno */ -int gfs2_check_acl(struct inode *inode, int mask) +int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index b522b0cb39ea..a93907c8159b 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -16,7 +16,7 @@ #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" #define GFS2_ACL_MAX_ENTRIES 25 -extern int gfs2_check_acl(struct inode *inode, int mask); +extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int); extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); extern const struct xattr_handler gfs2_xattr_system_handler; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5476c066d4ee..3c4039d5eef1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, int metadata; unsigned int revokes = 0; int x; - int error; + int error = 0; if (!*top) sm->sm_first = 0; @@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); + if (ip != GFS2_I(sdp->sd_rindex)) + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); + else if (!sdp->sd_rgrps) + error = gfs2_ri_update(ip); + if (error) return error; @@ -879,7 +883,8 @@ out_rg_gunlock: out_rlist: gfs2_rlist_free(&rlist); out: - gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); + if (ip != GFS2_I(sdp->sd_rindex)) + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); return error; } diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 6798755b3858..4a456338b873 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -11,6 +11,7 @@ #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> +#include <linux/namei.h> #include <linux/crc32.h> #include "gfs2.h" @@ -34,15 +35,23 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *parent = dget_parent(dentry); - struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); - struct gfs2_inode *dip = GFS2_I(parent->d_inode); - struct inode *inode = dentry->d_inode; + struct dentry *parent; + struct gfs2_sbd *sdp; + struct gfs2_inode *dip; + struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; int error; int had_lock = 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + parent = dget_parent(dentry); + sdp = GFS2_SB(parent->d_inode); + dip = GFS2_I(parent->d_inode); + inode = dentry->d_inode; + if (inode) { if (is_bad_inode(inode)) goto invalid; @@ -100,13 +109,14 @@ fail: return 0; } -static int gfs2_dhash(struct dentry *dentry, struct qstr *str) +static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { str->hash = gfs2_disk_hash(str->name, str->len); return 0; } -static int gfs2_dentry_delete(struct dentry *dentry) +static int gfs2_dentry_delete(const struct dentry *dentry) { struct gfs2_inode *ginode; diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 5ab3839dfcb9..97012ecff560 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } @@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, out_inode: dentry = d_obtain_alias(inode); if (!IS_ERR(dentry)) - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); return dentry; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index aa996471ec5c..fca6689e12e6 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) !capable(CAP_LINUX_IMMUTABLE)) goto out; if (!IS_IMMUTABLE(inode)) { - error = gfs2_permission(inode, MAY_WRITE); + error = gfs2_permission(inode, MAY_WRITE, 0); if (error) goto out; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f92c17704169..08a8beb152e6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -541,21 +541,6 @@ out_locked: spin_unlock(&gl->gl_spin); } -static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, - unsigned int req_state, - unsigned int flags) -{ - int ret = LM_OUT_ERROR; - - if (!sdp->sd_lockstruct.ls_ops->lm_lock) - return req_state == LM_ST_UNLOCKED ? 0 : req_state; - - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, - req_state, flags); - return ret; -} - /** * do_xmote - Calls the DLM to change the state of a lock * @gl: The lock state @@ -575,13 +560,14 @@ __acquires(&gl->gl_spin) lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); - BUG_ON(gl->gl_state == target); - BUG_ON(gl->gl_state == gl->gl_target); + GLOCK_BUG_ON(gl, gl->gl_state == target); + GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && glops->go_inval) { set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); do_error(gl, 0); /* Fail queued try locks */ } + gl->gl_req = target; spin_unlock(&gl->gl_spin); if (glops->go_xmote_th) glops->go_xmote_th(gl); @@ -594,15 +580,17 @@ __acquires(&gl->gl_spin) gl->gl_state == LM_ST_DEFERRED) && !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) lck_flags |= LM_FLAG_TRY_1CB; - ret = gfs2_lm_lock(sdp, gl, target, lck_flags); - if (!(ret & LM_OUT_ASYNC)) { - finish_xmote(gl, ret); + if (sdp->sd_lockstruct.ls_ops->lm_lock) { + /* lock_dlm */ + ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); + GLOCK_BUG_ON(gl, ret); + } else { /* lock_nolock */ + finish_xmote(gl, target); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); - } else { - GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); } + spin_lock(&gl->gl_spin); } @@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); + if (seq) { struct gfs2_glock_iter *gi = seq->private; vsprintf(gi->string, fmt, args); seq_printf(seq, gi->string); } else { - printk(KERN_ERR " "); - vprintk(fmt, args); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_ERR " %pV", &vaf); } + va_end(args); } @@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) * @gl: Pointer to the glock * @ret: The return value from the dlm * + * The gl_reply field is under the gl_spin lock so that it is ok + * to use a bitfield shared with other glock state fields. */ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + spin_lock(&gl->gl_spin); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { - spin_lock(&gl->gl_spin); if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); spin_unlock(&gl->gl_spin); return; } - spin_unlock(&gl->gl_spin); } + + spin_unlock(&gl->gl_spin); set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + smp_wmb(); gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); @@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { struct task_struct *gh_owner = NULL; - char buffer[KSYM_SYMBOL_LEN]; char flags_buf[32]; - sprint_symbol(buffer, gh->gh_ip); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", - state2str(gh->gh_state), - hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), - gh->gh_error, - gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, - gh_owner ? gh_owner->comm : "(ended)", buffer); + gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", + state2str(gh->gh_state), + hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), + gh->gh_error, + gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, + gh_owner ? gh_owner->comm : "(ended)", + (void *)gh->gh_ip); return 0; } @@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void) } #endif - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | + glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZEABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); - gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | - WQ_FREEZEABLE, 0); + gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", + WQ_MEM_RECLAIM | WQ_FREEZEABLE, + 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); return PTR_ERR(gfs2_delete_workqueue); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index db1c26d6d220..691851ceb615 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -87,11 +87,10 @@ enum { #define GL_ASYNC 0x00000040 #define GL_EXACT 0x00000080 #define GL_SKIP 0x00000100 -#define GL_ATIME 0x00000200 #define GL_NOCACHE 0x00000400 /* - * lm_lock() and lm_async_cb return flags + * lm_async_cb return flags * * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. @@ -99,15 +98,11 @@ enum { * LM_OUT_CANCELED * The lock request was canceled. * - * LM_OUT_ASYNC - * The result of the request will be returned in an LM_CB_ASYNC callback. - * */ #define LM_OUT_ST_MASK 0x00000003 #define LM_OUT_CANCELED 0x00000008 -#define LM_OUT_ASYNC 0x00000080 -#define LM_OUT_ERROR 0x00000100 +#define LM_OUT_ERROR 0x00000004 /* * lm_recovery_done() messages @@ -124,25 +119,12 @@ struct lm_lockops { void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); - unsigned int (*lm_lock) (struct gfs2_glock *gl, - unsigned int req_state, unsigned int flags); + int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, + unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); const match_table_t *lm_tokens; }; -#define LM_FLAG_TRY 0x00000001 -#define LM_FLAG_TRY_1CB 0x00000002 -#define LM_FLAG_NOEXP 0x00000004 -#define LM_FLAG_ANY 0x00000008 -#define LM_FLAG_PRIORITY 0x00000010 - -#define GL_ASYNC 0x00000040 -#define GL_EXACT 0x00000080 -#define GL_SKIP 0x00000100 -#define GL_NOCACHE 0x00000400 - -#define GLR_TRYFAILED 13 - extern struct workqueue_struct *gfs2_delete_workqueue; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { @@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); + +__attribute__ ((format(printf, 2, 3))) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 0d149dcc04e5..263561bf1a50 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - flush_workqueue(gfs2_delete_workqueue); gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 764fbb49efc8..a79790c06275 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -11,6 +11,7 @@ #define __INCORE_DOT_H__ #include <linux/fs.h> +#include <linux/kobject.h> #include <linux/workqueue.h> #include <linux/dlm.h> #include <linux/buffer_head.h> @@ -207,12 +208,14 @@ struct gfs2_glock { spinlock_t gl_spin; - unsigned int gl_state; - unsigned int gl_target; - unsigned int gl_reply; + /* State fields protected by gl_spin */ + unsigned int gl_state:2, /* Current state */ + gl_target:2, /* Target state */ + gl_demote_state:2, /* State requested by remote node */ + gl_req:2, /* State in last dlm request */ + gl_reply:8; /* Last reply from the dlm */ + unsigned int gl_hash; - unsigned int gl_req; - unsigned int gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ struct list_head gl_holders; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e1213f7f9217..2232b3c780bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -509,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, } if (!is_root) { - error = gfs2_permission(dir, MAY_EXEC); + error = gfs2_permission(dir, MAY_EXEC, 0); if (error) goto out; } @@ -539,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, { int error; - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); if (error) return error; @@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) if (error) return error; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - setattr_copy(inode, attr); mark_inode_dirty(inode); - - gfs2_assert_warn(GFS2_SB(inode), !error); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index d8499fadcc53..732a183efdb3 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -113,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_permission(struct inode *inode, int mask); +extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 1c09425b45fd..6e493aee28f8 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, return lkf; } -static unsigned int gdlm_lock(struct gfs2_glock *gl, - unsigned int req_state, unsigned int flags) +static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, + unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; - int error; int req; u32 lkf; - gl->gl_req = req_state; req = make_mode(req_state); lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); @@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, * Submit the actual lock request. */ - error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, - GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); - if (error == -EAGAIN) - return 0; - if (error) - return LM_OUT_ERROR; - return LM_OUT_ASYNC; + return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, + GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); } static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b81..2aeabd4218cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, iput(inode); return -ENOMEM; } - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); *dptr = dentry; return 0; } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 12cbea7502c2..1501db4f0e6d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; - dentry->d_op = &gfs2_dops; + d_set_d_op(dentry, &gfs2_dops); inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) @@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_child; - error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); if (error) goto out_gunlock; @@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_APPEND(&dip->i_inode)) return -EPERM; - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); if (error) return error; @@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } } } else { - error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); if (error) goto out_gunlock; @@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Check out the dir to be renamed */ if (dir_rename) { - error = gfs2_permission(odentry->d_inode, MAY_WRITE); + error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); if (error) goto out_gunlock; } @@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) * Returns: errno */ -int gfs2_permission(struct inode *inode, int mask) +int gfs2_permission(struct inode *inode, int mask, unsigned int flags) { - struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_inode *ip; struct gfs2_holder i_gh; int error; int unlock = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) @@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask) if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EACCES; else - error = generic_permission(inode, mask, gfs2_check_acl); + error = generic_permission(inode, mask, flags, gfs2_check_acl); if (unlock) gfs2_glock_dq_uninit(&i_gh); @@ -1069,7 +1073,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *dibh; u32 ouid, ogid, nuid, ngid; int error; @@ -1100,25 +1103,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out_gunlock_q; - error = gfs2_meta_inode_buffer(ip, &dibh); + error = gfs2_setattr_simple(ip, attr); if (error) goto out_end_trans; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - int error; - - error = vmtruncate(inode, attr->ia_size); - gfs2_assert_warn(sdp, !error); - } - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); gfs2_quota_change(ip, -blocks, ouid, ogid); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f606baf9ba72..a689901963de 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_limit = qp->qu_limit; } + if (fdq->d_fieldmask & FS_DQ_BCOUNT) { + qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = qp->qu_value; + } } /* Write the quota into the quota file on disk */ @@ -1509,7 +1513,7 @@ out: } /* GFS2 only supports a subset of the XFS fields */ -#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) +#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *fdq) @@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if ((fdq->d_fieldmask & FS_DQ_BSOFT) && ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & FS_DQ_BHARD) && ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) fdq->d_fieldmask ^= FS_DQ_BHARD; + + if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && + ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) + fdq->d_fieldmask ^= FS_DQ_BCOUNT; + if (fdq->d_fieldmask == 0) goto out_i; @@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = { .get_dqblk = gfs2_get_dqblk, .set_dqblk = gfs2_set_dqblk, }; - diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 33c8407b876f..7293ea27020c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) for (rgrps = 0;; rgrps++) { loff_t pos = rgrps * sizeof(struct gfs2_rindex); - if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) + if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) break; error = gfs2_internal_read(ip, &ra_state, buf, &pos, sizeof(struct gfs2_rindex)); @@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, * Returns: 0 on successful update, error code otherwise */ -static int gfs2_ri_update(struct gfs2_inode *ip) +int gfs2_ri_update(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; @@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip) } /** - * gfs2_ri_update_special - Pull in a new resource index from the disk - * - * This is a special version that's safe to call from gfs2_inplace_reserve_i. - * In this case we know that we don't have any resource groups in memory yet. - * - * @ip: pointer to the rindex inode - * - * Returns: 0 on successful update, error code otherwise - */ -static int gfs2_ri_update_special(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct inode *inode = &ip->i_inode; - struct file_ra_state ra_state; - struct gfs2_rgrpd *rgd; - unsigned int max_data = 0; - int error; - - file_ra_state_init(&ra_state, inode->i_mapping); - for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { - /* Ignore partials */ - if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > - i_size_read(inode)) - break; - error = read_rindex_entry(ip, &ra_state); - if (error) { - clear_rgrpdi(sdp); - return error; - } - } - list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) - if (rgd->rd_data > max_data) - max_data = rgd->rd_data; - sdp->sd_max_rg_data = max_data; - - sdp->sd_rindex_uptodate = 1; - return 0; -} - -/** * gfs2_rindex_hold - Grab a lock on the rindex * @sdp: The GFS2 superblock * @ri_gh: the glock holder @@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, error = gfs2_rindex_hold(sdp, &al->al_ri_gh); else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ - error = gfs2_ri_update_special(ip); + error = gfs2_ri_update(ip); if (error) return error; } +try_again: do { error = get_local_rgrp(ip, &last_unlinked); /* If there is no space, flushing the log may release some */ - if (error) + if (error) { + if (ip == GFS2_I(sdp->sd_rindex) && + !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; + goto try_again; + } gfs2_log_flush(sdp, NULL); + } } while (error && tries++ < 3); if (error) { diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 0e35c0466f9a..50c2bb04369c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, extern void gfs2_inplace_release(struct gfs2_inode *ip); +extern int gfs2_ri_update(struct gfs2_inode *ip); extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2b2c4997430b..16c2ecac7eb7 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) return &ip->i_inode; } -static void gfs2_destroy_inode(struct inode *inode) +static void gfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(gfs2_inode_cachep, inode); } +static void gfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, gfs2_i_callback); +} + const struct super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .destroy_inode = gfs2_destroy_inode, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 30b58f07c8a6..439b61c03262 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1296,10 +1296,8 @@ fail: int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { - struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_ea_location el; - struct buffer_head *dibh; int error; error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); @@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) if (error) return error; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto out_trans_end; - - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - int error; - - error = vmtruncate(inode, attr->ia_size); - gfs2_assert_warn(GFS2_SB(inode), !error); - } - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - -out_trans_end: + error = gfs2_setattr_simple(ip, attr); gfs2_trans_end(sdp); return error; } diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 2b3b8611b41b..ea4aefe7c652 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; int res; - dentry->d_op = &hfs_dentry_operations; + d_set_d_op(dentry, &hfs_dentry_operations); hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index c8cffb81e849..ad97c2d58287 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); /* string.c */ extern const struct dentry_operations hfs_dentry_operations; -extern int hfs_hash_dentry(struct dentry *, struct qstr *); +extern int hfs_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); -extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +extern int hfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* trans.c */ extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 927a5af79428..495a976a3cc9 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -51,7 +51,8 @@ static unsigned char caseorder[256] = { /* * Hash a string to an integer in a case-independent way */ -int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) +int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { const unsigned char *name = this->name; unsigned int hash, len = this->len; @@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, * Test for equality of two strings in the HFS filename character ordering. * return 1 on failure and 0 on success */ -int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { const unsigned char *n1, *n2; - int len; - len = s1->len; if (len >= HFS_NAMELEN) { - if (s2->len < HFS_NAMELEN) + if (name->len < HFS_NAMELEN) return 1; len = HFS_NAMELEN; - } else if (len != s2->len) + } else if (len != name->len) return 1; - n1 = s1->name; - n2 = s2->name; + n1 = str; + n2 = name->name; while (len--) { if (caseorder[*n1++] != caseorder[*n2++]) return 1; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4824c27cebb8..0bef62aa4f42 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfs_destroy_inode(struct inode *inode) +static void hfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); } +static void hfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfs_i_callback); +} + static const struct super_operations hfs_super_operations = { .alloc_inode = hfs_alloc_inode, .destroy_inode = hfs_destroy_inode, @@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto bail_iput; - sb->s_root->d_op = &hfs_dentry_operations; + d_set_d_op(sb->s_root, &hfs_dentry_operations); /* everything's okay */ return 0; diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 7478f5c219aa..19cf291eb91f 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -8,15 +8,20 @@ * This file contains the code to do various system dependent things. */ +#include <linux/namei.h> #include "hfs_fs.h" /* dentry case-handling: just lowercase everything */ static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; + struct inode *inode; int diff; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; if(!inode) return 1; diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index d182438c7ae4..5d799c13205f 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -22,7 +22,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) return -ENOMEM; fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; - dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); + dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", + tree->cnid, __builtin_return_address(0)); mutex_lock(&tree->tree_lock); return 0; } @@ -31,7 +32,8 @@ void hfs_find_exit(struct hfs_find_data *fd) { hfs_bnode_put(fd->bnode); kfree(fd->search_key); - dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); + dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", + fd->tree->cnid, __builtin_return_address(0)); mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index ad57f5991eb1..1cad80c789cb 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -15,7 +15,8 @@ #define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8) -int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) +int hfsplus_block_allocate(struct super_block *sb, u32 size, + u32 offset, u32 *max) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct page *page; diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 29da6574ba77..1c42cc5b899f 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -42,7 +42,7 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off) { __be16 data; - // optimize later... + /* TODO: optimize later... */ hfs_bnode_read(node, &data, off, 2); return be16_to_cpu(data); } @@ -50,7 +50,7 @@ u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off) u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off) { u8 data; - // optimize later... + /* TODO: optimize later... */ hfs_bnode_read(node, &data, off, 1); return data; } @@ -96,7 +96,7 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len) void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data) { __be16 v = cpu_to_be16(data); - // optimize later... + /* TODO: optimize later... */ hfs_bnode_write(node, &v, off, 2); } @@ -212,7 +212,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) dst_page--; } src -= len; - memmove(kmap(*dst_page) + src, kmap(*src_page) + src, len); + memmove(kmap(*dst_page) + src, + kmap(*src_page) + src, len); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); @@ -250,14 +251,16 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) if (src == dst) { l = min(len, (int)PAGE_CACHE_SIZE - src); - memmove(kmap(*dst_page) + src, kmap(*src_page) + src, l); + memmove(kmap(*dst_page) + src, + kmap(*src_page) + src, l); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); while ((len -= l) != 0) { l = min(len, (int)PAGE_CACHE_SIZE); - memmove(kmap(*++dst_page), kmap(*++src_page), l); + memmove(kmap(*++dst_page), + kmap(*++src_page), l); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); @@ -268,7 +271,8 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) do { src_ptr = kmap(*src_page) + src; dst_ptr = kmap(*dst_page) + dst; - if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) { + if (PAGE_CACHE_SIZE - src < + PAGE_CACHE_SIZE - dst) { l = PAGE_CACHE_SIZE - src; src = 0; dst += l; @@ -340,7 +344,8 @@ void hfs_bnode_unlink(struct hfs_bnode *node) return; tmp->next = node->next; cnid = cpu_to_be32(tmp->next); - hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, next), 4); + hfs_bnode_write(tmp, &cnid, + offsetof(struct hfs_bnode_desc, next), 4); hfs_bnode_put(tmp); } else if (node->type == HFS_NODE_LEAF) tree->leaf_head = node->next; @@ -351,15 +356,15 @@ void hfs_bnode_unlink(struct hfs_bnode *node) return; tmp->prev = node->prev; cnid = cpu_to_be32(tmp->prev); - hfs_bnode_write(tmp, &cnid, offsetof(struct hfs_bnode_desc, prev), 4); + hfs_bnode_write(tmp, &cnid, + offsetof(struct hfs_bnode_desc, prev), 4); hfs_bnode_put(tmp); } else if (node->type == HFS_NODE_LEAF) tree->leaf_tail = node->prev; - // move down? - if (!node->prev && !node->next) { - printk(KERN_DEBUG "hfs_btree_del_level\n"); - } + /* move down? */ + if (!node->prev && !node->next) + dprint(DBG_BNODE_MOD, "hfs_btree_del_level\n"); if (!node->parent) { tree->root = 0; tree->depth = 0; @@ -379,16 +384,16 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + printk(KERN_ERR "hfs: request for non-existent node " + "%d in B*Tree\n", + cnid); return NULL; } for (node = tree->node_hash[hfs_bnode_hash(cnid)]; - node; node = node->next_hash) { - if (node->this == cnid) { + node; node = node->next_hash) + if (node->this == cnid) return node; - } - } return NULL; } @@ -402,7 +407,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid); + printk(KERN_ERR "hfs: request for non-existent node " + "%d in B*Tree\n", + cnid); return NULL; } @@ -429,7 +436,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) } else { spin_unlock(&tree->hash_lock); kfree(node); - wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags)); + wait_event(node2->lock_wq, + !test_bit(HFS_BNODE_NEW, &node2->flags)); return node2; } spin_unlock(&tree->hash_lock); @@ -483,7 +491,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) if (node) { hfs_bnode_get(node); spin_unlock(&tree->hash_lock); - wait_event(node->lock_wq, !test_bit(HFS_BNODE_NEW, &node->flags)); + wait_event(node->lock_wq, + !test_bit(HFS_BNODE_NEW, &node->flags)); if (test_bit(HFS_BNODE_ERROR, &node->flags)) goto node_error; return node; @@ -497,7 +506,8 @@ struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num) if (!test_bit(HFS_BNODE_NEW, &node->flags)) return node; - desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + node->page_offset); + desc = (struct hfs_bnode_desc *)(kmap(node->page[0]) + + node->page_offset); node->prev = be32_to_cpu(desc->prev); node->next = be32_to_cpu(desc->next); node->num_recs = be16_to_cpu(desc->num_recs); @@ -556,11 +566,13 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { - //int i; +#if 0 + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); +#endif kfree(node); } @@ -607,7 +619,8 @@ void hfs_bnode_get(struct hfs_bnode *node) if (node) { atomic_inc(&node->refcnt); dprint(DBG_BNODE_REFS, "get_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); } } @@ -619,7 +632,8 @@ void hfs_bnode_put(struct hfs_bnode *node) int i; dprint(DBG_BNODE_REFS, "put_node(%d:%d): %d\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); BUG_ON(!atomic_read(&node->refcnt)); if (!atomic_dec_and_lock(&node->refcnt, &tree->hash_lock)) return; diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 2f39d05443e1..2312de34bd42 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -39,7 +39,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { retval = node->tree->max_key_len + 2; } else { - recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); + recoff = hfs_bnode_read_u16(node, + node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; @@ -84,7 +85,8 @@ again: end_rec_off = tree->node_size - (node->num_recs + 1) * 2; end_off = hfs_bnode_read_u16(node, end_rec_off); end_rec_off -= 2; - dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", rec, size, end_off, end_rec_off); + dprint(DBG_BNODE_MOD, "insert_rec: %d, %d, %d, %d\n", + rec, size, end_off, end_rec_off); if (size > end_rec_off - end_off) { if (new_node) panic("not enough room!\n"); @@ -99,7 +101,9 @@ again: } node->num_recs++; /* write new last offset */ - hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs); + hfs_bnode_write_u16(node, + offsetof(struct hfs_bnode_desc, num_recs), + node->num_recs); hfs_bnode_write_u16(node, end_rec_off, end_off + size); data_off = end_off; data_rec_off = end_rec_off + 2; @@ -151,7 +155,8 @@ skip: if (tree->attributes & HFS_TREE_VARIDXKEYS) key_len = be16_to_cpu(fd->search_key->key_len) + 2; else { - fd->search_key->key_len = cpu_to_be16(tree->max_key_len); + fd->search_key->key_len = + cpu_to_be16(tree->max_key_len); key_len = tree->max_key_len + 2; } goto again; @@ -180,7 +185,8 @@ again: mark_inode_dirty(tree->inode); } hfs_bnode_dump(node); - dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", fd->record, fd->keylength + fd->entrylength); + dprint(DBG_BNODE_MOD, "remove_rec: %d, %d\n", + fd->record, fd->keylength + fd->entrylength); if (!--node->num_recs) { hfs_bnode_unlink(node); if (!node->parent) @@ -194,7 +200,9 @@ again: __hfs_brec_find(node, fd); goto again; } - hfs_bnode_write_u16(node, offsetof(struct hfs_bnode_desc, num_recs), node->num_recs); + hfs_bnode_write_u16(node, + offsetof(struct hfs_bnode_desc, num_recs), + node->num_recs); if (rec_off == end_off) goto skip; @@ -364,7 +372,8 @@ again: newkeylen = hfs_bnode_read_u16(node, 14) + 2; else fd->keylength = newkeylen = tree->max_key_len + 2; - dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", rec, fd->keylength, newkeylen); + dprint(DBG_BNODE_MOD, "update_rec: %d, %d, %d\n", + rec, fd->keylength, newkeylen); rec_off = tree->node_size - (rec + 2) * 2; end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; @@ -375,7 +384,7 @@ again: end_off = hfs_bnode_read_u16(parent, end_rec_off); if (end_rec_off - end_off < diff) { - printk(KERN_DEBUG "hfs: splitting index node...\n"); + dprint(DBG_BNODE_MOD, "hfs: splitting index node.\n"); fd->bnode = parent; new_node = hfs_bnode_split(fd); if (IS_ERR(new_node)) @@ -383,7 +392,8 @@ again: parent = fd->bnode; rec = fd->record; rec_off = tree->node_size - (rec + 2) * 2; - end_rec_off = tree->node_size - (parent->num_recs + 1) * 2; + end_rec_off = tree->node_size - + (parent->num_recs + 1) * 2; } } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 22e4d4e32999..21023d9f8ff3 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -51,7 +51,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) goto free_inode; /* Load the header */ - head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); + head = (struct hfs_btree_header_rec *)(kmap(page) + + sizeof(struct hfs_bnode_desc)); tree->root = be32_to_cpu(head->root); tree->leaf_count = be32_to_cpu(head->leaf_count); tree->leaf_head = be32_to_cpu(head->leaf_head); @@ -115,7 +116,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) tree->node_size_shift = ffs(size) - 1; - tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + tree->pages_per_bnode = + (tree->node_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; kunmap(page); page_cache_release(page); @@ -144,8 +147,10 @@ void hfs_btree_close(struct hfs_btree *tree) while ((node = tree->node_hash[i])) { tree->node_hash[i] = node->next_hash; if (atomic_read(&node->refcnt)) - printk(KERN_CRIT "hfs: node %d:%d still has %d user(s)!\n", - node->tree->cnid, node->this, atomic_read(&node->refcnt)); + printk(KERN_CRIT "hfs: node %d:%d " + "still has %d user(s)!\n", + node->tree->cnid, node->this, + atomic_read(&node->refcnt)); hfs_bnode_free(node); tree->node_hash_cnt--; } @@ -166,7 +171,8 @@ void hfs_btree_write(struct hfs_btree *tree) return; /* Load the header */ page = node->page[0]; - head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); + head = (struct hfs_btree_header_rec *)(kmap(page) + + sizeof(struct hfs_bnode_desc)); head->root = cpu_to_be32(tree->root); head->leaf_count = cpu_to_be32(tree->leaf_count); @@ -272,7 +278,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) tree->free_nodes--; mark_inode_dirty(tree->inode); hfs_bnode_put(node); - return hfs_bnode_create(tree, idx); + return hfs_bnode_create(tree, + idx); } } } @@ -287,7 +294,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) kunmap(*pagep); nidx = node->next; if (!nidx) { - printk(KERN_DEBUG "hfs: create new bmap node...\n"); + dprint(DBG_BNODE_MOD, "hfs: create new bmap node.\n"); next_node = hfs_bmap_new_bmap(node, idx); } else next_node = hfs_bnode_find(tree, nidx); @@ -329,7 +336,9 @@ void hfs_bmap_free(struct hfs_bnode *node) hfs_bnode_put(node); if (!i) { /* panic */; - printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this); + printk(KERN_CRIT "hfs: unable to free bnode %u. " + "bmap not found!\n", + node->this); return; } node = hfs_bnode_find(tree, i); @@ -337,7 +346,9 @@ void hfs_bmap_free(struct hfs_bnode *node) return; if (node->type != HFS_NODE_MAP) { /* panic */; - printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type); + printk(KERN_CRIT "hfs: invalid bmap found! " + "(%u,%d)\n", + node->this, node->type); hfs_bnode_put(node); return; } @@ -350,7 +361,9 @@ void hfs_bmap_free(struct hfs_bnode *node) m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { - printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type); + printk(KERN_CRIT "hfs: trying to free free bnode " + "%u(%d)\n", + node->this, node->type); kunmap(page); hfs_bnode_put(node); return; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 8af45fc5b051..b4ba1b319333 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -91,7 +91,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = 0; } -static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) +static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, + u32 cnid, struct inode *inode) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); @@ -128,20 +129,32 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i if (cnid == inode->i_ino) { hfsplus_cat_set_perms(inode, &file->permissions); if (S_ISLNK(inode->i_mode)) { - file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); - file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); + file->user_info.fdType = + cpu_to_be32(HFSP_SYMLINK_TYPE); + file->user_info.fdCreator = + cpu_to_be32(HFSP_SYMLINK_CREATOR); } else { - file->user_info.fdType = cpu_to_be32(sbi->type); - file->user_info.fdCreator = cpu_to_be32(sbi->creator); + file->user_info.fdType = + cpu_to_be32(sbi->type); + file->user_info.fdCreator = + cpu_to_be32(sbi->creator); } - if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) - file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); + if (HFSPLUS_FLG_IMMUTABLE & + (file->permissions.rootflags | + file->permissions.userflags)) + file->flags |= + cpu_to_be16(HFSPLUS_FILE_LOCKED); } else { - file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); - file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); - file->user_info.fdFlags = cpu_to_be16(0x100); - file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; - file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); + file->user_info.fdType = + cpu_to_be32(HFSP_HARDLINK_TYPE); + file->user_info.fdCreator = + cpu_to_be32(HFSP_HFSPLUS_CREATOR); + file->user_info.fdFlags = + cpu_to_be16(0x100); + file->create_date = + HFSPLUS_I(sbi->hidden_dir)->create_date; + file->permissions.dev = + cpu_to_be32(HFSPLUS_I(inode)->linkid); } return sizeof(*file); } @@ -182,12 +195,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return -EIO; } - hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), - &tmp.thread.nodeName); + hfsplus_cat_build_key_uni(fd->search_key, + be32_to_cpu(tmp.thread.parentID), + &tmp.thread.nodeName); return hfs_brec_find(fd); } -int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) +int hfsplus_create_cat(u32 cnid, struct inode *dir, + struct qstr *str, struct inode *inode) { struct super_block *sb = dir->i_sb; struct hfs_find_data fd; @@ -195,13 +210,15 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino int entry_size; int err; - dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); + dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", + str->name, cnid, inode->i_nlink); hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? + entry_size = hfsplus_fill_cat_thread(sb, &entry, + S_ISDIR(inode->i_mode) ? HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, - dir->i_ino, str); + dir->i_ino, str); err = hfs_brec_find(&fd); if (err != -ENOENT) { if (!err) @@ -227,7 +244,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct ino dir->i_size++; dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(dir); + hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); + hfs_find_exit(&fd); return 0; @@ -249,7 +267,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) int err, off; u16 type; - dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); + dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", + str ? str->name : NULL, cnid); hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (!str) { @@ -260,11 +279,15 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (err) goto out; - off = fd.entryoffset + offsetof(struct hfsplus_cat_thread, nodeName); + off = fd.entryoffset + + offsetof(struct hfsplus_cat_thread, nodeName); fd.search_key->cat.parent = cpu_to_be32(dir->i_ino); - hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.length, off, 2); + hfs_bnode_read(fd.bnode, + &fd.search_key->cat.name.length, off, 2); len = be16_to_cpu(fd.search_key->cat.name.length) * 2; - hfs_bnode_read(fd.bnode, &fd.search_key->cat.name.unicode, off + 2, len); + hfs_bnode_read(fd.bnode, + &fd.search_key->cat.name.unicode, + off + 2, len); fd.search_key->key_len = cpu_to_be16(6 + len); } else hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); @@ -281,7 +304,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_DATA); #endif - off = fd.entryoffset + offsetof(struct hfsplus_cat_file, rsrc_fork); + off = fd.entryoffset + + offsetof(struct hfsplus_cat_file, rsrc_fork); hfs_bnode_read(fd.bnode, &fork, off, sizeof(fork)); hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); } @@ -308,7 +332,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dir->i_size--; dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(dir); + hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); out: hfs_find_exit(&fd); @@ -325,7 +349,8 @@ int hfsplus_rename_cat(u32 cnid, int entry_size, type; int err = 0; - dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, + dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", + cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); dst_fd = src_fd; @@ -353,7 +378,6 @@ int hfsplus_rename_cat(u32 cnid, goto out; dst_dir->i_size++; dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(dst_dir); /* finally remove the old entry */ hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); @@ -365,7 +389,6 @@ int hfsplus_rename_cat(u32 cnid, goto out; src_dir->i_size--; src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(src_dir); /* remove old thread entry */ hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); @@ -379,7 +402,8 @@ int hfsplus_rename_cat(u32 cnid, /* create new thread entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); - entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); + entry_size = hfsplus_fill_cat_thread(sb, &entry, type, + dst_dir->i_ino, dst_name); err = hfs_brec_find(&dst_fd); if (err != -ENOENT) { if (!err) @@ -387,6 +411,9 @@ int hfsplus_rename_cat(u32 cnid, goto out; } err = hfs_brec_insert(&dst_fd, &entry, entry_size); + + hfsplus_mark_inode_dirty(dst_dir, HFSPLUS_I_CAT_DIRTY); + hfsplus_mark_inode_dirty(src_dir, HFSPLUS_I_CAT_DIRTY); out: hfs_bnode_put(dst_fd.bnode); hfs_find_exit(&src_fd); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 9d59c0571f59..f896dc843026 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; - dentry->d_op = &hfsplus_dentry_operations; + d_set_d_op(dentry, &hfsplus_dentry_operations); dentry->d_fsdata = NULL; hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); @@ -66,11 +66,17 @@ again: goto fail; } cnid = be32_to_cpu(entry.file.id); - if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && - entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && - (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || - entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && - HFSPLUS_SB(sb)->hidden_dir) { + if (entry.file.user_info.fdType == + cpu_to_be32(HFSP_HARDLINK_TYPE) && + entry.file.user_info.fdCreator == + cpu_to_be32(HFSP_HFSPLUS_CREATOR) && + (entry.file.create_date == + HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)-> + create_date || + entry.file.create_date == + HFSPLUS_I(sb->s_root->d_inode)-> + create_date) && + HFSPLUS_SB(sb)->hidden_dir) { struct qstr str; char name[32]; @@ -83,11 +89,13 @@ again: linkid = 0; } else { dentry->d_fsdata = (void *)(unsigned long)cnid; - linkid = be32_to_cpu(entry.file.permissions.dev); + linkid = + be32_to_cpu(entry.file.permissions.dev); str.len = sprintf(name, "iNode%d", linkid); str.name = name; hfsplus_cat_build_key(sb, fd.search_key, - HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); + HFSPLUS_SB(sb)->hidden_dir->i_ino, + &str); goto again; } } else if (!dentry->d_fsdata) @@ -139,7 +147,8 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos++; /* fall through */ case 1: - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + fd.entrylength); if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { printk(KERN_ERR "hfs: bad catalog folder thread\n"); err = -EIO; @@ -169,14 +178,16 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + fd.entrylength); type = be16_to_cpu(entry.type); len = HFSPLUS_MAX_STRLEN; err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len); if (err) goto out; if (type == HFSPLUS_FOLDER) { - if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { + if (fd.entrylength < + sizeof(struct hfsplus_cat_folder)) { printk(KERN_ERR "hfs: small dir entry\n"); err = -EIO; goto out; @@ -202,7 +213,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } - next: +next: filp->f_pos++; if (filp->f_pos >= inode->i_size) goto out; @@ -273,7 +284,8 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, HFSPLUS_I(inode)->linkid = id; cnid = sbi->next_cnid++; src_dentry->d_fsdata = (void *)(unsigned long)cnid; - res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); + res = hfsplus_create_cat(cnid, src_dir, + &src_dentry->d_name, inode); if (res) /* panic? */ goto out; @@ -485,6 +497,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { }; const struct file_operations hfsplus_dir_operations = { + .fsync = hfsplus_file_fsync, .read = generic_read_dir, .readdir = hfsplus_readdir, .unlocked_ioctl = hfsplus_ioctl, diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0c9cb1820a52..52a0bcaa7b6d 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -83,7 +83,8 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) return be32_to_cpu(ext->start_block) + be32_to_cpu(ext->block_count); } -static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) +static void __hfsplus_ext_write_extent(struct inode *inode, + struct hfs_find_data *fd) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; @@ -95,24 +96,32 @@ static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); res = hfs_brec_find(fd); - if (hip->flags & HFSPLUS_FLG_EXT_NEW) { + if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } else { if (res) return; hfs_bnode_write(fd->bnode, hip->cached_extents, fd->entryoffset, fd->entrylength); - hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; + hip->extent_state &= ~HFSPLUS_EXT_DIRTY; } + + /* + * We can't just use hfsplus_mark_inode_dirty here, because we + * also get called from hfsplus_write_inode, which should not + * redirty the inode. Instead the callers have to be careful + * to explicily mark the inode dirty, too. + */ + set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); } static void hfsplus_ext_write_extent_locked(struct inode *inode) { - if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { + if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); @@ -144,18 +153,20 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, return -ENOENT; if (fd->entrylength != sizeof(hfsplus_extent_rec)) return -EIO; - hfs_bnode_read(fd->bnode, extent, fd->entryoffset, sizeof(hfsplus_extent_rec)); + hfs_bnode_read(fd->bnode, extent, fd->entryoffset, + sizeof(hfsplus_extent_rec)); return 0; } -static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) +static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, + struct inode *inode, u32 block) { struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res; WARN_ON(!mutex_is_locked(&hip->extents_lock)); - if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) + if (hip->extent_state & HFSPLUS_EXT_DIRTY) __hfsplus_ext_write_extent(inode, fd); res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, @@ -164,10 +175,11 @@ static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct in HFSPLUS_TYPE_DATA); if (!res) { hip->cached_start = be32_to_cpu(fd->key->ext.start_block); - hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); + hip->cached_blocks = + hfsplus_ext_block_count(hip->cached_extents); } else { hip->cached_start = hip->cached_blocks = 0; - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); } return res; } @@ -197,6 +209,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; + int was_dirty = 0; int shift; /* Convert inode block to disk allocation block */ @@ -223,27 +236,37 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, return -EIO; mutex_lock(&hip->extents_lock); + + /* + * hfsplus_ext_read_extent will write out a cached extent into + * the extents btree. In that case we may have to mark the inode + * dirty even for a pure read of an extent here. + */ + was_dirty = (hip->extent_state & HFSPLUS_EXT_DIRTY); res = hfsplus_ext_read_extent(inode, ablock); - if (!res) { - dblock = hfsplus_ext_find_block(hip->cached_extents, - ablock - hip->cached_start); - } else { + if (res) { mutex_unlock(&hip->extents_lock); return -EIO; } + dblock = hfsplus_ext_find_block(hip->cached_extents, + ablock - hip->cached_start); mutex_unlock(&hip->extents_lock); done: - dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); + dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", + inode->i_ino, (long long)iblock, dblock); mask = (1 << sbi->fs_shift) - 1; - map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); + map_bh(bh_result, sb, + (dblock << sbi->fs_shift) + sbi->blockoffset + + (iblock & mask)); if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; hip->fs_blocks++; inode_add_bytes(inode, sb->s_blocksize); - mark_inode_dirty(inode); } + if (create || was_dirty) + mark_inode_dirty(inode); return 0; } @@ -326,7 +349,8 @@ found: } } -int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw *fork, int type) +int hfsplus_free_fork(struct super_block *sb, u32 cnid, + struct hfsplus_fork_raw *fork, int type) { struct hfs_find_data fd; hfsplus_extent_rec ext_entry; @@ -373,12 +397,13 @@ int hfsplus_file_extend(struct inode *inode) u32 start, len, goal; int res; - if (sbi->alloc_file->i_size * 8 < - sbi->total_blocks - sbi->free_blocks + 8) { - // extend alloc file - printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", - sbi->alloc_file->i_size * 8, - sbi->total_blocks, sbi->free_blocks); + if (sbi->total_blocks - sbi->free_blocks + 8 > + sbi->alloc_file->i_size * 8) { + /* extend alloc file */ + printk(KERN_ERR "hfs: extend alloc file! " + "(%llu,%u,%u)\n", + sbi->alloc_file->i_size * 8, + sbi->total_blocks, sbi->free_blocks); return -ENOSPC; } @@ -429,7 +454,7 @@ int hfsplus_file_extend(struct inode *inode) start, len); if (!res) { hfsplus_dump_extent(hip->cached_extents); - hip->flags |= HFSPLUS_FLG_EXT_DIRTY; + hip->extent_state |= HFSPLUS_EXT_DIRTY; hip->cached_blocks += len; } else if (res == -ENOSPC) goto insert_extent; @@ -438,7 +463,7 @@ out: mutex_unlock(&hip->extents_lock); if (!res) { hip->alloc_blocks += len; - mark_inode_dirty(inode); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); } return res; @@ -450,7 +475,7 @@ insert_extent: hip->cached_extents[0].start_block = cpu_to_be32(start); hip->cached_extents[0].block_count = cpu_to_be32(len); hfsplus_dump_extent(hip->cached_extents); - hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; + hip->extent_state |= HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW; hip->cached_start = hip->alloc_blocks; hip->cached_blocks = len; @@ -466,8 +491,9 @@ void hfsplus_file_truncate(struct inode *inode) u32 alloc_cnt, blk_cnt, start; int res; - dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", - inode->i_ino, (long long)hip->phys_size, inode->i_size); + dprint(DBG_INODE, "truncate: %lu, %llu -> %llu\n", + inode->i_ino, (long long)hip->phys_size, + inode->i_size); if (inode->i_size > hip->phys_size) { struct address_space *mapping = inode->i_mapping; @@ -481,7 +507,8 @@ void hfsplus_file_truncate(struct inode *inode) &page, &fsdata); if (res) return; - res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + res = pagecache_write_end(NULL, mapping, size, + 0, 0, page, fsdata); if (res < 0) return; mark_inode_dirty(inode); @@ -513,12 +540,12 @@ void hfsplus_file_truncate(struct inode *inode) alloc_cnt - start, alloc_cnt - blk_cnt); hfsplus_dump_extent(hip->cached_extents); if (blk_cnt > start) { - hip->flags |= HFSPLUS_FLG_EXT_DIRTY; + hip->extent_state |= HFSPLUS_EXT_DIRTY; break; } alloc_cnt = start; hip->cached_start = hip->cached_blocks = 0; - hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); + hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); hfs_brec_remove(&fd); } hfs_find_exit(&fd); @@ -527,7 +554,8 @@ void hfsplus_file_truncate(struct inode *inode) hip->alloc_blocks = blk_cnt; out: hip->phys_size = inode->i_size; - hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; + hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> + sb->s_blocksize_bits; inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); - mark_inode_dirty(inode); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); } diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index cb3653efb57a..d6857523336d 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -23,13 +23,16 @@ #define DBG_EXTENT 0x00000020 #define DBG_BITMAP 0x00000040 -//#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) -//#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE) -//#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) +#if 0 +#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) +#define DBG_MASK (DBG_BNODE_MOD|DBG_CAT_MOD|DBG_INODE) +#define DBG_MASK (DBG_CAT_MOD|DBG_BNODE_REFS|DBG_INODE|DBG_EXTENT) +#endif #define DBG_MASK (0) #define dprint(flg, fmt, args...) \ - if (flg & DBG_MASK) printk(fmt , ## args) + if (flg & DBG_MASK) \ + printk(fmt , ## args) /* Runtime config options */ #define HFSPLUS_DEF_CR_TYPE 0x3F3F3F3F /* '????' */ @@ -37,7 +40,8 @@ #define HFSPLUS_TYPE_DATA 0x00 #define HFSPLUS_TYPE_RSRC 0xFF -typedef int (*btree_keycmp)(const hfsplus_btree_key *, const hfsplus_btree_key *); +typedef int (*btree_keycmp)(const hfsplus_btree_key *, + const hfsplus_btree_key *); #define NODE_HASH_SIZE 256 @@ -61,7 +65,6 @@ struct hfs_btree { unsigned int max_key_len; unsigned int depth; - //unsigned int map1_size, map_size; struct mutex tree_lock; unsigned int pages_per_bnode; @@ -107,8 +110,8 @@ struct hfsplus_vh; struct hfs_btree; struct hfsplus_sb_info { - struct buffer_head *s_vhbh; struct hfsplus_vh *s_vhdr; + struct hfsplus_vh *s_backup_vhdr; struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; struct hfs_btree *attr_tree; @@ -118,7 +121,8 @@ struct hfsplus_sb_info { /* Runtime variables */ u32 blockoffset; - u32 sect_count; + sector_t part_start; + sector_t sect_count; int fs_shift; /* immutable data from the volume header */ @@ -155,6 +159,12 @@ struct hfsplus_sb_info { #define HFSPLUS_SB_FORCE 2 #define HFSPLUS_SB_HFSX 3 #define HFSPLUS_SB_CASEFOLD 4 +#define HFSPLUS_SB_NOBARRIER 5 + +static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} struct hfsplus_inode_info { @@ -170,7 +180,7 @@ struct hfsplus_inode_info { u32 cached_blocks; hfsplus_extent_rec first_extents; hfsplus_extent_rec cached_extents; - unsigned long flags; + unsigned int extent_state; struct mutex extents_lock; /* @@ -185,6 +195,11 @@ struct hfsplus_inode_info { u32 linkid; /* + * Accessed using atomic bitops. + */ + unsigned long flags; + + /* * Protected by i_mutex. */ sector_t fs_blocks; @@ -195,12 +210,34 @@ struct hfsplus_inode_info { struct inode vfs_inode; }; -#define HFSPLUS_FLG_RSRC 0x0001 -#define HFSPLUS_FLG_EXT_DIRTY 0x0002 -#define HFSPLUS_FLG_EXT_NEW 0x0004 +#define HFSPLUS_EXT_DIRTY 0x0001 +#define HFSPLUS_EXT_NEW 0x0002 + +#define HFSPLUS_I_RSRC 0 /* represents a resource fork */ +#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */ +#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */ +#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */ + +#define HFSPLUS_IS_RSRC(inode) \ + test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags) + +static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) +{ + return list_entry(inode, struct hfsplus_inode_info, vfs_inode); +} -#define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) -#define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) +/* + * Mark an inode dirty, and also mark the btree in which the + * specific type of metadata is stored. + * For data or metadata that gets written back by into the catalog btree + * by hfsplus_write_inode a plain mark_inode_dirty call is enough. + */ +static inline void hfsplus_mark_inode_dirty(struct inode *inode, + unsigned int flag) +{ + set_bit(flag, &HFSPLUS_I(inode)->flags); + mark_inode_dirty(inode); +} struct hfs_find_data { /* filled by caller */ @@ -318,9 +355,12 @@ int hfs_brec_read(struct hfs_find_data *, void *, int); int hfs_brec_goto(struct hfs_find_data *, int); /* catalog.c */ -int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *); +int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, + const hfsplus_btree_key *); +int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, + const hfsplus_btree_key *); +void hfsplus_cat_build_key(struct super_block *sb, + hfsplus_btree_key *, u32, struct qstr *); int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *); int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); int hfsplus_delete_cat(u32, struct inode *, struct qstr *); @@ -336,7 +376,8 @@ extern const struct file_operations hfsplus_dir_operations; int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); void hfsplus_ext_write_extent(struct inode *); int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); -int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); +int hfsplus_free_fork(struct super_block *, u32, + struct hfsplus_fork_raw *, int); int hfsplus_file_extend(struct inode *); void hfsplus_file_truncate(struct inode *); @@ -351,6 +392,7 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); int hfsplus_cat_write_inode(struct inode *); struct inode *hfsplus_new_inode(struct super_block *, int); void hfsplus_delete_inode(struct inode *); +int hfsplus_file_fsync(struct file *file, int datasync); /* ioctl.c */ long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); @@ -362,6 +404,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); /* options.c */ int hfsplus_parse_options(char *, struct hfsplus_sb_info *); +int hfsplus_parse_options_remount(char *input, int *force); void hfsplus_fill_defaults(struct hfsplus_sb_info *); int hfsplus_show_options(struct seq_file *, struct vfsmount *); @@ -375,45 +418,26 @@ extern u16 hfsplus_decompose_table[]; extern u16 hfsplus_compose_table[]; /* unicode.c */ -int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); -int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); -int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); -int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2); +int hfsplus_strcasecmp(const struct hfsplus_unistr *, + const struct hfsplus_unistr *); +int hfsplus_strcmp(const struct hfsplus_unistr *, + const struct hfsplus_unistr *); +int hfsplus_uni2asc(struct super_block *, + const struct hfsplus_unistr *, char *, int *); +int hfsplus_asc2uni(struct super_block *, + struct hfsplus_unistr *, const char *, int); +int hfsplus_hash_dentry(const struct dentry *dentry, + const struct inode *inode, struct qstr *str); +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); - int hfs_part_find(struct super_block *, sector_t *, sector_t *); - -/* access macros */ -static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) -{ - return list_entry(inode, struct hfsplus_inode_info, vfs_inode); -} - -#define sb_bread512(sb, sec, data) ({ \ - struct buffer_head *__bh; \ - sector_t __block; \ - loff_t __start; \ - int __offset; \ - \ - __start = (loff_t)(sec) << HFSPLUS_SECTOR_SHIFT;\ - __block = __start >> (sb)->s_blocksize_bits; \ - __offset = __start & ((sb)->s_blocksize - 1); \ - __bh = sb_bread((sb), __block); \ - if (likely(__bh != NULL)) \ - data = (void *)(__bh->b_data + __offset);\ - else \ - data = NULL; \ - __bh; \ -}) +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 6892899fd6fb..927cdd6d5bf5 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -36,7 +36,8 @@ #define HFSP_WRAPOFF_EMBEDSIG 0x7C #define HFSP_WRAPOFF_EMBEDEXT 0x7E -#define HFSP_HIDDENDIR_NAME "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data" +#define HFSP_HIDDENDIR_NAME \ + "\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80\xe2\x90\x80HFS+ Private Data" #define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */ #define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */ diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 8afd7e84f98d..a8df651747f0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -8,6 +8,7 @@ * Inode handling routines */ +#include <linux/blkdev.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/pagemap.h> @@ -77,7 +78,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) if (!tree) return 0; if (tree->node_size >= PAGE_CACHE_SIZE) { - nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); + nidx = page->index >> + (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, nidx); if (!node) @@ -90,7 +92,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) } spin_unlock(&tree->hash_lock); } else { - nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift); + nidx = page->index << + (PAGE_CACHE_SHIFT - tree->node_size_shift); i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift); spin_lock(&tree->hash_lock); do { @@ -166,8 +169,8 @@ const struct dentry_operations hfsplus_dentry_operations = { .d_compare = hfsplus_compare_dentry, }; -static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +static struct dentry *hfsplus_file_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) { struct hfs_find_data fd; struct super_block *sb = dir->i_sb; @@ -190,7 +193,9 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent inode->i_ino = dir->i_ino; INIT_LIST_HEAD(&hip->open_dir_list); mutex_init(&hip->extents_lock); - hip->flags = HFSPLUS_FLG_RSRC; + hip->extent_state = 0; + hip->flags = 0; + set_bit(HFSPLUS_I_RSRC, &hip->flags); hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); err = hfsplus_find_cat(sb, dir->i_ino, &fd); @@ -219,7 +224,8 @@ out: return NULL; } -static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) +static void hfsplus_get_perms(struct inode *inode, + struct hfsplus_perm *perms, int dir) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); u16 mode; @@ -302,29 +308,41 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static int hfsplus_file_fsync(struct file *filp, int datasync) +int hfsplus_file_fsync(struct file *file, int datasync) { - struct inode *inode = filp->f_mapping->host; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - if (sb->s_dirt) { - if (!(sb->s_flags & MS_RDONLY)) - hfsplus_sync_fs(sb, 1); - else - sb->s_dirt = 0; + struct inode *inode = file->f_mapping->host; + struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); + int error = 0, error2; + + /* + * Sync inode metadata into the catalog and extent trees. + */ + sync_inode_metadata(inode, 1); + + /* + * And explicitly write out the btrees. + */ + if (test_and_clear_bit(HFSPLUS_I_CAT_DIRTY, &hip->flags)) + error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping); + + if (test_and_clear_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags)) { + error2 = + filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); + if (!error) + error = error2; } - /* .. finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; + if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { + error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); + if (!error) + error = error2; + } + + if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + + return error; } static const struct inode_operations hfsplus_file_inode_operations = { @@ -337,7 +355,7 @@ static const struct inode_operations hfsplus_file_inode_operations = { }; static const struct file_operations hfsplus_file_operations = { - .llseek = generic_file_llseek, + .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, .write = do_sync_write, @@ -370,6 +388,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) INIT_LIST_HEAD(&hip->open_dir_list); mutex_init(&hip->extents_lock); atomic_set(&hip->opencnt, 0); + hip->extent_state = 0; hip->flags = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); @@ -457,7 +476,8 @@ void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) } } -void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) +void hfsplus_inode_write_fork(struct inode *inode, + struct hfsplus_fork_raw *fork) { memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, sizeof(hfsplus_extent_rec)); @@ -499,13 +519,14 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); - hfsplus_inode_read_fork(inode, HFSPLUS_IS_DATA(inode) ? - &file->data_fork : &file->rsrc_fork); + hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? + &file->rsrc_fork : &file->data_fork); hfsplus_get_perms(inode, &file->permissions, 0); inode->i_nlink = 1; if (S_ISREG(inode->i_mode)) { if (file->permissions.dev) - inode->i_nlink = be32_to_cpu(file->permissions.dev); + inode->i_nlink = + be32_to_cpu(file->permissions.dev); inode->i_op = &hfsplus_file_inode_operations; inode->i_fop = &hfsplus_file_operations; inode->i_mapping->a_ops = &hfsplus_aops; @@ -578,7 +599,9 @@ int hfsplus_cat_write_inode(struct inode *inode) sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); hfsplus_cat_set_perms(inode, &file->permissions); - if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) + if (HFSPLUS_FLG_IMMUTABLE & + (file->permissions.rootflags | + file->permissions.userflags)) file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); else file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); @@ -588,6 +611,8 @@ int hfsplus_cat_write_inode(struct inode *inode) hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); } + + set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags); out: hfs_find_exit(&fd); return 0; diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 40a85a3ded6e..508ce662ce12 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -28,7 +28,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) if (inode->i_flags & S_IMMUTABLE) flags |= FS_IMMUTABLE_FL; - if (inode->i_flags |= S_APPEND) + if (inode->i_flags & S_APPEND) flags |= FS_APPEND_FL; if (hip->userflags & HFSPLUS_FLG_NODUMP) flags |= FS_NODUMP_FL; @@ -147,9 +147,11 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, res = -ERANGE; } else res = -EOPNOTSUPP; - if (!res) + if (!res) { hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } out: hfs_find_exit(&fd); return res; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index f9ab276a4d8d..bb62a5882147 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -23,6 +23,7 @@ enum { opt_umask, opt_uid, opt_gid, opt_part, opt_session, opt_nls, opt_nodecompose, opt_decompose, + opt_barrier, opt_nobarrier, opt_force, opt_err }; @@ -37,6 +38,8 @@ static const match_table_t tokens = { { opt_nls, "nls=%s" }, { opt_decompose, "decompose" }, { opt_nodecompose, "nodecompose" }, + { opt_barrier, "barrier" }, + { opt_nobarrier, "nobarrier" }, { opt_force, "force" }, { opt_err, NULL } }; @@ -65,6 +68,32 @@ static inline int match_fourchar(substring_t *arg, u32 *result) return 0; } +int hfsplus_parse_options_remount(char *input, int *force) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int token; + + if (!input) + return 0; + + while ((p = strsep(&input, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case opt_force: + *force = 1; + break; + default: + break; + } + } + + return 1; +} + /* Parse options from mount. Returns 0 on failure */ /* input is the options passed to mount() as a string */ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) @@ -136,7 +165,9 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (p) sbi->nls = load_nls(p); if (!sbi->nls) { - printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); + printk(KERN_ERR "hfs: unable to load " + "nls mapping \"%s\"\n", + p); kfree(p); return 0; } @@ -148,6 +179,12 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) case opt_nodecompose: set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); break; + case opt_barrier: + clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags); + break; + case opt_nobarrier: + set_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags); + break; case opt_force: set_bit(HFSPLUS_SB_FORCE, &sbi->flags); break; @@ -177,7 +214,8 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); if (sbi->type != HFSPLUS_DEF_CR_TYPE) seq_printf(seq, ",type=%.4s", (char *)&sbi->type); - seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid); + seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, + sbi->uid, sbi->gid); if (sbi->part >= 0) seq_printf(seq, ",part=%u", sbi->part); if (sbi->session >= 0) @@ -186,5 +224,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",nls=%s", sbi->nls->charset); if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) seq_printf(seq, ",nodecompose"); + if (test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) + seq_printf(seq, ",nobarrier"); return 0; } diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 208b16c645cc..d66ad113b1cc 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -2,7 +2,8 @@ * linux/fs/hfsplus/part_tbl.c * * Copyright (C) 1996-1997 Paul H. Hargrove - * This file may be distributed under the terms of the GNU General Public License. + * This file may be distributed under the terms of + * the GNU General Public License. * * Original code to handle the new style Mac partition table based on * a patch contributed by Holger Schemel (aeglos@valinor.owl.de). @@ -13,6 +14,7 @@ * */ +#include <linux/slab.h> #include "hfsplus_fs.h" /* offsets to various blocks */ @@ -58,77 +60,94 @@ struct new_pmap { */ struct old_pmap { __be16 pdSig; /* Signature bytes */ - struct old_pmap_entry { + struct old_pmap_entry { __be32 pdStart; __be32 pdSize; __be32 pdFSID; } pdEntry[42]; } __packed; +static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm, + sector_t *part_start, sector_t *part_size) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + int i; + + for (i = 0; i < 42; i++) { + struct old_pmap_entry *p = &pm->pdEntry[i]; + + if (p->pdStart && p->pdSize && + p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && + (sbi->part < 0 || sbi->part == i)) { + *part_start += be32_to_cpu(p->pdStart); + *part_size = be32_to_cpu(p->pdSize); + return 0; + } + } + + return -ENOENT; +} + +static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, + sector_t *part_start, sector_t *part_size) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + int size = be32_to_cpu(pm->pmMapBlkCnt); + int res; + int i = 0; + + do { + if (!memcmp(pm->pmPartType, "Apple_HFS", 9) && + (sbi->part < 0 || sbi->part == i)) { + *part_start += be32_to_cpu(pm->pmPyPartStart); + *part_size = be32_to_cpu(pm->pmPartBlkCnt); + return 0; + } + + if (++i >= size) + return -ENOENT; + + res = hfsplus_submit_bio(sb->s_bdev, + *part_start + HFS_PMAP_BLK + i, + pm, READ); + if (res) + return res; + } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); + + return -ENOENT; +} + /* - * hfs_part_find() - * - * Parse the partition map looking for the - * start and length of the 'part'th HFS partition. + * Parse the partition map looking for the start and length of a + * HFS/HFS+ partition. */ int hfs_part_find(struct super_block *sb, - sector_t *part_start, sector_t *part_size) + sector_t *part_start, sector_t *part_size) { - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct buffer_head *bh; - __be16 *data; - int i, size, res; + void *data; + int res; + + data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; - res = -ENOENT; - bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK, data); - if (!bh) - return -EIO; + res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, + data, READ); + if (res) + return res; - switch (be16_to_cpu(*data)) { + switch (be16_to_cpu(*((__be16 *)data))) { case HFS_OLD_PMAP_MAGIC: - { - struct old_pmap *pm; - struct old_pmap_entry *p; - - pm = (struct old_pmap *)bh->b_data; - p = pm->pdEntry; - size = 42; - for (i = 0; i < size; p++, i++) { - if (p->pdStart && p->pdSize && - p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && - (sbi->part < 0 || sbi->part == i)) { - *part_start += be32_to_cpu(p->pdStart); - *part_size = be32_to_cpu(p->pdSize); - res = 0; - } - } + res = hfs_parse_old_pmap(sb, data, part_start, part_size); break; - } case HFS_NEW_PMAP_MAGIC: - { - struct new_pmap *pm; - - pm = (struct new_pmap *)bh->b_data; - size = be32_to_cpu(pm->pmMapBlkCnt); - for (i = 0; i < size;) { - if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && - (sbi->part < 0 || sbi->part == i)) { - *part_start += be32_to_cpu(pm->pmPyPartStart); - *part_size = be32_to_cpu(pm->pmPartBlkCnt); - res = 0; - break; - } - brelse(bh); - bh = sb_bread512(sb, *part_start + HFS_PMAP_BLK + ++i, pm); - if (!bh) - return -EIO; - if (pm->pmSig != cpu_to_be16(HFS_NEW_PMAP_MAGIC)) - break; - } + res = hfs_parse_new_pmap(sb, data, part_start, part_size); + break; + default: + res = -ENOENT; break; - } } - brelse(bh); + kfree(data); return res; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 52cc746d3ba3..6ee6ad20acf2 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/blkdev.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/vfs.h> @@ -66,6 +67,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); mutex_init(&HFSPLUS_I(inode)->extents_lock); HFSPLUS_I(inode)->flags = 0; + HFSPLUS_I(inode)->extent_state = 0; HFSPLUS_I(inode)->rsrc_inode = NULL; atomic_set(&HFSPLUS_I(inode)->opencnt, 0); @@ -157,45 +159,65 @@ int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_vh *vhdr = sbi->s_vhdr; + int write_backup = 0; + int error, error2; + + if (!wait) + return 0; dprint(DBG_SUPER, "hfsplus_write_super\n"); - mutex_lock(&sbi->vh_mutex); - mutex_lock(&sbi->alloc_mutex); sb->s_dirt = 0; + /* + * Explicitly write out the special metadata inodes. + * + * While these special inodes are marked as hashed and written + * out peridocically by the flusher threads we redirty them + * during writeout of normal inodes, and thus the life lock + * prevents us from getting the latest state to disk. + */ + error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping); + error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); + if (!error) + error = error2; + error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); + if (!error) + error = error2; + + mutex_lock(&sbi->vh_mutex); + mutex_lock(&sbi->alloc_mutex); vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); vhdr->folder_count = cpu_to_be32(sbi->folder_count); vhdr->file_count = cpu_to_be32(sbi->file_count); - mark_buffer_dirty(sbi->s_vhbh); if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { - if (sbi->sect_count) { - struct buffer_head *bh; - u32 block, offset; - - block = sbi->blockoffset; - block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); - offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); - printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", - sbi->blockoffset, sbi->sect_count, - block, offset); - bh = sb_bread(sb, block); - if (bh) { - vhdr = (struct hfsplus_vh *)(bh->b_data + offset); - if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { - memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); - mark_buffer_dirty(bh); - brelse(bh); - } else - printk(KERN_WARNING "hfs: backup not found!\n"); - } - } + memcpy(sbi->s_backup_vhdr, sbi->s_vhdr, sizeof(*sbi->s_vhdr)); + write_backup = 1; } + + error2 = hfsplus_submit_bio(sb->s_bdev, + sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr, WRITE_SYNC); + if (!error) + error = error2; + if (!write_backup) + goto out; + + error2 = hfsplus_submit_bio(sb->s_bdev, + sbi->part_start + sbi->sect_count - 2, + sbi->s_backup_vhdr, WRITE_SYNC); + if (!error) + error2 = error; +out: mutex_unlock(&sbi->alloc_mutex); mutex_unlock(&sbi->vh_mutex); - return 0; + + if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) + blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); + + return error; } static void hfsplus_write_super(struct super_block *sb) @@ -215,23 +237,22 @@ static void hfsplus_put_super(struct super_block *sb) if (!sb->s_fs_info) return; - if (sb->s_dirt) - hfsplus_write_super(sb); if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; vhdr->modify_date = hfsp_now2mt(); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); - mark_buffer_dirty(sbi->s_vhbh); - sync_dirty_buffer(sbi->s_vhbh); + + hfsplus_sync_fs(sb, 1); } hfs_btree_close(sbi->cat_tree); hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); iput(sbi->hidden_dir); - brelse(sbi->s_vhbh); + kfree(sbi->s_vhdr); + kfree(sbi->s_backup_vhdr); unload_nls(sbi->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -263,26 +284,31 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) return 0; if (!(*flags & MS_RDONLY)) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; - struct hfsplus_sb_info sbi; + int force = 0; - memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); - sbi.nls = HFSPLUS_SB(sb)->nls; - if (!hfsplus_parse_options(data, &sbi)) + if (!hfsplus_parse_options_remount(data, &force)) return -EINVAL; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, " - "running fsck.hfsplus is recommended. leaving read-only.\n"); + printk(KERN_WARNING "hfs: filesystem was " + "not cleanly unmounted, " + "running fsck.hfsplus is recommended. " + "leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; - } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { + } else if (force) { /* nothing */ - } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { - printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); + } else if (vhdr->attributes & + cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { + printk(KERN_WARNING "hfs: filesystem is marked locked, " + "leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; - } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { - printk(KERN_WARNING "hfs: filesystem is marked journaled, leaving read-only.\n"); + } else if (vhdr->attributes & + cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { + printk(KERN_WARNING "hfs: filesystem is " + "marked journaled, " + "leaving read-only.\n"); sb->s_flags |= MS_RDONLY; *flags |= MS_RDONLY; } @@ -372,17 +398,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { - printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " - "running fsck.hfsplus is recommended. mounting read-only.\n"); + printk(KERN_WARNING "hfs: Filesystem was " + "not cleanly unmounted, " + "running fsck.hfsplus is recommended. " + "mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { /* nothing */ } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); sb->s_flags |= MS_RDONLY; - } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " - "use the force option at your own risk, mounting read-only.\n"); + } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && + !(sb->s_flags & MS_RDONLY)) { + printk(KERN_WARNING "hfs: write access to " + "a journaled filesystem is not supported, " + "use the force option at your own risk, " + "mounting read-only.\n"); sb->s_flags |= MS_RDONLY; } @@ -419,7 +450,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto cleanup; } - sb->s_root->d_op = &hfsplus_dentry_operations; + d_set_d_op(sb->s_root, &hfsplus_dentry_operations); str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; @@ -449,19 +480,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) be32_add_cpu(&vhdr->write_count, 1); vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); - mark_buffer_dirty(sbi->s_vhbh); - sync_dirty_buffer(sbi->s_vhbh); + hfsplus_sync_fs(sb, 1); if (!sbi->hidden_dir) { - printk(KERN_DEBUG "hfs: create hidden dir...\n"); - mutex_lock(&sbi->vh_mutex); sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, &str, sbi->hidden_dir); mutex_unlock(&sbi->vh_mutex); - mark_inode_dirty(sbi->hidden_dir); + hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); } out: unload_nls(sbi->nls); @@ -488,11 +516,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) return i ? &i->vfs_inode : NULL; } -static void hfsplus_destroy_inode(struct inode *inode) +static void hfsplus_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); } +static void hfsplus_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hfsplus_i_callback); +} + #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) static struct dentry *hfsplus_mount(struct file_system_type *fs_type, diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index b66d67de882c..a3f0bfcc881e 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -17,14 +17,14 @@ /* Returns folded char, or 0 if ignorable */ static inline u16 case_fold(u16 c) { - u16 tmp; - - tmp = hfsplus_case_fold_table[c >> 8]; - if (tmp) - tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; - else - tmp = c; - return tmp; + u16 tmp; + + tmp = hfsplus_case_fold_table[c >> 8]; + if (tmp) + tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; + else + tmp = c; + return tmp; } /* Compare unicode strings, return values like normal strcmp */ @@ -118,7 +118,9 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) return NULL; } -int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) +int hfsplus_uni2asc(struct super_block *sb, + const struct hfsplus_unistr *ustr, + char *astr, int *len_p) { const hfsplus_unichr *ip; struct nls_table *nls = HFSPLUS_SB(sb)->nls; @@ -171,7 +173,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c goto same; c1 = be16_to_cpu(*ip); if (likely(compose)) - ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1); + ce1 = hfsplus_compose_lookup( + hfsplus_compose_table, c1); if (ce1) break; switch (c0) { @@ -199,7 +202,8 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c if (ce2) { i = 1; while (i < ustrlen) { - ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i])); + ce1 = hfsplus_compose_lookup(ce2, + be16_to_cpu(ip[i])); if (!ce1) break; i++; @@ -211,7 +215,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c goto done; } } - same: +same: switch (c0) { case 0: cc = 0x2400; @@ -222,7 +226,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c default: cc = c0; } - done: +done: res = nls->uni2char(cc, op, len); if (res < 0) { if (res == -ENAMETOOLONG) @@ -320,7 +324,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) +int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *str) { struct super_block *sb = dentry->d_sb; const char *astr; @@ -363,9 +368,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) +int hfsplus_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct super_block *sb = dentry->d_sb; + struct super_block *sb = parent->d_sb; int casefold, decompose, size; int dsize1, dsize2, len1, len2; const u16 *dstr1, *dstr2; @@ -375,10 +383,10 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); - astr1 = s1->name; - len1 = s1->len; - astr2 = s2->name; - len2 = s2->len; + astr1 = str; + len1 = len; + astr2 = name->name; + len2 = name->len; dsize1 = dsize2 = 0; dstr1 = dstr2 = NULL; @@ -388,7 +396,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * astr1 += size; len1 -= size; - if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) { + if (decompose) + dstr1 = decompose_unichar(c, &dsize1); + if (!decompose || !dstr1) { c1 = c; dstr1 = &c1; dsize1 = 1; @@ -400,7 +410,9 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * astr2 += size; len2 -= size; - if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) { + if (decompose) + dstr2 = decompose_unichar(c, &dsize2); + if (!decompose || !dstr2) { c2 = c; dstr2 = &c2; dsize2 = 1; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 8972c20b3216..196231794f64 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,6 +24,40 @@ struct hfsplus_wd { u16 embed_count; }; +static void hfsplus_end_io_sync(struct bio *bio, int err) +{ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + complete(bio->bi_private); +} + +int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, + void *data, int rw) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct bio *bio; + + bio = bio_alloc(GFP_NOIO, 1); + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_end_io = hfsplus_end_io_sync; + bio->bi_private = &wait; + + /* + * We always submit one sector at a time, so bio_add_page must not fail. + */ + if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, + offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) + BUG(); + + submit_bio(rw, bio); + wait_for_completion(&wait); + + if (!bio_flagged(bio, BIO_UPTODATE)) + return -EIO; + return 0; +} + static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) { u32 extent; @@ -40,12 +74,14 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) !(attrib & HFSP_WRAP_ATTRIB_SPARED)) return 0; - wd->ablk_size = be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE)); + wd->ablk_size = + be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE)); if (wd->ablk_size < HFSPLUS_SECTOR_SIZE) return 0; if (wd->ablk_size % HFSPLUS_SECTOR_SIZE) return 0; - wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); + wd->ablk_start = + be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; @@ -68,7 +104,8 @@ static int hfsplus_get_last_session(struct super_block *sb, if (HFSPLUS_SB(sb)->session >= 0) { te.cdte_track = HFSPLUS_SB(sb)->session; te.cdte_format = CDROM_LBA; - res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); + res = ioctl_by_bdev(sb->s_bdev, + CDROMREADTOCENTRY, (unsigned long)&te); if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { *start = (sector_t)te.cdte_addr.lba << 2; return 0; @@ -77,7 +114,8 @@ static int hfsplus_get_last_session(struct super_block *sb, return -EINVAL; } ms_info.addr_format = CDROM_LBA; - res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, (unsigned long)&ms_info); + res = ioctl_by_bdev(sb->s_bdev, CDROMMULTISESSION, + (unsigned long)&ms_info); if (!res && ms_info.xa_flag) *start = (sector_t)ms_info.addr.lba << 2; return 0; @@ -88,100 +126,112 @@ static int hfsplus_get_last_session(struct super_block *sb, int hfsplus_read_wrapper(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct buffer_head *bh; - struct hfsplus_vh *vhdr; struct hfsplus_wd wd; sector_t part_start, part_size; u32 blocksize; + int error = 0; + error = -EINVAL; blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE); if (!blocksize) - return -EINVAL; + goto out; if (hfsplus_get_last_session(sb, &part_start, &part_size)) - return -EINVAL; + goto out; if ((u64)part_start + part_size > 0x100000000ULL) { pr_err("hfs: volumes larger than 2TB are not supported yet\n"); - return -EINVAL; + goto out; } - while (1) { - bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); - if (!bh) - return -EIO; - - if (vhdr->signature == cpu_to_be16(HFSP_WRAP_MAGIC)) { - if (!hfsplus_read_mdb(vhdr, &wd)) - goto error; - wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; - part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; - part_size = wd.embed_count * wd.ablk_size; - brelse(bh); - bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); - if (!bh) - return -EIO; - } - if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) - break; - if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { - set_bit(HFSPLUS_SB_HFSX, &sbi->flags); - break; - } - brelse(bh); - /* check for a partition block + error = -ENOMEM; + sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_vhdr) + goto out; + sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); + if (!sbi->s_backup_vhdr) + goto out_free_vhdr; + +reread: + error = hfsplus_submit_bio(sb->s_bdev, + part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr, READ); + if (error) + goto out_free_backup_vhdr; + + error = -EINVAL; + switch (sbi->s_vhdr->signature) { + case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX): + set_bit(HFSPLUS_SB_HFSX, &sbi->flags); + /*FALLTHRU*/ + case cpu_to_be16(HFSPLUS_VOLHEAD_SIG): + break; + case cpu_to_be16(HFSP_WRAP_MAGIC): + if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) + goto out; + wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; + part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; + part_size = wd.embed_count * wd.ablk_size; + goto reread; + default: + /* + * Check for a partition block. + * * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) - return -EINVAL; + goto out; + goto reread; + } + + error = hfsplus_submit_bio(sb->s_bdev, + part_start + part_size - 2, + sbi->s_backup_vhdr, READ); + if (error) + goto out_free_backup_vhdr; + + error = -EINVAL; + if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) { + printk(KERN_WARNING + "hfs: invalid secondary volume header\n"); + goto out_free_backup_vhdr; } - blocksize = be32_to_cpu(vhdr->blocksize); - brelse(bh); + blocksize = be32_to_cpu(sbi->s_vhdr->blocksize); - /* block size must be at least as large as a sector - * and a multiple of 2 + /* + * Block size must be at least as large as a sector and a multiple of 2. */ - if (blocksize < HFSPLUS_SECTOR_SIZE || - ((blocksize - 1) & blocksize)) - return -EINVAL; + if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize)) + goto out_free_backup_vhdr; sbi->alloc_blksz = blocksize; sbi->alloc_blksz_shift = 0; while ((blocksize >>= 1) != 0) sbi->alloc_blksz_shift++; blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); - /* align block size to block offset */ + /* + * Align block size to block offset. + */ while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) blocksize >>= 1; if (sb_set_blocksize(sb, blocksize) != blocksize) { - printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", blocksize); - return -EINVAL; + printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", + blocksize); + goto out_free_backup_vhdr; } sbi->blockoffset = part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); + sbi->part_start = part_start; sbi->sect_count = part_size; sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; - - bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); - if (!bh) - return -EIO; - - /* should still be the same... */ - if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { - if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) - goto error; - } else { - if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) - goto error; - } - - sbi->s_vhbh = bh; - sbi->s_vhdr = vhdr; - return 0; - error: - brelse(bh); - return -EINVAL; + +out_free_backup_vhdr: + kfree(sbi->s_backup_vhdr); +out_free_vhdr: + kfree(sbi->s_vhdr); +out: + return error; } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2c0f148a49e6..d3244d949a4e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) -static int hostfs_d_delete(struct dentry *dentry) +static int hostfs_d_delete(const struct dentry *dentry) { return 1; } @@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args, static char *__dentry_name(struct dentry *dentry, char *name) { - char *p = __dentry_path(dentry, name, PATH_MAX); + char *p = dentry_path_raw(dentry, name, PATH_MAX); char *root; size_t len; - spin_unlock(&dcache_lock); - root = dentry->d_sb->s_fs_info; len = strlen(root); if (IS_ERR(p)) { @@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry) if (!name) return NULL; - spin_lock(&dcache_lock); return __dentry_name(dentry, name); /* will unlock */ } static char *inode_name(struct inode *ino) { struct dentry *dentry; - char *name = __getname(); - if (!name) - return NULL; + char *name; - spin_lock(&dcache_lock); - if (list_empty(&ino->i_dentry)) { - spin_unlock(&dcache_lock); - __putname(name); + dentry = d_find_alias(ino); + if (!dentry) return NULL; - } - dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); - return __dentry_name(dentry, name); /* will unlock */ + + name = dentry_name(dentry); + + dput(dentry); + + return name; } static char *follow_link(char *link) @@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode) } } -static void hostfs_destroy_inode(struct inode *inode) +static void hostfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HOSTFS_I(inode)); } +static void hostfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hostfs_i_callback); +} + static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { const char *root_path = vfs->mnt_sb->s_fs_info; @@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, goto out_put; d_add(dentry, inode); - dentry->d_op = &hostfs_dentry_ops; + d_set_d_op(dentry, &hostfs_dentry_ops); return NULL; out_put: @@ -746,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired) +int hostfs_permission(struct inode *ino, int desired, unsigned int flags) { char *name; int r = 0, w = 0, x = 0, err; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + if (desired & MAY_READ) r = 1; if (desired & MAY_WRITE) w = 1; if (desired & MAY_EXEC) x = 1; @@ -765,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired) err = access_file(name, r, w, x); __putname(name); if (!err) - err = generic_permission(ino, desired, NULL); + err = generic_permission(ino, desired, flags, NULL); return err; } diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 67d9d36b3d5f..32c13a94e1e9 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -12,7 +12,8 @@ * Note: the dentry argument is the parent dentry. */ -static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) +static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { unsigned long hash; int i; @@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) return 0; } -static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +static int hpfs_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - unsigned al=a->len; - unsigned bl=b->len; - hpfs_adjust_length(a->name, &al); + unsigned al = len; + unsigned bl = name->len; + + hpfs_adjust_length(str, &al); /*hpfs_adjust_length(b->name, &bl);*/ - /* 'a' is the qstr of an already existing dentry, so the name - * must be valid. 'b' must be validated first. + + /* + * 'str' is the nane of an already existing dentry, so the name + * must be valid. 'name' must be validated first. */ - if (hpfs_chk_name(b->name, &bl)) + if (hpfs_chk_name(name->name, &bl)) return 1; - if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) + if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0)) return 1; return 0; } @@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = { void hpfs_set_dentry_operations(struct dentry *dentry) { - dentry->d_op = &hpfs_dentry_operations; + d_set_d_op(dentry, &hpfs_dentry_operations); } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 11c2b4080f65..f4ad9e31ddc4 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -419,7 +419,7 @@ again: unlock_kernel(); return -ENOSPC; } - if (generic_permission(inode, MAY_WRITE, NULL) || + if (generic_permission(inode, MAY_WRITE, 0, NULL) || !S_ISREG(inode->i_mode) || get_write_access(inode)) { d_rehash(dentry); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 6c5f01597c3a..49935ba78db8 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void hpfs_destroy_inode(struct inode *inode) +static void hpfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } +static void hpfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hpfs_i_callback); +} + static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index f702b5f713fc..87ed48e0343d 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino) mntput(ino->i_sb->s_fs_info); } -static void hppfs_destroy_inode(struct inode *inode) +static void hppfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kfree(HPPFS_I(inode)); } +static void hppfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, hppfs_i_callback); +} + static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a5fe68189eed..9885082b470f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void hugetlbfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); + call_rcu(&inode->i_rcu, hugetlbfs_i_callback); } static const struct address_space_operations hugetlbfs_aops = { diff --git a/fs/inode.c b/fs/inode.c index ae2727ab0c3a..da85e56378f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; -static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; -static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(unsigned int, nr_inodes); static struct kmem_cache *inode_cachep __read_mostly; -static inline int get_nr_inodes(void) +static int get_nr_inodes(void) { - return percpu_counter_sum_positive(&nr_inodes); + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_inodes, i); + return sum < 0 ? 0 : sum; } static inline int get_nr_inodes_unused(void) { - return percpu_counter_sum_positive(&nr_inodes_unused); + return inodes_stat.nr_unused; } int get_nr_dirty_inodes(void) { + /* not actually dirty inodes, but a wild approximation */ int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); return nr_dirty > 0 ? nr_dirty : 0; - } /* @@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); - inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif - percpu_counter_inc(&nr_inodes); + this_cpu_inc(nr_inodes); return 0; out: @@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } +void free_inode_nonrcu(struct inode *inode) +{ + kmem_cache_free(inode_cachep, inode); +} +EXPORT_SYMBOL(free_inode_nonrcu); + void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); @@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif - percpu_counter_dec(&nr_inodes); + this_cpu_dec(nr_inodes); } EXPORT_SYMBOL(__destroy_inode); +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(inode_cachep, inode); +} + static void destroy_inode(struct inode *inode) { BUG_ON(!list_empty(&inode->i_lru)); @@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode) if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else - kmem_cache_free(inode_cachep, (inode)); + call_rcu(&inode->i_rcu, i_callback); } /* @@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode) { if (list_empty(&inode->i_lru)) { list_add(&inode->i_lru, &inode_lru); - percpu_counter_inc(&nr_inodes_unused); + inodes_stat.nr_unused++; } } @@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode) { if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } } @@ -430,6 +445,7 @@ void end_writeback(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); inode_sync_wait(inode); + /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(end_writeback); @@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb) list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb) list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } spin_unlock(&inode_lock); @@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan) if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { list_del_init(&inode->i_lru); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; continue; } @@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan) */ list_move(&inode->i_lru, &freeable); list_del_init(&inode->i_wb_list); - percpu_counter_dec(&nr_inodes_unused); + inodes_stat.nr_unused--; } if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); @@ -1648,8 +1664,6 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); - percpu_counter_init(&nr_inodes, 0); - percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) diff --git a/fs/internal.h b/fs/internal.h index e43b9a4dbf4e..9687c2ee2735 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); +extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bfdeb82a53be..844a7903c72f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -26,16 +26,32 @@ #define BEQUIET -static int isofs_hashi(struct dentry *parent, struct qstr *qstr); -static int isofs_hash(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_hashi(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_dentry_cmpi(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET -static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); -static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); +static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, + struct qstr *qstr); +static int isofs_dentry_cmpi_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); +static int isofs_dentry_cmp_ms(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name); #endif static void isofs_put_super(struct super_block *sb) @@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void isofs_destroy_inode(struct inode *inode) +static void isofs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } +static void isofs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, isofs_i_callback); +} + static void init_once(void *foo) { struct iso_inode_info *ei = foo; @@ -160,7 +183,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) +isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) } /* - * Case insensitive compare of two isofs names. - */ -static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) -{ - int alen, blen; - - /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; - if (ms) { - while (alen && a->name[alen-1] == '.') - alen--; - while (blen && b->name[blen-1] == '.') - blen--; - } - if (alen == blen) { - if (strnicmp(a->name, b->name, alen) == 0) - return 0; - } - return 1; -} - -/* - * Case sensitive compare of two isofs names. + * Compare of two isofs names. */ -static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, - struct qstr *b, int ms) +static int isofs_dentry_cmp_common( + unsigned int len, const char *str, + const struct qstr *name, int ms, int ci) { int alen, blen; /* A filename cannot end in '.' or we treat it like it has none */ - alen = a->len; - blen = b->len; + alen = name->len; + blen = len; if (ms) { - while (alen && a->name[alen-1] == '.') + while (alen && name->name[alen-1] == '.') alen--; - while (blen && b->name[blen-1] == '.') + while (blen && str[blen-1] == '.') blen--; } if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) - return 0; + if (ci) { + if (strnicmp(name->name, str, alen) == 0) + return 0; + } else { + if (strncmp(name->name, str, alen) == 0) + return 0; + } } return 1; } static int -isofs_hash(struct dentry *dentry, struct qstr *qstr) +isofs_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 0); } static int -isofs_hashi(struct dentry *dentry, struct qstr *qstr) +isofs_hashi(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 0); } static int -isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 0); } static int -isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 0); + return isofs_dentry_cmp_common(len, str, name, 0, 1); } #ifdef CONFIG_JOLIET static int -isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 1); } static int -isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) +isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 1); } static int -isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmp_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 0); } static int -isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) +isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - return isofs_dentry_cmpi_common(dentry, a, b, 1); + return isofs_dentry_cmp_common(len, str, name, 1, 1); } #endif @@ -932,7 +949,7 @@ root_found: table += 2; if (opt.check == 'r') table++; - s->s_root->d_op = &isofs_dentry_ops[table]; + d_set_d_op(s->s_root, &isofs_dentry_ops[table]); kfree(opt.iocharset); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 0d23abfd4280..679a849c3b27 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) qstr.name = compare; qstr.len = dlen; - return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); + return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, + dentry->d_name.len, dentry->d_name.name, &qstr); } /* @@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam struct inode *inode; struct page *page; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); page = alloc_page(GFP_USER); if (!page) diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 54a92fd02bbd..95b79672150a 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; } -int jffs2_check_acl(struct inode *inode, int mask) +int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int rc; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 5e42de8d9541..3119f59253d3 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -26,7 +26,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -extern int jffs2_check_acl(struct inode *, int); +extern int jffs2_check_acl(struct inode *, int, unsigned int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index c86041b866a4..853b8e300084 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb) return &f->vfs_inode; } -static void jffs2_destroy_inode(struct inode *inode) +static void jffs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } +static void jffs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, jffs2_i_callback); +} + static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 1057a4998e4e..e5de9422fa32 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -114,10 +114,14 @@ out: return rc; } -int jfs_check_acl(struct inode *inode, int mask) +int jfs_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 54e07559878d..f9285c4900fa 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_check_acl(struct inode *, int); +int jfs_check_acl(struct inode *, int, unsigned int flags); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_acl_chmod(struct inode *inode); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 231ca4af9bce..4414e3a42264 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -18,6 +18,7 @@ */ #include <linux/fs.h> +#include <linux/namei.h> #include <linux/ctype.h> #include <linux/quotaops.h> #include <linux/exportfs.h> @@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc jfs_info("jfs_lookup: name = %s", name); if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); if ((name[0] == '.') && (len == 1)) inum = dip->i_ino; @@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc dentry = d_splice_alias(ip, dentry); if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - dentry->d_op = &jfs_ci_dentry_operations; + d_set_d_op(dentry, &jfs_ci_dentry_operations); return dentry; } @@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = { .llseek = generic_file_llseek, }; -static int jfs_ci_hash(struct dentry *dir, struct qstr *this) +static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; @@ -1586,32 +1588,63 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this) return 0; } -static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) +static int jfs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; +out: + return result; +} +static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) + return -ECHILD; /* - * We want creates to preserve case. A negative dentry, a, that - * has a different case than b may cause a new entry to be created - * with the wrong case. Since we can't tell if a comes from a negative - * dentry, we blindly replace it with b. This should be harmless if - * a is not a negative dentry. + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. */ - memcpy((unsigned char *)a->name, b->name, a->len); -out: - return result; + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + } + return 1; } const struct dentry_operations jfs_ci_dentry_operations = { .d_hash = jfs_ci_hash, .d_compare = jfs_ci_compare, + .d_revalidate = jfs_ci_revalidate, }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0669fc1cc3bf..3150d766e0d4 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) return &jfs_inode->vfs_inode; } +static void jfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct jfs_inode_info *ji = JFS_IP(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(jfs_inode_cachep, ji); +} + static void jfs_destroy_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); @@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - kmem_cache_free(jfs_inode_cachep, ji); + call_rcu(&inode->i_rcu, jfs_i_callback); } static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_no_root; if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; + d_set_d_op(sb->s_root, &jfs_ci_dentry_operations); /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; diff --git a/fs/libfs.c b/fs/libfs.c index a3accdf528ad..889311e3d06b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,11 @@ #include <asm/uaccess.h> +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf) * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(struct dentry *dentry) +static int simple_delete_dentry(const struct dentry *dentry) { return 1; } @@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dentry->d_op = &simple_dentry_operations; + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } @@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = file->f_path.dentry; + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + /* d_lock not required for cursor */ list_del(&cursor->d_u.d_child); - p = file->f_path.dentry->d_subdirs.next; - while (n && p != &file->f_path.dentry->d_subdirs) { + p = dentry->d_subdirs.next; + while (n && p != &dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (!d_unhashed(next) && next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(next)) n--; + spin_unlock(&next->d_lock); p = p->next; } list_add_tail(&cursor->d_u.d_child, p); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); } } - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) i++; /* fallthrough */ default: - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (d_unhashed(next) || !next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); continue; + } - spin_unlock(&dcache_lock); + spin_unlock(&next->d_lock); + spin_unlock(&dentry->d_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) return 0; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ list_move(q, p); + spin_unlock(&next->d_lock); p = q; filp->f_pos++; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); } return 0; } @@ -259,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den return 0; } -static inline int simple_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; - spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(child)) { + spin_unlock(&child->d_lock); goto out; + } + spin_unlock(&child->d_lock); + } ret = 1; out: - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return ret; } diff --git a/fs/locks.c b/fs/locks.c index 8729347bcd1a..08415b2a6d36 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; if ((arg == F_WRLCK) - && ((atomic_read(&dentry->d_count) > 1) + && ((dentry->d_count > 1) || (atomic_read(&inode->i_count) > 1))) goto out; } diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 409dfd65e9a1..f9ddf0c388c8 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry, return __logfs_create(dir, dentry, inode, target, destlen); } -static int logfs_permission(struct inode *inode, int mask) +static int logfs_permission(struct inode *inode, int mask, unsigned int flags) { - return generic_permission(inode, mask, NULL); + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + return generic_permission(inode, mask, flags, NULL); } static int logfs_link(struct dentry *old_dentry, struct inode *dir, diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index d8c71ece098f..03b8c240aeda 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached) return __logfs_iget(sb, ino); } +static void logfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); +} + static void __logfs_destroy_inode(struct inode *inode) { struct logfs_inode *li = logfs_inode(inode); BUG_ON(li->li_block); list_del(&li->li_freeing_list); - kmem_cache_free(logfs_inode_cache, li); + call_rcu(&inode->i_rcu, logfs_i_callback); } static void logfs_destroy_inode(struct inode *inode) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index f46ee8b0e135..9da29706f91c 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) super->s_journal_seg[i] = segno; super->s_journal_ec[i] = ec; logfs_set_segment_reserved(sb, segno); - err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); + err = btree_insert32(head, segno, (void *)1, GFP_NOFS); BUG_ON(err); /* mempool should prevent this */ err = logfs_erase_segment(sb, segno, 1); BUG_ON(err); /* FIXME: remount-ro would be nicer */ diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 6127baf0e188..ee99a9f5dfd3 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode) /* FIXME: transaction is part of logfs_block now. Is that enough? */ err = logfs_write_buf(master_inode, page, 0); + if (err) + move_page_to_inode(inode, page); + logfs_put_write_page(page); return err; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index fb2020858a34..ae0b83f476a6 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void minix_destroy_inode(struct inode *inode) +static void minix_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(minix_inode_cachep, minix_i(inode)); } +static void minix_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, minix_i_callback); +} + static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c0d35a3accef..1b9e07728a9f 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); diff --git a/fs/namei.c b/fs/namei.c index 5362af9b7372..24ece10470b6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname); /* * This does basic POSIX ACL permission checking */ -static int acl_permission_check(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) +static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, + int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { umode_t mode = inode->i_mode; @@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask, mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { - int error = check_acl(inode, mask); + int error = check_acl(inode, mask, flags); if (error != -EAGAIN) return error; } @@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask, } /** - * generic_permission - check for access rights on a Posix-like filesystem + * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * @check_acl: optional callback to check for Posix ACLs + * @flags: IPERM_FLAG_ flags. * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which - * are used for other things.. + * are used for other things. + * + * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk + * request cannot be satisfied (eg. requires blocking or too much complexity). + * It would then be called again in ref-walk mode. */ -int generic_permission(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) +int generic_permission(struct inode *inode, int mask, unsigned int flags, + int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { int ret; /* * Do the basic POSIX ACL permission checks. */ - ret = acl_permission_check(inode, mask, check_acl); + ret = acl_permission_check(inode, mask, flags, check_acl); if (ret != -EACCES) return ret; @@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask) } if (inode->i_op->permission) - retval = inode->i_op->permission(inode, mask); + retval = inode->i_op->permission(inode, mask, 0); else - retval = generic_permission(inode, mask, inode->i_op->check_acl); + retval = generic_permission(inode, mask, 0, + inode->i_op->check_acl); if (retval) return retval; @@ -362,6 +368,18 @@ void path_get(struct path *path) EXPORT_SYMBOL(path_get); /** + * path_get_long - get a long reference to a path + * @path: path to get the reference to + * + * Given a path increment the reference count to the dentry and the vfsmount. + */ +void path_get_long(struct path *path) +{ + mntget_long(path->mnt); + dget(path->dentry); +} + +/** * path_put - put a reference to a path * @path: path to put the reference to * @@ -375,6 +393,185 @@ void path_put(struct path *path) EXPORT_SYMBOL(path_put); /** + * path_put_long - put a long reference to a path + * @path: path to put the reference to + * + * Given a path decrement the reference count to the dentry and the vfsmount. + */ +void path_put_long(struct path *path) +{ + dput(path->dentry); + mntput_long(path->mnt); +} + +/** + * nameidata_drop_rcu - drop this nameidata out of rcu-walk + * @nd: nameidata pathwalk data to drop + * Returns: 0 on success, -ECHILD on failure + * + * Path walking has 2 modes, rcu-walk and ref-walk (see + * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt + * to drop out of rcu-walk mode and take normal reference counts on dentries + * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take + * refcounts at the last known good point before rcu-walk got stuck, so + * ref-walk may continue from there. If this is not successful (eg. a seqcount + * has changed), then failure is returned and path walk restarts from the + * beginning in ref-walk mode. + * + * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into + * ref-walk. Must be called from rcu-walk context. + */ +static int nameidata_drop_rcu(struct nameidata *nd) +{ + struct fs_struct *fs = current->fs; + struct dentry *dentry = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + if (nd->root.mnt) { + spin_lock(&fs->lock); + if (nd->root.mnt != fs->root.mnt || + nd->root.dentry != fs->root.dentry) + goto err_root; + } + spin_lock(&dentry->d_lock); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err; + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + if (nd->root.mnt) { + path_get(&nd->root); + spin_unlock(&fs->lock); + } + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + nd->flags &= ~LOOKUP_RCU; + return 0; +err: + spin_unlock(&dentry->d_lock); +err_root: + if (nd->root.mnt) + spin_unlock(&fs->lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) + return nameidata_drop_rcu(nd); + return 0; +} + +/** + * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk + * @nd: nameidata pathwalk data to drop + * @dentry: dentry to drop + * Returns: 0 on success, -ECHILD on failure + * + * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, + * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + */ +static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) +{ + struct fs_struct *fs = current->fs; + struct dentry *parent = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + if (nd->root.mnt) { + spin_lock(&fs->lock); + if (nd->root.mnt != fs->root.mnt || + nd->root.dentry != fs->root.dentry) + goto err_root; + } + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err; + /* + * If the sequence check on the child dentry passed, then the child has + * not been removed from its parent. This means the parent dentry must + * be valid and able to take a reference at this point. + */ + BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); + BUG_ON(!parent->d_count); + parent->d_count++; + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + if (nd->root.mnt) { + path_get(&nd->root); + spin_unlock(&fs->lock); + } + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + nd->flags &= ~LOOKUP_RCU; + return 0; +err: + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); +err_root: + if (nd->root.mnt) + spin_unlock(&fs->lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) +{ + if (nd->flags & LOOKUP_RCU) + return nameidata_dentry_drop_rcu(nd, dentry); + return 0; +} + +/** + * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk + * @nd: nameidata pathwalk data to drop + * Returns: 0 on success, -ECHILD on failure + * + * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. + * nd->path should be the final element of the lookup, so nd->root is discarded. + * Must be called from rcu-walk context. + */ +static int nameidata_drop_rcu_last(struct nameidata *nd) +{ + struct dentry *dentry = nd->path.dentry; + + BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + spin_lock(&dentry->d_lock); + if (!__d_rcu_to_refcount(dentry, nd->seq)) + goto err_unlock; + BUG_ON(nd->inode != dentry->d_inode); + spin_unlock(&dentry->d_lock); + + mntget(nd->path.mnt); + + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + + return 0; + +err_unlock: + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + return -ECHILD; +} + +/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ +static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) +{ + if (likely(nd->flags & LOOKUP_RCU)) + return nameidata_drop_rcu_last(nd); + return 0; +} + +/** * release_open_intent - free up open intent resources * @nd: pointer to nameidata */ @@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd) fput(nd->intent.open.file); } +static int d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + int status; + + status = dentry->d_op->d_revalidate(dentry, nd); + if (status == -ECHILD) { + if (nameidata_dentry_drop_rcu(nd, dentry)) + return status; + status = dentry->d_op->d_revalidate(dentry, nd); + } + + return status; +} + static inline struct dentry * do_revalidate(struct dentry *dentry, struct nameidata *nd) { - int status = dentry->d_op->d_revalidate(dentry, nd); + int status; + + status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { /* * The dentry failed validation. @@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) * the dentry otherwise d_revalidate is asking us * to return a fail status. */ - if (!status) { + if (status < 0) { + /* If we're in rcu-walk, we don't have a ref */ + if (!(nd->flags & LOOKUP_RCU)) + dput(dentry); + dentry = ERR_PTR(status); + + } else { + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_dentry_drop_rcu_maybe(nd, dentry)) + return ERR_PTR(-ECHILD); if (!d_invalidate(dentry)) { dput(dentry); dentry = NULL; } - } else { - dput(dentry); - dentry = ERR_PTR(status); } } return dentry; } +static inline int need_reval_dot(struct dentry *dentry) +{ + if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + return 0; + + if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) + return 0; + + return 1; +} + /* * force_reval_path - force revalidation of a dentry * @@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd) /* * only check on filesystems where it's possible for the dentry to - * become stale. It's assumed that if this flag is set then the - * d_revalidate op will also be defined. + * become stale. */ - if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + if (!need_reval_dot(dentry)) return 0; - status = dentry->d_op->d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd); if (status > 0) return 0; @@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd) * short-cut DAC fails, then call ->permission() to do more * complete permission check. */ -static int exec_permission(struct inode *inode) +static inline int exec_permission(struct inode *inode, unsigned int flags) { int ret; if (inode->i_op->permission) { - ret = inode->i_op->permission(inode, MAY_EXEC); - if (!ret) - goto ok; - return ret; + ret = inode->i_op->permission(inode, MAY_EXEC, flags); + } else { + ret = acl_permission_check(inode, MAY_EXEC, flags, + inode->i_op->check_acl); } - ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); - if (!ret) + if (likely(!ret)) goto ok; + if (ret == -ECHILD) + return ret; if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) goto ok; return ret; ok: - return security_inode_permission(inode, MAY_EXEC); + return security_inode_exec_permission(inode, flags); } static __always_inline void set_root(struct nameidata *nd) @@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd) static int link_path_walk(const char *, struct nameidata *); +static __always_inline void set_root_rcu(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); + } +} + static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { + int ret; + if (IS_ERR(link)) goto fail; @@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l nd->path = nd->root; path_get(&nd->root); } + nd->inode = nd->path.dentry->d_inode; - return link_path_walk(link, nd); + ret = link_path_walk(link, nd); + return ret; fail: path_put(&nd->path); return PTR_ERR(link); @@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { - dput(nd->path.dentry); - if (nd->path.mnt != path->mnt) { - mntput(nd->path.mnt); - nd->path.mnt = path->mnt; + if (!(nd->flags & LOOKUP_RCU)) { + dput(nd->path.dentry); + if (nd->path.mnt != path->mnt) + mntput(nd->path.mnt); } + nd->path.mnt = path->mnt; nd->path.dentry = path->dentry; } @@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p) if (path->mnt != nd->path.mnt) { path_to_nameidata(path, nd); + nd->inode = nd->path.dentry->d_inode; dget(dentry); } mntget(path->mnt); + nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); @@ -591,6 +841,20 @@ loop: return err; } +static int follow_up_rcu(struct path *path) +{ + struct vfsmount *parent; + struct dentry *mountpoint; + + parent = path->mnt->mnt_parent; + if (parent == path->mnt) + return 0; + mountpoint = path->mnt->mnt_mountpoint; + path->dentry = mountpoint; + path->mnt = parent; + return 1; +} + int follow_up(struct path *path) { struct vfsmount *parent; @@ -612,9 +876,24 @@ int follow_up(struct path *path) return 1; } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c +/* + * serialization is taken care of in namespace.c */ +static void __follow_mount_rcu(struct nameidata *nd, struct path *path, + struct inode **inode) +{ + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + return; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&path->dentry->d_seq); + *inode = path->dentry->d_inode; + } +} + static int __follow_mount(struct path *path) { int res = 0; @@ -645,9 +924,6 @@ static void follow_mount(struct path *path) } } -/* no need for dcache_lock, as serialization is taken care in - * namespace.c - */ int follow_down(struct path *path) { struct vfsmount *mounted; @@ -663,7 +939,42 @@ int follow_down(struct path *path) return 0; } -static __always_inline void follow_dotdot(struct nameidata *nd) +static int follow_dotdot_rcu(struct nameidata *nd) +{ + struct inode *inode = nd->inode; + + set_root_rcu(nd); + + while(1) { + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { + break; + } + if (nd->path.dentry != nd->path.mnt->mnt_root) { + struct dentry *old = nd->path.dentry; + struct dentry *parent = old->d_parent; + unsigned seq; + + seq = read_seqcount_begin(&parent->d_seq); + if (read_seqcount_retry(&old->d_seq, nd->seq)) + return -ECHILD; + inode = parent->d_inode; + nd->path.dentry = parent; + nd->seq = seq; + break; + } + if (!follow_up_rcu(&nd->path)) + break; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + inode = nd->path.dentry->d_inode; + } + __follow_mount_rcu(nd, &nd->path, &inode); + nd->inode = inode; + + return 0; +} + +static void follow_dotdot(struct nameidata *nd) { set_root(nd); @@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) break; } follow_mount(&nd->path); + nd->inode = nd->path.dentry->d_inode; } /* @@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, * It _is_ time-critical. */ static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path) + struct path *path, struct inode **inode) { struct vfsmount *mnt = nd->path.mnt; - struct dentry *dentry, *parent; + struct dentry *dentry, *parent = nd->path.dentry; struct inode *dir; /* * See if the low-level filesystem might want * to use its own hash.. */ - if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { - int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); + if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { + int err = parent->d_op->d_hash(parent, nd->inode, name); if (err < 0) return err; } @@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, * of a false negative due to a concurrent rename, we're going to * do the non-racy lookup, below. */ - dentry = __d_lookup(nd->path.dentry, name); - if (!dentry) - goto need_lookup; + if (nd->flags & LOOKUP_RCU) { + unsigned seq; + + *inode = nd->inode; + dentry = __d_lookup_rcu(parent, name, &seq, inode); + if (!dentry) { + if (nameidata_drop_rcu(nd)) + return -ECHILD; + goto need_lookup; + } + /* Memory barrier in read_seqcount_begin of child is enough */ + if (__read_seqcount_retry(&parent->d_seq, nd->seq)) + return -ECHILD; + + nd->seq = seq; + if (dentry->d_flags & DCACHE_OP_REVALIDATE) + goto need_revalidate; + path->mnt = mnt; + path->dentry = dentry; + __follow_mount_rcu(nd, path, inode); + } else { + dentry = __d_lookup(parent, name); + if (!dentry) + goto need_lookup; found: - if (dentry->d_op && dentry->d_op->d_revalidate) - goto need_revalidate; + if (dentry->d_flags & DCACHE_OP_REVALIDATE) + goto need_revalidate; done: - path->mnt = mnt; - path->dentry = dentry; - __follow_mount(path); + path->mnt = mnt; + path->dentry = dentry; + __follow_mount(path); + *inode = path->dentry->d_inode; + } return 0; need_lookup: - parent = nd->path.dentry; dir = parent->d_inode; + BUG_ON(nd->inode != dir); mutex_lock(&dir->i_mutex); /* @@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) static int link_path_walk(const char *name, struct nameidata *nd) { struct path next; - struct inode *inode; int err; unsigned int lookup_flags = nd->flags; @@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (!*name) goto return_reval; - inode = nd->path.dentry->d_inode; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); /* At this point we know we have a real path component. */ for(;;) { + struct inode *inode; unsigned long hash; struct qstr this; unsigned int c; nd->flags |= LOOKUP_CONTINUE; - err = exec_permission(inode); + if (nd->flags & LOOKUP_RCU) { + err = exec_permission(nd->inode, IPERM_FLAG_RCU); + if (err == -ECHILD) { + if (nameidata_drop_rcu(nd)) + return -ECHILD; + goto exec_again; + } + } else { +exec_again: + err = exec_permission(nd->inode, 0); + } if (err) break; @@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; - follow_dotdot(nd); - inode = nd->path.dentry->d_inode; + if (nd->flags & LOOKUP_RCU) { + if (follow_dotdot_rcu(nd)) + return -ECHILD; + } else + follow_dotdot(nd); /* fallthrough */ case 1: continue; } /* This does the actual lookups.. */ - err = do_lookup(nd, &this, &next); + err = do_lookup(nd, &this, &next, &inode); if (err) break; - err = -ENOENT; - inode = next.dentry->d_inode; if (!inode) goto out_dput; if (inode->i_op->follow_link) { + /* We commonly drop rcu-walk here */ + if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) + return -ECHILD; + BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) goto return_err; + nd->inode = nd->path.dentry->d_inode; err = -ENOENT; - inode = nd->path.dentry->d_inode; - if (!inode) + if (!nd->inode) break; - } else + } else { path_to_nameidata(&next, nd); + nd->inode = inode; + } err = -ENOTDIR; - if (!inode->i_op->lookup) + if (!nd->inode->i_op->lookup) break; continue; /* here ends the main loop */ @@ -913,32 +1264,39 @@ last_component: if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; - follow_dotdot(nd); - inode = nd->path.dentry->d_inode; + if (nd->flags & LOOKUP_RCU) { + if (follow_dotdot_rcu(nd)) + return -ECHILD; + } else + follow_dotdot(nd); /* fallthrough */ case 1: goto return_reval; } - err = do_lookup(nd, &this, &next); + err = do_lookup(nd, &this, &next, &inode); if (err) break; - inode = next.dentry->d_inode; if (follow_on_final(inode, lookup_flags)) { + if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) + return -ECHILD; + BUG_ON(inode != next.dentry->d_inode); err = do_follow_link(&next, nd); if (err) goto return_err; - inode = nd->path.dentry->d_inode; - } else + nd->inode = nd->path.dentry->d_inode; + } else { path_to_nameidata(&next, nd); + nd->inode = inode; + } err = -ENOENT; - if (!inode) + if (!nd->inode) break; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op->lookup) + if (!nd->inode->i_op->lookup) break; } goto return_base; @@ -958,25 +1316,43 @@ return_reval: * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { - err = -ESTALE; + if (need_reval_dot(nd->path.dentry)) { /* Note: we do not d_invalidate() */ - if (!nd->path.dentry->d_op->d_revalidate( - nd->path.dentry, nd)) + err = d_revalidate(nd->path.dentry, nd); + if (!err) + err = -ESTALE; + if (err < 0) break; } return_base: + if (nameidata_drop_rcu_last_maybe(nd)) + return -ECHILD; return 0; out_dput: - path_put_conditional(&next, nd); + if (!(nd->flags & LOOKUP_RCU)) + path_put_conditional(&next, nd); break; } - path_put(&nd->path); + if (!(nd->flags & LOOKUP_RCU)) + path_put(&nd->path); return_err: return err; } +static inline int path_walk_rcu(const char *name, struct nameidata *nd) +{ + current->total_link_count = 0; + + return link_path_walk(name, nd); +} + +static inline int path_walk_simple(const char *name, struct nameidata *nd) +{ + current->total_link_count = 0; + + return link_path_walk(name, nd); +} + static int path_walk(const char *name, struct nameidata *nd) { struct path save = nd->path; @@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd) return result; } +static void path_finish_rcu(struct nameidata *nd) +{ + if (nd->flags & LOOKUP_RCU) { + /* RCU dangling. Cancel it. */ + nd->flags &= ~LOOKUP_RCU; + nd->root.mnt = NULL; + rcu_read_unlock(); + br_read_unlock(vfsmount_lock); + } + if (nd->file) + fput(nd->file); +} + +static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) +{ + int retval = 0; + int fput_needed; + struct file *file; + + nd->last_type = LAST_ROOT; /* if there are only slashes... */ + nd->flags = flags | LOOKUP_RCU; + nd->depth = 0; + nd->root.mnt = NULL; + nd->file = NULL; + + if (*name=='/') { + struct fs_struct *fs = current->fs; + unsigned seq; + + br_read_lock(vfsmount_lock); + rcu_read_lock(); + + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + nd->path = nd->root; + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + + } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; + unsigned seq; + + br_read_lock(vfsmount_lock); + rcu_read_lock(); + + do { + seq = read_seqcount_begin(&fs->seq); + nd->path = fs->pwd; + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + + } else { + struct dentry *dentry; + + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) + goto out_fail; + + dentry = file->f_path.dentry; + + retval = -ENOTDIR; + if (!S_ISDIR(dentry->d_inode->i_mode)) + goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto fput_fail; + + nd->path = file->f_path; + if (fput_needed) + nd->file = file; + + nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + br_read_lock(vfsmount_lock); + rcu_read_lock(); + } + nd->inode = nd->path.dentry->d_inode; + return 0; + +fput_fail: + fput_light(file, fput_needed); +out_fail: + return retval; +} + static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei fput_light(file, fput_needed); } + nd->inode = nd->path.dentry->d_inode; return 0; fput_fail: @@ -1054,16 +1518,53 @@ out_fail: static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { - int retval = path_init(dfd, name, flags, nd); - if (!retval) - retval = path_walk(name, nd); - if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && - nd->path.dentry->d_inode)) - audit_inode(name, nd->path.dentry); + int retval; + + /* + * Path walking is largely split up into 2 different synchronisation + * schemes, rcu-walk and ref-walk (explained in + * Documentation/filesystems/path-lookup.txt). These share much of the + * path walk code, but some things particularly setup, cleanup, and + * following mounts are sufficiently divergent that functions are + * duplicated. Typically there is a function foo(), and its RCU + * analogue, foo_rcu(). + * + * -ECHILD is the error number of choice (just to avoid clashes) that + * is returned if some aspect of an rcu-walk fails. Such an error must + * be handled by restarting a traditional ref-walk (which will always + * be able to complete). + */ + retval = path_init_rcu(dfd, name, flags, nd); + if (unlikely(retval)) + return retval; + retval = path_walk_rcu(name, nd); + path_finish_rcu(nd); if (nd->root.mnt) { path_put(&nd->root); nd->root.mnt = NULL; } + + if (unlikely(retval == -ECHILD || retval == -ESTALE)) { + /* slower, locked walk */ + if (retval == -ESTALE) + flags |= LOOKUP_REVAL; + retval = path_init(dfd, name, flags, nd); + if (unlikely(retval)) + return retval; + retval = path_walk(name, nd); + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } + } + + if (likely(!retval)) { + if (unlikely(!audit_dummy_context())) { + if (nd->path.dentry && nd->inode) + audit_inode(name, nd->path.dentry); + } + } + return retval; } @@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, path_get(&nd->path); nd->root = nd->path; path_get(&nd->root); + nd->inode = nd->path.dentry->d_inode; retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && - nd->path.dentry->d_inode)) + nd->inode)) audit_inode(name, nd->path.dentry); path_put(&nd->root); @@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name, struct dentry *dentry; int err; - err = exec_permission(inode); + err = exec_permission(inode, 0); if (err) return ERR_PTR(err); @@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name, * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { - err = base->d_op->d_hash(base, name); + if (base->d_flags & DCACHE_OP_HASH) { + err = base->d_op->d_hash(base, inode, name); dentry = ERR_PTR(err); if (err < 0) goto out; @@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); if (!dentry) @@ -1490,6 +1992,7 @@ out_unlock: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->path.dentry); nd->path.dentry = path->dentry; + if (error) return error; /* Don't check for write permission, don't truncate */ @@ -1584,6 +2087,9 @@ exit: return ERR_PTR(error); } +/* + * Handle O_CREAT case for do_filp_open + */ static struct file *do_last(struct nameidata *nd, struct path *path, int open_flag, int acc_mode, int mode, const char *pathname) @@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path, follow_dotdot(nd); dir = nd->path.dentry; case LAST_DOT: - if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { - if (!dir->d_op->d_revalidate(dir, nd)) { + if (need_reval_dot(dir)) { + error = d_revalidate(nd->path.dentry, nd); + if (!error) error = -ESTALE; + if (error < 0) goto exit; - } } /* fallthrough */ case LAST_ROOT: - if (open_flag & O_CREAT) - goto exit; - /* fallthrough */ + goto exit; case LAST_BIND: audit_inode(pathname, dir); goto ok; } /* trailing slashes? */ - if (nd->last.name[nd->last.len]) { - if (open_flag & O_CREAT) - goto exit; - nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; - } - - /* just plain open? */ - if (!(open_flag & O_CREAT)) { - error = do_lookup(nd, &nd->last, path); - if (error) - goto exit; - error = -ENOENT; - if (!path->dentry->d_inode) - goto exit_dput; - if (path->dentry->d_inode->i_op->follow_link) - return NULL; - error = -ENOTDIR; - if (nd->flags & LOOKUP_DIRECTORY) { - if (!path->dentry->d_inode->i_op->lookup) - goto exit_dput; - } - path_to_nameidata(path, nd); - audit_inode(pathname, nd->path.dentry); - goto ok; - } + if (nd->last.name[nd->last.len]) + goto exit; - /* OK, it's O_CREAT */ mutex_lock(&dir->d_inode->i_mutex); path->dentry = lookup_hash(nd); @@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, return NULL; path_to_nameidata(path, nd); + nd->inode = path->dentry->d_inode; error = -EISDIR; - if (S_ISDIR(path->dentry->d_inode->i_mode)) + if (S_ISDIR(nd->inode->i_mode)) goto exit; ok: filp = finish_open(nd, open_flag, acc_mode); @@ -1743,11 +2225,14 @@ struct file *do_filp_open(int dfd, const char *pathname, struct path path; int count = 0; int flag = open_to_namei_flags(open_flag); - int force_reval = 0; + int flags; if (!(open_flag & O_CREAT)) mode = 0; + /* Must never be set by userspace */ + open_flag &= ~FMODE_NONOTIFY; + /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's @@ -1769,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname, if (open_flag & O_APPEND) acc_mode |= MAY_APPEND; - /* find the parent */ -reval: - error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); + flags = LOOKUP_OPEN; + if (open_flag & O_CREAT) { + flags |= LOOKUP_CREATE; + if (open_flag & O_EXCL) + flags |= LOOKUP_EXCL; + } + if (open_flag & O_DIRECTORY) + flags |= LOOKUP_DIRECTORY; + if (!(open_flag & O_NOFOLLOW)) + flags |= LOOKUP_FOLLOW; + + filp = get_empty_filp(); + if (!filp) + return ERR_PTR(-ENFILE); + + filp->f_flags = open_flag; + nd.intent.open.file = filp; + nd.intent.open.flags = flag; + nd.intent.open.create_mode = mode; + + if (open_flag & O_CREAT) + goto creat; + + /* !O_CREAT, simple open */ + error = do_path_lookup(dfd, pathname, flags, &nd); + if (unlikely(error)) + goto out_filp; + error = -ELOOP; + if (!(nd.flags & LOOKUP_FOLLOW)) { + if (nd.inode->i_op->follow_link) + goto out_path; + } + error = -ENOTDIR; + if (nd.flags & LOOKUP_DIRECTORY) { + if (!nd.inode->i_op->lookup) + goto out_path; + } + audit_inode(pathname, nd.path.dentry); + filp = finish_open(&nd, open_flag, acc_mode); + return filp; + +creat: + /* OK, have to create the file. Find the parent. */ + error = path_init_rcu(dfd, pathname, + LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); if (error) - return ERR_PTR(error); - if (force_reval) - nd.flags |= LOOKUP_REVAL; + goto out_filp; + error = path_walk_rcu(pathname, &nd); + path_finish_rcu(&nd); + if (unlikely(error == -ECHILD || error == -ESTALE)) { + /* slower, locked walk */ + if (error == -ESTALE) { +reval: + flags |= LOOKUP_REVAL; + } + error = path_init(dfd, pathname, + LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); + if (error) + goto out_filp; - current->total_link_count = 0; - error = link_path_walk(pathname, &nd); - if (error) { - filp = ERR_PTR(error); - goto out; + error = path_walk_simple(pathname, &nd); } - if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) + if (unlikely(error)) + goto out_filp; + if (unlikely(!audit_dummy_context())) audit_inode(pathname, nd.path.dentry); /* * We have the parent and last component. */ - - error = -ENFILE; - filp = get_empty_filp(); - if (filp == NULL) - goto exit_parent; - nd.intent.open.file = filp; - filp->f_flags = open_flag; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = mode; - nd.flags &= ~LOOKUP_PARENT; - nd.flags |= LOOKUP_OPEN; - if (open_flag & O_CREAT) { - nd.flags |= LOOKUP_CREATE; - if (open_flag & O_EXCL) - nd.flags |= LOOKUP_EXCL; - } - if (open_flag & O_DIRECTORY) - nd.flags |= LOOKUP_DIRECTORY; - if (!(open_flag & O_NOFOLLOW)) - nd.flags |= LOOKUP_FOLLOW; + nd.flags = flags; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; - struct inode *inode = path.dentry->d_inode; void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; @@ -1837,36 +2352,33 @@ reval: goto exit_dput; error = __do_follow_link(&path, &nd, &cookie); if (unlikely(error)) { + if (!IS_ERR(cookie) && nd.inode->i_op->put_link) + nd.inode->i_op->put_link(path.dentry, &nd, cookie); /* nd.path had been dropped */ - if (!IS_ERR(cookie) && inode->i_op->put_link) - inode->i_op->put_link(path.dentry, &nd, cookie); - path_put(&path); - release_open_intent(&nd); - filp = ERR_PTR(error); - goto out; + nd.path = path; + goto out_path; } holder = path; nd.flags &= ~LOOKUP_PARENT; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); - if (inode->i_op->put_link) - inode->i_op->put_link(holder.dentry, &nd, cookie); + if (nd.inode->i_op->put_link) + nd.inode->i_op->put_link(holder.dentry, &nd, cookie); path_put(&holder); } out: if (nd.root.mnt) path_put(&nd.root); - if (filp == ERR_PTR(-ESTALE) && !force_reval) { - force_reval = 1; + if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) goto reval; - } return filp; exit_dput: path_put_conditional(&path, &nd); +out_path: + path_put(&nd.path); +out_filp: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); -exit_parent: - path_put(&nd.path); filp = ERR_PTR(error); goto out; } @@ -2127,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry) { dget(dentry); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) == 2) + if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) diff --git a/fs/namespace.c b/fs/namespace.c index 3dbfc072ec70..3ddfd9046c44 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } +/* + * vfsmount lock must be held for read + */ +static inline void mnt_add_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_add(mnt->mnt_pcp->mnt_count, n); +#else + preempt_disable(); + mnt->mnt_count += n; + preempt_enable(); +#endif +} + +static inline void mnt_set_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + this_cpu_write(mnt->mnt_pcp->mnt_count, n); +#else + mnt->mnt_count = n; +#endif +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_inc_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, 1); +} + +/* + * vfsmount lock must be held for read + */ +static inline void mnt_dec_count(struct vfsmount *mnt) +{ + mnt_add_count(mnt, -1); +} + +/* + * vfsmount lock must be held for write + */ +unsigned int mnt_get_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = atomic_read(&mnt->mnt_longrefs); + int cpu; + + for_each_possible_cpu(cpu) { + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); + if (!mnt->mnt_pcp) + goto out_free_devname; + + atomic_set(&mnt->mnt_longrefs, 1); +#else + mnt->mnt_count = 1; + mnt->mnt_writers = 0; +#endif + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif -#ifdef CONFIG_SMP - mnt->mnt_writers = alloc_percpu(int); - if (!mnt->mnt_writers) - goto out_free_devname; -#else - mnt->mnt_writers = 0; -#endif } return mnt; @@ -216,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt) } EXPORT_SYMBOL_GPL(__mnt_is_readonly); -static inline void inc_mnt_writers(struct vfsmount *mnt) +static inline void mnt_inc_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; + this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif } -static inline void dec_mnt_writers(struct vfsmount *mnt) +static inline void mnt_dec_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; + this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif } -static unsigned int count_mnt_writers(struct vfsmount *mnt) +static unsigned int mnt_get_writers(struct vfsmount *mnt) { #ifdef CONFIG_SMP unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_writers, cpu); + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; @@ -273,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt) int ret = 0; preempt_disable(); - inc_mnt_writers(mnt); + mnt_inc_writers(mnt); /* - * The store to inc_mnt_writers must be visible before we pass + * The store to mnt_inc_writers must be visible before we pass * MNT_WRITE_HOLD loop below, so that the slowpath can see our * incremented count after it has set MNT_WRITE_HOLD. */ @@ -289,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt) */ smp_rmb(); if (__mnt_is_readonly(mnt)) { - dec_mnt_writers(mnt); + mnt_dec_writers(mnt); ret = -EROFS; goto out; } @@ -317,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt) if (__mnt_is_readonly(mnt)) return -EROFS; preempt_disable(); - inc_mnt_writers(mnt); + mnt_inc_writers(mnt); preempt_enable(); return 0; } @@ -351,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file); void mnt_drop_write(struct vfsmount *mnt) { preempt_disable(); - dec_mnt_writers(mnt); + mnt_dec_writers(mnt); preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_drop_write); @@ -384,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) * MNT_WRITE_HOLD, so it can't be decremented by another CPU while * we're counting up here. */ - if (count_mnt_writers(mnt) > 0) + if (mnt_get_writers(mnt) > 0) ret = -EBUSY; else mnt->mnt_flags |= MNT_READONLY; @@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt) kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP - free_percpu(mnt->mnt_writers); + free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } @@ -492,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } /* + * Clear dentry's mounted state if it has no remaining mounts. + * vfsmount_lock must be held for write. + */ +static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) +{ + unsigned u; + + for (u = 0; u < HASH_SIZE; u++) { + struct vfsmount *p; + + list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { + if (p->mnt_mountpoint == dentry) + return; + } + } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); +} + +/* * vfsmount lock must be held for write */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) @@ -502,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - old_path->dentry->d_mounted--; + dentry_reset_mounted(old_path->mnt, old_path->dentry); } /* @@ -513,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, { child_mnt->mnt_parent = mntget(mnt); child_mnt->mnt_mountpoint = dget(dentry); - dentry->d_mounted++; + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); } /* @@ -629,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, return NULL; } -static inline void __mntput(struct vfsmount *mnt) +static inline void mntfree(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; + /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this @@ -639,38 +724,123 @@ static inline void __mntput(struct vfsmount *mnt) * to make r/w->r/o transitions. */ /* - * atomic_dec_and_lock() used to deal with ->mnt_count decrements - * provides barriers, so count_mnt_writers() below is safe. AV + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. */ - WARN_ON(count_mnt_writers(mnt)); + WARN_ON(mnt_get_writers(mnt)); fsnotify_vfsmount_delete(mnt); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); } -void mntput_no_expire(struct vfsmount *mnt) -{ -repeat: - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) - return; +#ifdef CONFIG_SMP +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ + if (!longrefs) { +put_again: + br_read_lock(vfsmount_lock); + if (likely(atomic_read(&mnt->mnt_longrefs))) { + mnt_dec_count(mnt); + br_read_unlock(vfsmount_lock); + return; + } + br_read_unlock(vfsmount_lock); + } else { + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) + return; + } + br_write_lock(vfsmount_lock); - if (!atomic_dec_and_test(&mnt->mnt_count)) { + if (!longrefs) + mnt_dec_count(mnt); + else + atomic_dec(&mnt->mnt_longrefs); + if (mnt_get_count(mnt)) { br_write_unlock(vfsmount_lock); return; } - if (likely(!mnt->mnt_pinned)) { + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - __mntput(mnt); + acct_auto_close_mnt(mnt); + goto put_again; + } + br_write_unlock(vfsmount_lock); + mntfree(mnt); +} +#else +static inline void __mntput(struct vfsmount *mnt, int longrefs) +{ +put_again: + mnt_dec_count(mnt); + if (likely(mnt_get_count(mnt))) return; + br_write_lock(vfsmount_lock); + if (unlikely(mnt->mnt_pinned)) { + mnt_add_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto put_again; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; br_write_unlock(vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + mntfree(mnt); +} +#endif + +static void mntput_no_expire(struct vfsmount *mnt) +{ + __mntput(mnt, 0); +} + +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 0); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + mnt_inc_count(mnt); + return mnt; } -EXPORT_SYMBOL(mntput_no_expire); +EXPORT_SYMBOL(mntget); + +void mntput_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) { + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + __mntput(mnt, 1); + } +#else + mntput(mnt); +#endif +} +EXPORT_SYMBOL(mntput_long); + +struct vfsmount *mntget_long(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + if (mnt) + atomic_inc(&mnt->mnt_longrefs); + return mnt; +#else + return mntget(mnt); +#endif +} +EXPORT_SYMBOL(mntget_long); void mnt_pin(struct vfsmount *mnt) { @@ -678,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt) mnt->mnt_pinned++; br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + mnt_inc_count(mnt); mnt->mnt_pinned--; } br_write_unlock(vfsmount_lock); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -985,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - br_read_lock(vfsmount_lock); + /* write lock needed for mnt_get_count */ + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1017,10 +1186,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_read_lock(vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - br_read_unlock(vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1047,7 +1216,7 @@ void release_mounts(struct list_head *head) dput(dentry); mntput(m); } - mntput(mnt); + mntput_long(mnt); } } @@ -1073,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; - p->mnt_mountpoint->d_mounted--; + dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); } change_mnt_propagation(p, MS_PRIVATE); } @@ -1102,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + br_write_lock(vfsmount_lock); + if (mnt_get_count(mnt) != 2) { + br_write_lock(vfsmount_lock); return -EBUSY; + } + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1792,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, unlock: up_write(&namespace_sem); - mntput(newmnt); + mntput_long(newmnt); return err; } @@ -2125,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (fs) { if (p == fs->root.mnt) { rootmnt = p; - fs->root.mnt = mntget(q); + fs->root.mnt = mntget_long(q); } if (p == fs->pwd.mnt) { pwdmnt = p; - fs->pwd.mnt = mntget(q); + fs->pwd.mnt = mntget_long(q); } } p = next_mnt(p, mnt_ns->root); @@ -2138,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, up_write(&namespace_sem); if (rootmnt) - mntput(rootmnt); + mntput_long(rootmnt); if (pwdmnt) - mntput(pwdmnt); + mntput_long(pwdmnt); return new_ns; } @@ -2327,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); + error = 0; path_put(&root_parent); path_put(&parent_path); @@ -2353,6 +2531,7 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); + ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f22b12e7d337..28f136d4aaec 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/namei.h> #include <asm/uaccess.h> #include <asm/byteorder.h> @@ -74,9 +75,12 @@ const struct inode_operations ncp_dir_inode_operations = * Dentry operations routines */ static int ncp_lookup_validate(struct dentry *, struct nameidata *); -static int ncp_hash_dentry(struct dentry *, struct qstr *); -static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); -static int ncp_delete_dentry(struct dentry *); +static int ncp_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); +static int ncp_compare_dentry(const struct dentry *, const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); +static int ncp_delete_dentry(const struct dentry *); static const struct dentry_operations ncp_dentry_operations = { @@ -113,10 +117,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) #define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) -static inline int ncp_case_sensitive(struct dentry *dentry) +static inline int ncp_case_sensitive(const struct inode *i) { #ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(dentry->d_inode) == NW_NS_NFS; + return ncp_namespace(i) == NW_NS_NFS; #else return 0; #endif /* CONFIG_NCPFS_NFS_NS */ @@ -127,14 +131,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry) * is case-sensitive. */ static int -ncp_hash_dentry(struct dentry *dentry, struct qstr *this) +ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, + struct qstr *this) { - if (!ncp_case_sensitive(dentry)) { + if (!ncp_case_sensitive(inode)) { + struct super_block *sb = dentry->d_sb; struct nls_table *t; unsigned long hash; int i; - t = NCP_IO_TABLE(dentry); + t = NCP_IO_TABLE(sb); hash = init_name_hash(); for (i=0; i<this->len ; i++) hash = partial_name_hash(ncp_tolower(t, this->name[i]), @@ -145,15 +151,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this) } static int -ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) +ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - if (a->len != b->len) + if (len != name->len) return 1; - if (ncp_case_sensitive(dentry)) - return strncmp(a->name, b->name, a->len); + if (ncp_case_sensitive(pinode)) + return strncmp(str, name->name, len); - return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); + return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len); } /* @@ -162,7 +170,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) * Closing files can be safely postponed until iput() - it's done there anyway. */ static int -ncp_delete_dentry(struct dentry * dentry) +ncp_delete_dentry(const struct dentry * dentry) { struct inode *inode = dentry->d_inode; @@ -301,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) int res, val = 0, len; __u8 __name[NCP_MAXPATHLEN + 1]; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + parent = dget_parent(dentry); dir = parent->d_inode; @@ -384,21 +395,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); goto out; } next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); return NULL; out: @@ -592,7 +603,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname.hash = full_name_hash(qname.name, qname.len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, &qname) != 0) + if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0) goto end_advance; newdent = d_lookup(dentry, &qname); @@ -611,35 +622,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, shrink_dcache_parent(newdent); /* - * It is not as dangerous as it looks. NetWare's OS2 namespace is - * case preserving yet case insensitive. So we update dentry's name - * as received from server. We found dentry via d_lookup with our - * hash, so we know that hash does not change, and so replacing name - * should be reasonably safe. + * NetWare's OS2 namespace is case preserving yet case + * insensitive. So we update dentry's name as received from + * server. Parent dir's i_mutex is locked because we're in + * readdir. */ - if (qname.len == newdent->d_name.len && - memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) { - struct inode *inode = newdent->d_inode; - - /* - * Inside ncpfs all uses of d_name are either for debugging, - * or on functions which acquire inode mutex (mknod, creat, - * lookup). So grab i_mutex here, to be sure. d_path - * uses dcache_lock when generating path, so we should too. - * And finally d_compare is protected by dentry's d_lock, so - * here we go. - */ - if (inode) - mutex_lock(&inode->i_mutex); - spin_lock(&dcache_lock); - spin_lock(&newdent->d_lock); - memcpy((char *) newdent->d_name.name, qname.name, - newdent->d_name.len); - spin_unlock(&newdent->d_lock); - spin_unlock(&dcache_lock); - if (inode) - mutex_unlock(&inode->i_mutex); - } + dentry_update_name_case(newdent, &qname); } if (!newdent->d_inode) { @@ -649,7 +637,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, entry->ino = iunique(dir->i_sb, 2); inode = ncp_iget(dir->i_sb, entry); if (inode) { - newdent->d_op = &ncp_dentry_operations; + d_set_d_op(newdent, &ncp_dentry_operations); d_instantiate(newdent, inode); if (!hashed) d_rehash(newdent); @@ -657,7 +645,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, } else { struct inode *inode = newdent->d_inode; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ncp_update_inode2(inode, entry); mutex_unlock(&inode->i_mutex); } @@ -905,7 +893,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc if (inode) { ncp_new_dentry(dentry); add_entry: - dentry->d_op = &ncp_dentry_operations; + d_set_d_op(dentry, &ncp_dentry_operations); d_add(dentry, inode); error = 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 8fb93b604e73..9b39a5dd4131 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -29,6 +29,7 @@ #include <linux/vfs.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/namei.h> #include <linux/ncp_fs.h> @@ -58,11 +59,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ncp_destroy_inode(struct inode *inode) +static void ncp_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } +static void ncp_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ncp_i_callback); +} + static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; @@ -309,7 +317,12 @@ static void ncp_stop_tasks(struct ncp_server *server) { sk->sk_write_space = server->write_space; release_sock(sk); del_timer_sync(&server->timeout_tm); - flush_scheduled_work(); + + flush_work_sync(&server->rcv.tq); + if (sk->sk_socket->type == SOCK_STREAM) + flush_work_sync(&server->tx.tq); + else + flush_work_sync(&server->timeout_tq); } static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) @@ -710,7 +723,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &ncp_root_dentry_operations; + d_set_d_op(sb->s_root, &ncp_root_dentry_operations); return 0; out_no_root: diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 3c57eca634ce..1220df75ff22 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); #define NCP_ESC ':' -#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) +#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io) #define ncp_tolower(t, c) nls_tolower(t, c) #define ncp_toupper(t, c) nls_toupper(t, c) #define ncp_strnicmp(t, s1, s2, len) \ @@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *, int ncp__vol2io(unsigned char *, unsigned int *, const unsigned char *, unsigned int, int); -#define NCP_IO_TABLE(dentry) NULL +#define NCP_IO_TABLE(sb) NULL #define ncp_tolower(t, c) tolower(c) #define ncp_toupper(t, c) toupper(c) #define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) #define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) -static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, - const unsigned char *s2, int len) +static inline int ncp_strnicmp(const struct nls_table *t, + const unsigned char *s1, const unsigned char *s2, int len) { while (len--) { if (tolower(*s1++) != tolower(*s2++)) @@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } static inline void @@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) ncp_age_dentry(server, dentry); next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } struct ncp_cache_head { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 996dd8989a91..d33da530097a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); if (IS_ERR(inode)) goto out; @@ -938,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path. * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. */ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) +static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, + unsigned int mask) { if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) return 0; @@ -1018,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir; struct inode *inode; @@ -1027,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + parent = dget_parent(dentry); dir = parent->d_inode; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); @@ -1117,7 +1121,7 @@ out_error: /* * This is called from dput() when d_count is going to 0. */ -static int nfs_dentry_delete(struct dentry *dentry) +static int nfs_dentry_delete(const struct dentry *dentry) { dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -1188,7 +1192,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* * If we're doing an exclusive create, optimize away the lookup @@ -1333,7 +1337,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = ERR_PTR(-ENAMETOOLONG); goto out; } - dentry->d_op = NFS_PROTO(dir)->dentry_ops; + d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops); /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ @@ -1718,11 +1722,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1733,7 +1735,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) need_rehash = 1; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1868,7 +1869,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + new_dentry->d_count); /* * For non-directories, check whether the target is busy and if so, @@ -1886,7 +1887,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, rehash = new_dentry; } - if (atomic_read(&new_dentry->d_count) > 2) { + if (new_dentry->d_count > 2) { int err; /* copy the target dentry's name */ @@ -2188,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask) +int nfs_permission(struct inode *inode, int mask, unsigned int flags) { struct rpc_cred *cred; int res = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) @@ -2240,7 +2244,7 @@ out: out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) - res = generic_permission(inode, mask, NULL); + res = generic_permission(inode, mask, flags, NULL); goto out; } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index ac7b814ce162..5596c6a2881e 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&dcache_lock); + spin_lock(&sb->s_root->d_inode->i_lock); + spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); - spin_unlock(&dcache_lock); + spin_unlock(&sb->s_root->d_lock); + spin_unlock(&sb->s_root->d_inode->i_lock); } return 0; } @@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fsinfo.fattr); return ret; @@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) security_d_instantiate(ret, inode); if (ret->d_op == NULL) - ret->d_op = server->nfs_client->rpc_ops->dentry_ops; + d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops); out: nfs_free_fattr(fattr); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e67e31c73416..017daa3bed38 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -void nfs_destroy_inode(struct inode *inode) +static void nfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +void nfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nfs_i_callback); +} + static inline void nfs4_init_once(struct nfs_inode *nfsi) { #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index db6aa3673cf3..74aaf3963c10 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -49,12 +49,17 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end = buffer+buflen; + char *end; int namelen; + unsigned seq; +rename_retry: + end = buffer+buflen; *--end = '\0'; buflen--; - spin_lock(&dcache_lock); + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -65,7 +70,9 @@ char *nfs_path(const char *base, *--end = '/'; dentry = dentry->d_parent; } - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; if (*end != '/') { if (--buflen < 0) goto Elong; @@ -82,7 +89,9 @@ char *nfs_path(const char *base, memcpy(end, base, namelen); return end; Elong_unlock: - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; Elong: return ERR_PTR(-ENAMETOOLONG); } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7bdec8531400..8fe9eb47a97f 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); + dentry->d_count); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 116cab970e0f..fbd18c3074bb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4336,7 +4336,7 @@ __nfs4_state_shutdown(void) void nfs4_state_shutdown(void) { - cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); + cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); locks_end_grace(&nfsd4_manager); nfs4_lock_state(); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 184938fcff04..3a359023c9f7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { + ((odentry->d_count > 1) || (ndentry->d_count > 1))) { host_err = -EPERM; goto out_dput_new; } @@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { + (rdentry->d_count > 1)) { host_err = -EPERM; } else #endif diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 8b782b062baa..3ee67c67cc52 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -35,7 +35,20 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) { - return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); + return NILFS_I_NILFS(bmap->b_inode)->ns_dat; +} + +static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, + const char *fname, int err) +{ + struct inode *inode = bmap->b_inode; + + if (err == -EINVAL) { + nilfs_error(inode->i_sb, fname, + "broken bmap (inode number=%lu)\n", inode->i_ino); + err = -EIO; + } + return err; } /** @@ -66,8 +79,10 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); - if (ret < 0) + if (ret < 0) { + ret = nilfs_bmap_convert_error(bmap, __func__, ret); goto out; + } if (NILFS_BMAP_USE_VBN(bmap)) { ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, &blocknr); @@ -88,7 +103,8 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); up_read(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -144,7 +160,8 @@ int nilfs_bmap_insert(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = nilfs_bmap_do_insert(bmap, key, rec); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) @@ -180,9 +197,12 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) down_read(&bmap->b_sem); ret = bmap->b_ops->bop_last_key(bmap, &lastkey); - if (!ret) - *key = lastkey; up_read(&bmap->b_sem); + + if (ret < 0) + ret = nilfs_bmap_convert_error(bmap, __func__, ret); + else + *key = lastkey; return ret; } @@ -210,7 +230,8 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_delete(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) @@ -261,7 +282,8 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) down_write(&bmap->b_sem); ret = nilfs_bmap_do_truncate(bmap, key); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -300,7 +322,8 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -344,7 +367,8 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, down_write(&bmap->b_sem); ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** @@ -373,7 +397,8 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) down_write(&bmap->b_sem); ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); - return ret; + + return nilfs_bmap_convert_error(bmap, __func__, ret); } /** diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 5115814cb745..388e9e8f5286 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -104,8 +104,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, if (pblocknr == 0) { pblocknr = blocknr; if (inode->i_ino != NILFS_DAT_INO) { - struct inode *dat = - nilfs_dat_inode(NILFS_I_NILFS(inode)); + struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; /* blocknr is a virtual block number */ err = nilfs_dat_translate(dat, blocknr, &pblocknr); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index cb003c8ee1f6..9d45773b79e6 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -91,7 +91,6 @@ static void nilfs_commit_chunk(struct page *page, unsigned from, unsigned to) { struct inode *dir = mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); loff_t pos = page_offset(page) + from; unsigned len = to - from; unsigned nr_dirty, copied; @@ -103,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page, i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + err = nilfs_set_file_dirty(dir, nr_dirty); WARN_ON(err); /* do not happen */ unlock_page(page); } diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index c9a30d7ff6fc..2f560c9fb808 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -155,6 +155,7 @@ const struct inode_operations nilfs_file_inode_operations = { .truncate = nilfs_truncate, .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; /* end of file */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 33ad25ddd5c4..caf9a6a3fb54 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) int nilfs_init_gcinode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; inode->i_mode = S_IFREG; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); @@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode) ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - /* - * Add the inode to GC inode list. Garbage Collection - * is serialized and no two processes manipulate the - * list simultaneously. - */ - igrab(inode); - list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - return 0; } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 9f8a2da67f90..bfc73d3a30ed 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -149,14 +149,9 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); - if (unlikely(err)) { - if (err == -EINVAL) - nilfs_error(sb, __func__, "ifile is broken"); - else - nilfs_warning(sb, __func__, - "unable to read inode: %lu", - (unsigned long) ino); - } + if (unlikely(err)) + nilfs_warning(sb, __func__, "unable to read inode: %lu", + (unsigned long) ino); return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 71d4bc8464e0..2fd440d8d6b8 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -58,7 +58,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct nilfs_inode_info *ii = NILFS_I(inode); __u64 blknum = 0; int err = 0, ret; - struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); + struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; down_read(&NILFS_MDT(dat)->mi_sem); @@ -96,11 +96,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, inode->i_ino, (unsigned long long)blkoff); err = 0; - } else if (err == -EINVAL) { - nilfs_error(inode->i_sb, __func__, - "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; } nilfs_transaction_abort(inode->i_sb); goto out; @@ -109,6 +104,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); + set_buffer_delay(bh_result); map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed to proper value */ } else if (ret == -ENOENT) { @@ -185,10 +181,9 @@ static int nilfs_set_page_dirty(struct page *page) if (ret) { struct inode *inode = page->mapping->host; - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); - nilfs_set_file_dirty(sbi, inode, nr_dirty); + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } @@ -229,7 +224,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, start + copied); copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); + nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); return err ? : copied; } @@ -425,13 +420,12 @@ static int __nilfs_read_inode(struct super_block *sb, struct nilfs_root *root, unsigned long ino, struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); + struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; struct buffer_head *bh; struct nilfs_inode *raw_inode; int err; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; @@ -461,7 +455,7 @@ static int __nilfs_read_inode(struct super_block *sb, } nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); return 0; @@ -470,7 +464,7 @@ static int __nilfs_read_inode(struct super_block *sb, brelse(bh); bad_inode: - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); return err; } @@ -629,7 +623,7 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; - repeat: +repeat: ret = nilfs_bmap_last_key(ii->i_bmap, &b); if (ret == -ENOENT) return; @@ -646,14 +640,10 @@ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, nilfs_bmap_truncate(ii->i_bmap, b) == 0)) goto repeat; - failed: - if (ret == -EINVAL) - nilfs_error(ii->vfs_inode.i_sb, __func__, - "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); - else - nilfs_warning(ii->vfs_inode.i_sb, __func__, - "failed to truncate bmap (ino=%lu, err=%d)", - ii->vfs_inode.i_ino, ret); +failed: + nilfs_warning(ii->vfs_inode.i_sb, __func__, + "failed to truncate bmap (ino=%lu, err=%d)", + ii->vfs_inode.i_ino, ret); } void nilfs_truncate(struct inode *inode) @@ -682,7 +672,7 @@ void nilfs_truncate(struct inode *inode) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_mark_inode_dirty(inode); - nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); + nilfs_set_file_dirty(inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But truncate has no return value. */ @@ -785,20 +775,24 @@ out_err: return err; } -int nilfs_permission(struct inode *inode, int mask) +int nilfs_permission(struct inode *inode, int mask, unsigned int flags) { - struct nilfs_root *root = NILFS_I(inode)->i_root; + struct nilfs_root *root; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + root = NILFS_I(inode)->i_root; if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } -int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, - struct buffer_head **pbh) +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) { + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); struct nilfs_inode_info *ii = NILFS_I(inode); int err; @@ -839,9 +833,9 @@ int nilfs_inode_dirty(struct inode *inode) return ret; } -int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, - unsigned nr_dirty) +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) { + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); struct nilfs_inode_info *ii = NILFS_I(inode); atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); @@ -874,11 +868,10 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, int nilfs_mark_inode_dirty(struct inode *inode) { - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); struct buffer_head *ibh; int err; - err = nilfs_load_inode_block(sbi, inode, &ibh); + err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warning(inode->i_sb, __func__, "failed to reget inode block.\n"); @@ -920,3 +913,134 @@ void nilfs_dirty_inode(struct inode *inode) nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ } + +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + struct the_nilfs *nilfs = NILFS_I_NILFS(inode); + __u64 logical = 0, phys = 0, size = 0; + __u32 flags = 0; + loff_t isize; + sector_t blkoff, end_blkoff; + sector_t delalloc_blkoff; + unsigned long delalloc_blklen; + unsigned int blkbits = inode->i_blkbits; + int ret, n; + + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + + blkoff = start >> blkbits; + end_blkoff = (start + len - 1) >> blkbits; + + delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, + &delalloc_blkoff); + + do { + __u64 blkphy; + unsigned int maxblocks; + + if (delalloc_blklen && blkoff == delalloc_blkoff) { + if (size) { + /* End of the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + } + if (blkoff > end_blkoff) + break; + + flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; + logical = blkoff << blkbits; + phys = 0; + size = delalloc_blklen << blkbits; + + blkoff = delalloc_blkoff + delalloc_blklen; + delalloc_blklen = nilfs_find_uncommitted_extent( + inode, blkoff, &delalloc_blkoff); + continue; + } + + /* + * Limit the number of blocks that we look up so as + * not to get into the next delayed allocation extent. + */ + maxblocks = INT_MAX; + if (delalloc_blklen) + maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, + maxblocks); + blkphy = 0; + + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + n = nilfs_bmap_lookup_contig( + NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + + if (n < 0) { + int past_eof; + + if (unlikely(n != -ENOENT)) + break; /* error */ + + /* HOLE */ + blkoff++; + past_eof = ((blkoff << blkbits) >= isize); + + if (size) { + /* End of the current extent */ + + if (past_eof) + flags |= FIEMAP_EXTENT_LAST; + + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, flags); + if (ret) + break; + size = 0; + } + if (blkoff > end_blkoff || past_eof) + break; + } else { + if (size) { + if (phys && blkphy << blkbits == phys + size) { + /* The current extent goes on */ + size += n << blkbits; + } else { + /* Terminate the current extent */ + ret = fiemap_fill_next_extent( + fieinfo, logical, phys, size, + flags); + if (ret || blkoff > end_blkoff) + break; + + /* Start another extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + } else { + /* Start a new extent */ + flags = FIEMAP_EXTENT_MERGED; + logical = blkoff << blkbits; + phys = blkphy << blkbits; + size = n << blkbits; + } + blkoff += n; + } + cond_resched(); + } while (true); + + /* If ret is 1 then we just hit the end of the extent array */ + if (ret == 1) + ret = 0; + + mutex_unlock(&inode->i_mutex); + return ret; +} diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e00d9457c256..496738963fdb 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -233,7 +233,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); + ret = nilfs_dat_get_vinfo(nilfs->ns_dat, buf, size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -242,8 +242,7 @@ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -337,6 +336,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; + struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -353,6 +353,17 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, ret = PTR_ERR(inode); goto failed; } + if (list_empty(&NILFS_I(inode)->i_dirty)) { + /* + * Add the inode to GC inode list. Garbage Collection + * is serialized and no two processes manipulate the + * list simultaneously. + */ + igrab(inode); + list_add(&NILFS_I(inode)->i_dirty, + &nilfs->ns_gc_inodes); + } + do { ret = nilfs_ioctl_move_inode_block(inode, vdesc, &buffers); @@ -409,7 +420,7 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, size_t nmembs = argv->v_nmembs; int ret; - ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs->ns_dat, buf, nmembs); return (ret < 0) ? ret : nmembs; } @@ -418,8 +429,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; - struct inode *dat = nilfs_dat_inode(nilfs); - struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bmap *bmap = NILFS_I(nilfs->ns_dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; int ret, i; @@ -438,7 +448,7 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, /* skip dead block */ continue; if (bdescs[i].bd_level == 0) { - ret = nilfs_mdt_mark_block_dirty(dat, + ret = nilfs_mdt_mark_block_dirty(nilfs->ns_dat, bdescs[i].bd_offset); if (ret < 0) { WARN_ON(ret == -ENOENT); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 39a5b84e2c9f..6a0e2a189f60 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -237,8 +237,6 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, * * %-ENOENT - the specified block does not exist (hole block) * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) - * * %-EROFS - Read only filesystem (for create mode) */ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, @@ -273,8 +271,6 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, * %-ENOMEM - Insufficient memory available. * * %-EIO - I/O error - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) { @@ -350,8 +346,6 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) * %-EIO - I/O error * * %-ENOENT - the specified block does not exist (hole block) - * - * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) */ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) { @@ -499,31 +493,29 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct buffer_head *bh_frozen; struct page *page; int blkbits = inode->i_blkbits; - int ret = -ENOMEM; page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); if (!page) - return ret; + return -ENOMEM; if (!page_has_buffers(page)) create_empty_buffers(page, 1 << blkbits, 0); bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); - if (bh_frozen) { - if (!buffer_uptodate(bh_frozen)) - nilfs_copy_buffer(bh_frozen, bh); - if (list_empty(&bh_frozen->b_assoc_buffers)) { - list_add_tail(&bh_frozen->b_assoc_buffers, - &shadow->frozen_buffers); - set_buffer_nilfs_redirected(bh); - } else { - brelse(bh_frozen); /* already frozen */ - } - ret = 0; + + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ } + unlock_page(page); page_cache_release(page); - return ret; + return 0; } struct buffer_head * diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 6e9557ecf161..98034271cd02 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -577,6 +577,7 @@ const struct inode_operations nilfs_dir_inode_operations = { .rename = nilfs_rename, .setattr = nilfs_setattr, .permission = nilfs_permission, + .fiemap = nilfs_fiemap, }; const struct inode_operations nilfs_special_inode_operations = { diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f7560da5a567..777e8fd04304 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -190,11 +190,6 @@ static inline int nilfs_doing_construction(void) return nilfs_test_transaction_flag(NILFS_TI_WRITER); } -static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) -{ - return nilfs->ns_dat; -} - /* * function prototype */ @@ -256,14 +251,14 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); -int nilfs_permission(struct inode *inode, int mask); -extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, - struct buffer_head **); +int nilfs_permission(struct inode *inode, int mask, unsigned int flags); +int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); -extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, - unsigned); +int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); extern void nilfs_dirty_inode(struct inode *); +int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index a6c3c2e817f8..0c432416cfef 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -491,7 +491,7 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, } return nc; } - + void nilfs_mapping_init_once(struct address_space *mapping) { memset(mapping, 0, sizeof(*mapping)); @@ -546,3 +546,87 @@ int __nilfs_clear_page_dirty(struct page *page) } return TestClearPageDirty(page); } + +/** + * nilfs_find_uncommitted_extent - find extent of uncommitted data + * @inode: inode + * @start_blk: start block offset (in) + * @blkoff: start offset of the found extent (out) + * + * This function searches an extent of buffers marked "delayed" which + * starts from a block offset equal to or larger than @start_blk. If + * such an extent was found, this will store the start offset in + * @blkoff and return its length in blocks. Otherwise, zero is + * returned. + */ +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff) +{ + unsigned int i; + pgoff_t index; + unsigned int nblocks_in_page; + unsigned long length = 0; + sector_t b; + struct pagevec pvec; + struct page *page; + + if (inode->i_mapping->nrpages == 0) + return 0; + + index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + pagevec_init(&pvec, 0); + +repeat: + pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, + pvec.pages); + if (pvec.nr == 0) + return length; + + if (length > 0 && pvec.pages[0]->index > index) + goto out; + + b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + i = 0; + do { + page = pvec.pages[i]; + + lock_page(page); + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (b < start_blk) + continue; + if (buffer_delay(bh)) { + if (length == 0) + *blkoff = b; + length++; + } else if (length > 0) { + goto out_locked; + } + } while (++b, bh = bh->b_this_page, bh != head); + } else { + if (length > 0) + goto out_locked; + + b += nblocks_in_page; + } + unlock_page(page); + + } while (++i < pagevec_count(&pvec)); + + index = page->index + 1; + pagevec_release(&pvec); + cond_resched(); + goto repeat; + +out_locked: + unlock_page(page); +out: + pagevec_release(&pvec); + return length; +} diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index fb9e8a8a2038..622df27cd891 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -66,6 +66,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct backing_dev_info *bdi, const struct address_space_operations *aops); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); +unsigned long nilfs_find_uncommitted_extent(struct inode *inode, + sector_t start_blk, + sector_t *blkoff); #define NILFS_PAGE_BUG(page, m, a...) \ do { nilfs_page_bug(page); BUG(); } while (0) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 5d2711c28da7..3dfcd3b7d389 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -535,7 +535,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, if (unlikely(err)) goto failed_page; - err = nilfs_set_file_dirty(sbi, inode, 1); + err = nilfs_set_file_dirty(inode, 1); if (unlikely(err)) goto failed_page; diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index 35a07157b980..7a17715f215f 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h @@ -27,14 +27,6 @@ #include <linux/types.h> #include <linux/fs.h> -/* - * Mount options - */ -struct nilfs_mount_options { - unsigned long mount_opt; - __u64 snapshot_cno; -}; - struct the_nilfs; struct nilfs_sc_info; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 687d090cea34..55ebae5c7f39 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -504,17 +504,6 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, return err; } -static int nilfs_handle_bmap_error(int err, const char *fname, - struct inode *inode, struct super_block *sb) -{ - if (err == -EINVAL) { - nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", - inode->i_ino); - err = -EIO; - } - return err; -} - /* * Callback functions that enumerate, mark, and collect dirty blocks */ @@ -524,9 +513,8 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_v)); @@ -539,13 +527,7 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - int err; - - err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); - return 0; + return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); } static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, @@ -588,9 +570,8 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, int err; err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); - if (unlikely(err < 0)) - return nilfs_handle_bmap_error(err, __func__, inode, - sci->sc_super); + if (err < 0) + return err; err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); if (!err) @@ -776,9 +757,8 @@ static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) ret++; - if (ret || nilfs_doing_gc()) - if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) - ret++; + if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat)) + ret++; return ret; } @@ -814,7 +794,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) nilfs_mdt_clear_dirty(sci->sc_root->ifile); nilfs_mdt_clear_dirty(nilfs->ns_cpfile); nilfs_mdt_clear_dirty(nilfs->ns_sufile); - nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); + nilfs_mdt_clear_dirty(nilfs->ns_dat); } static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) @@ -923,7 +903,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + + nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + NILFS_SR_DAT_OFFSET(isz), 1); nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + NILFS_SR_CPFILE_OFFSET(isz), 1); @@ -1179,7 +1159,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_DAT: dat_stage: - err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), + err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, &nilfs_sc_dat_ops); if (unlikely(err)) break; @@ -1563,7 +1543,6 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, return 0; failed_bmap: - err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); return err; } @@ -1783,6 +1762,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err) if (!err) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); } brelse(bh); /* for b_assoc_buffers */ @@ -1909,6 +1889,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_redirected(bh); if (bh == segbuf->sb_super_root) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d3..70dfdd532b83 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -47,7 +47,6 @@ #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> -#include <linux/kobject.h> #include <linux/seq_file.h> #include <linux/mount.h> #include "nilfs.h" @@ -111,12 +110,17 @@ void nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...) { struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); if (!(sb->s_flags & MS_RDONLY)) { @@ -136,13 +140,17 @@ void nilfs_error(struct super_block *sb, const char *function, void nilfs_warning(struct super_block *sb, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_WARNING "NILFS warning (device %s): %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); } @@ -162,10 +170,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return &ii->vfs_inode; } -void nilfs_destroy_inode(struct inode *inode) +static void nilfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + INIT_LIST_HEAD(&inode->i_dentry); + if (mdi) { kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); @@ -173,6 +184,11 @@ void nilfs_destroy_inode(struct inode *inode) kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } +void nilfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nilfs_i_callback); +} + static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; @@ -838,7 +854,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, static int nilfs_tree_was_touched(struct dentry *root_dentry) { - return atomic_read(&root_dentry->d_count) > 1; + return root_dentry->d_count > 1; } /** @@ -1002,11 +1018,11 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; unsigned long old_sb_flags; - struct nilfs_mount_options old_opts; + unsigned long old_mount_opt; int err; old_sb_flags = sb->s_flags; - old_opts.mount_opt = sbi->s_mount_opt; + old_mount_opt = sbi->s_mount_opt; if (!parse_options(data, sb, 1)) { err = -EINVAL; @@ -1075,7 +1091,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) restore_opts: sb->s_flags = old_sb_flags; - sbi->s_mount_opt = old_opts.mount_opt; + sbi->s_mount_opt = old_mount_opt; return err; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 0254be2d73c6..ad4ac607cf57 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -329,7 +329,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) printk(KERN_INFO "NILFS: recovery complete.\n"); skip_recovery: - set_nilfs_loaded(nilfs); nilfs_clear_recovery_info(&ri); sbi->s_super->s_flags = s_flags; return 0; @@ -651,12 +650,11 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { - struct inode *dat = nilfs_dat_inode(nilfs); unsigned long ncleansegs; - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 69226e14b745..fd85e4c05c6b 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -36,8 +36,6 @@ /* the_nilfs struct */ enum { THE_NILFS_INIT = 0, /* Information from super_block is set */ - THE_NILFS_LOADED, /* Roll-back/roll-forward has done and - the latest checkpoint was loaded */ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ THE_NILFS_GC_RUNNING, /* gc process is running */ THE_NILFS_SB_DIRTY, /* super block is dirty */ @@ -178,7 +176,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ } THE_NILFS_FNS(INIT, init) -THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index b04f88eed09e..f35794b97e8e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response); + wait_event(group->fanotify_data.access_waitq, event->response || + atomic_read(&group->fanotify_data.bypass_perm)); + + if (!event->response) /* bypass_perm set */ + return 0; /* userspace responded, convert to something usable */ spin_lock(&event->lock); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 063224812b7e..8b61220cffc5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) return client_fd; } -static ssize_t fill_event_metadata(struct fsnotify_group *group, +static int fill_event_metadata(struct fsnotify_group *group, struct fanotify_event_metadata *metadata, struct fsnotify_event *event) { + int ret = 0; + pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, metadata, event); metadata->event_len = FAN_EVENT_METADATA_LEN; + metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); - metadata->fd = create_fd(group, event); + if (unlikely(event->mask & FAN_Q_OVERFLOW)) + metadata->fd = FAN_NOFD; + else { + metadata->fd = create_fd(group, event); + if (metadata->fd < 0) + ret = metadata->fd; + } - return metadata->fd; + return ret; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS @@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, mutex_lock(&group->fanotify_data.access_mutex); - if (group->fanotify_data.bypass_perm) { + if (atomic_read(&group->fanotify_data.bypass_perm)) { mutex_unlock(&group->fanotify_data.access_mutex); kmem_cache_free(fanotify_response_event_cache, re); event->response = FAN_ALLOW; @@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - fd = fill_event_metadata(group, &fanotify_event_metadata, event); - if (fd < 0) - return fd; + ret = fill_event_metadata(group, &fanotify_event_metadata, event); + if (ret < 0) + goto out; + fd = fanotify_event_metadata.fd; ret = prepare_for_access_response(group, event, fd); if (ret) goto out_close_fd; ret = -EFAULT; - if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) + if (copy_to_user(buf, &fanotify_event_metadata, + fanotify_event_metadata.event_len)) goto out_kill_access_response; - return FAN_EVENT_METADATA_LEN; + return fanotify_event_metadata.event_len; out_kill_access_response: remove_access_response(group, event, fd); out_close_fd: - sys_close(fd); + if (fd != FAN_NOFD) + sys_close(fd); +out: +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (event->mask & FAN_ALL_PERM_EVENTS) { + event->response = FAN_DENY; + wake_up(&group->fanotify_data.access_waitq); + } +#endif return ret; } @@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) mutex_lock(&group->fanotify_data.access_mutex); - group->fanotify_data.bypass_perm = true; + atomic_inc(&group->fanotify_data.bypass_perm); list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, @@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; + int ret = 0; fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); if (!fsn_mark) { - int ret; - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) return -ENOSPC; @@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, fsnotify_init_mark(fsn_mark, fanotify_free_mark); ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); - if (ret) { - fanotify_free_mark(fsn_mark); - return ret; - } + if (ret) + goto err; } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - fsnotify_put_mark(fsn_mark); + if (added & ~mnt->mnt_fsnotify_mask) fsnotify_recalc_vfsmount_mask(mnt); - - return 0; +err: + fsnotify_put_mark(fsn_mark); + return ret; } static int fanotify_add_inode_mark(struct fsnotify_group *group, @@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; + int ret = 0; pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) { - int ret; - if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) return -ENOSPC; @@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, fsnotify_init_mark(fsn_mark, fanotify_free_mark); ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); - if (ret) { - fanotify_free_mark(fsn_mark); - return ret; - } + if (ret) + goto err; } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - fsnotify_put_mark(fsn_mark); + if (added & ~inode->i_fsnotify_mask) fsnotify_recalc_inode_mask(inode); - return 0; +err: + fsnotify_put_mark(fsn_mark); + return ret; } /* fanotify syscalls */ @@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ group = fsnotify_alloc_group(&fanotify_fsnotify_ops); - if (IS_ERR(group)) + if (IS_ERR(group)) { + free_uid(user); return PTR_ERR(group); + } group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); @@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: @@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, if (flags & ~FAN_ALL_MARK_FLAGS) return -EINVAL; switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { - case FAN_MARK_ADD: + case FAN_MARK_ADD: /* fallthrough */ case FAN_MARK_REMOVE: + if (!mask) + return -EINVAL; case FAN_MARK_FLUSH: break; default: diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 20dc218707ca..79b47cbb5cd8 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ list_for_each_entry(alias, &inode->i_dentry, d_alias) { @@ -68,19 +68,21 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ + spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } + spin_unlock(&alias->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } /* Notify this dentry's parent about a child's events. */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 444c305a468c..4cd5d5d78f9f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) if (ret >= 0) return ret; + fsnotify_put_group(group); atomic_dec(&user->inotify_devs); out_free_uid: free_uid(user); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 93622b175fc7..a627ed82c0a3 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) return NULL; } +static void ntfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); +} + void ntfs_destroy_big_inode(struct inode *inode) { ntfs_inode *ni = NTFS_I(inode); @@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode) BUG_ON(ni->page); if (!atomic_dec_and_test(&ni->count)) BUG(); - kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); + call_rcu(&inode->i_rcu, ntfs_i_callback); } static inline ntfs_inode *ntfs_alloc_extent_inode(void) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 391915093fe1..704f6b1742f3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_check_acl(struct inode *inode, int mask) +int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; int ret = -EAGAIN; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return ret; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 5c5d31f05853..4fe7c9cf4bfb 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -26,7 +26,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -extern int ocfs2_check_acl(struct inode *, int); +extern int ocfs2_check_acl(struct inode *, int, unsigned int); extern int ocfs2_acl_chmod(struct inode *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, struct buffer_head *, struct buffer_head *, diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d55a10e2f300..1fbb0e20131b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); + if (ocfs2_iocb_is_sem_locked(iocb)) { + up_read(&inode->i_alloc_sem); + ocfs2_iocb_clear_sem_locked(iocb); + } + ocfs2_iocb_clear_rw_locked(iocb); level = ocfs2_iocb_rw_locked_level(iocb); - if (!level) - up_read(&inode->i_alloc_sem); ocfs2_rw_unlock(inode, level); if (is_async) diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 76bfdfda691a..eceb456037c1 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) else clear_bit(1, (unsigned long *)&iocb->private); } + +/* + * Using a named enum representing lock types in terms of #N bit stored in + * iocb->private, which is going to be used for communication bewteen + * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). + */ +enum ocfs2_iocb_lock_bits { + OCFS2_IOCB_RW_LOCK = 0, + OCFS2_IOCB_RW_LOCK_LEVEL, + OCFS2_IOCB_SEM, + OCFS2_IOCB_NUM_LOCKS +}; + #define ocfs2_iocb_clear_rw_locked(iocb) \ - clear_bit(0, (unsigned long *)&iocb->private) + clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private) #define ocfs2_iocb_rw_locked_level(iocb) \ - test_bit(1, (unsigned long *)&iocb->private) + test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private) +#define ocfs2_iocb_set_sem_locked(iocb) \ + set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) +#define ocfs2_iocb_clear_sem_locked(iocb) \ + clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) +#define ocfs2_iocb_is_sem_locked(iocb) \ + test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) #endif /* OCFS2_FILE_H */ diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 8b50c1ad7a69..a6cc05302e9f 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -340,8 +340,7 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) static void o2hb_disarm_write_timeout(struct o2hb_region *reg) { - cancel_delayed_work(®->hr_write_timeout_work); - flush_scheduled_work(); + cancel_delayed_work_sync(®->hr_write_timeout_work); } static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index c7fba396392d..6c61771469af 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { define_mask(QUOTA), define_mask(REFCOUNT), define_mask(BASTS), + define_mask(RESERVATIONS), + define_mask(CLUSTER), define_mask(ERROR), define_mask(NOTICE), define_mask(KTHREAD), - define_mask(RESERVATIONS), }; static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index ea2ed9f56c94..34d6544357d9 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -81,7 +81,7 @@ #include <linux/sched.h> /* bits that are frequently given and infrequently matched in the low word */ -/* NOTE: If you add a flag, you need to also update mlog.c! */ +/* NOTE: If you add a flag, you need to also update masklog.c! */ #define ML_ENTRY 0x0000000000000001ULL /* func call entry */ #define ML_EXIT 0x0000000000000002ULL /* func call exit */ #define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ @@ -114,13 +114,14 @@ #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ -#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ +#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */ +#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */ +#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */ + /* bits that are infrequently given and frequently matched in the high word */ -#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ -#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ -#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ -#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ -#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ +#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */ +#define ML_NOTICE 0x2000000000000000ULL /* setn to KERN_NOTICE */ +#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */ #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index cf3e16696216..a87366750f23 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -325,5 +325,7 @@ void o2quo_init(void) void o2quo_exit(void) { - flush_scheduled_work(); + struct o2quo_state *qs = &o2quo_state; + + flush_work_sync(&qs->qs_work); } diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 895532ac4d98..6d80ecc7834f 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) static int ocfs2_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; + struct inode *inode; int ret = 0; /* if all else fails, just return false */ - struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + struct ocfs2_super *osb; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + osb = OCFS2_SB(dentry->d_sb); mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -169,23 +175,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&dcache_lock); - + spin_lock(&inode->i_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); + spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked(dentry); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); break; } + spin_unlock(&dentry->d_lock); dentry = NULL; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); return dentry; } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c49f6de0e7ab..d417b3f9b0c7 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, di->i_dx_root = cpu_to_le64(dr_blkno); + spin_lock(&OCFS2_I(dir)->ip_lock); OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); + spin_unlock(&OCFS2_I(dir)->ip_lock); ocfs2_journal_dirty(handle, di_bh); @@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir, goto out_commit; } + spin_lock(&OCFS2_I(dir)->ip_lock); OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); + spin_unlock(&OCFS2_I(dir)->ip_lock); di->i_dx_root = cpu_to_le64(0ULL); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f564b0e5f80d..59f0f6bdfc62 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) */ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, - int *numlocks) + int *numlocks, + int *hasrefs) { int ret; int i; @@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, assert_spin_locked(&res->spinlock); + *numlocks = 0; + *hasrefs = 0; + ret = -EINVAL; if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { mlog(0, "cannot migrate lockres with unknown owner!\n"); @@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, } *numlocks = count; - mlog(0, "migrateable lockres having %d locks\n", *numlocks); + + count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (count < O2NM_MAX_NODES) + *hasrefs = 1; + + mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, + res->lockname.len, res->lockname.name, *numlocks, *hasrefs); leave: return ret; @@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, const char *name; unsigned int namelen; int mle_added = 0; - int numlocks; + int numlocks, hasrefs; int wake = 0; if (!dlm_grab(dlm)) @@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, name = res->lockname.name; namelen = res->lockname.len; - mlog(0, "migrating %.*s to %u\n", namelen, name, target); + mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); /* * ensure this lockres is a proper candidate for migration */ spin_lock(&res->spinlock); - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); if (ret < 0) { spin_unlock(&res->spinlock); goto leave; @@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_unlock(&res->spinlock); /* no work to do */ - if (numlocks == 0) { - mlog(0, "no locks were found on this lockres! done!\n"); + if (numlocks == 0 && !hasrefs) goto leave; - } /* * preallocate up front @@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, * find a node to migrate the lockres to */ - mlog(0, "picking a migration node\n"); spin_lock(&dlm->spinlock); /* pick a new node */ if (!test_bit(target, dlm->domain_map) || target >= O2NM_MAX_NODES) { target = dlm_pick_migration_target(dlm, res); } - mlog(0, "node %u chosen for migration\n", target); + mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, + namelen, name, target); if (target >= O2NM_MAX_NODES || !test_bit(target, dlm->domain_map)) { @@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int ret; int lock_dropped = 0; - int numlocks; + int numlocks, hasrefs; spin_lock(&res->spinlock); if (res->owner != dlm->node_num) { @@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) } /* No need to migrate a lockres having no locks */ - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); - if (ret >= 0 && numlocks == 0) { + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); + if (ret >= 0 && numlocks == 0 && !hasrefs) { spin_unlock(&res->spinlock); goto leave; } @@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, } queue++; } + + nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (nodenum < O2NM_MAX_NODES) { + spin_unlock(&res->spinlock); + return nodenum; + } spin_unlock(&res->spinlock); mlog(0, "have not found a suitable target yet! checking domain map\n"); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b2df490a19ed..8c5c0eddc365 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb) return &ip->ip_vfs_inode; } -static void dlmfs_destroy_inode(struct inode *inode) +static void dlmfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } +static void dlmfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, dlmfs_i_callback); +} + static void dlmfs_evict_inode(struct inode *inode) { int status; diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 19ad145d2af3..6adafa576065 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -138,7 +138,7 @@ check_gen: result = d_obtain_alias(inode); if (!IS_ERR(result)) - result->d_op = &ocfs2_dentry_ops; + d_set_d_op(result, &ocfs2_dentry_ops); else mlog_errno(PTR_ERR(result)); @@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); if (!IS_ERR(parent)) - parent->d_op = &ocfs2_dentry_ops; + d_set_d_op(parent, &ocfs2_dentry_ops); bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 77b4c04a2809..bdadbae09094 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1307,10 +1307,13 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask) +int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) { int ret; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + mlog_entry_void(); ret = ocfs2_inode_lock(inode, NULL, 0); @@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask) goto out; } - ret = generic_permission(inode, mask, ocfs2_check_acl); + ret = generic_permission(inode, mask, flags, ocfs2_check_acl); ocfs2_inode_unlock(inode, 0); out: @@ -2241,11 +2244,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, mutex_lock(&inode->i_mutex); + ocfs2_iocb_clear_sem_locked(iocb); + relock: /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ if (direct_io) { down_read(&inode->i_alloc_sem); have_alloc_sem = 1; + /* communicate with ocfs2_dio_end_io */ + ocfs2_iocb_set_sem_locked(iocb); } /* @@ -2382,8 +2389,10 @@ out: ocfs2_rw_unlock(inode, rw_level); out_sems: - if (have_alloc_sem) + if (have_alloc_sem) { up_read(&inode->i_alloc_sem); + ocfs2_iocb_clear_sem_locked(iocb); + } mutex_unlock(&inode->i_mutex); @@ -2527,6 +2536,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, goto bail; } + ocfs2_iocb_clear_sem_locked(iocb); + /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. @@ -2534,6 +2545,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, if (filp->f_flags & O_DIRECT) { down_read(&inode->i_alloc_sem); have_alloc_sem = 1; + ocfs2_iocb_set_sem_locked(iocb); ret = ocfs2_rw_lock(inode, 0); if (ret < 0) { @@ -2575,8 +2587,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, } bail: - if (have_alloc_sem) + if (have_alloc_sem) { up_read(&inode->i_alloc_sem); + ocfs2_iocb_clear_sem_locked(iocb); + } if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mlog_exit(ret); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 97bf761c9e7c..f5afbbef6703 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask); +int ocfs2_permission(struct inode *inode, int mask, unsigned int flags); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ca35f81a13bb..30c523144452 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&oi->ip_lock); bail_add: - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); ret = d_splice_alias(inode, dentry); if (inode) { @@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, @@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry, } ihold(inode); - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); out_commit: @@ -1797,7 +1797,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_errno(status); goto bail; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, @@ -2462,7 +2462,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto out_commit; } - dentry->d_op = &ocfs2_dentry_ops; + d_set_d_op(dentry, &ocfs2_dentry_ops); d_instantiate(dentry, inode); status = 0; out_commit: diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index c2e4f8222e2f..bf2e7764920e 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -350,7 +350,7 @@ enum { #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE NUM_SYSTEM_INODES }; -#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE +#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE #define NUM_LOCAL_SYSTEM_INODES \ (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cfeab7ce3697..17ff46fa8a10 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void ocfs2_destroy_inode(struct inode *inode) +static void ocfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); } +static void ocfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ocfs2_i_callback); +} + static unsigned long long ocfs2_max_file_offset(unsigned int bbits, unsigned int cbits) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 911e61f348fc..a2a5bff774e3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb) return &oi->vfs_inode; } -static void openprom_destroy_inode(struct inode *inode) +static void openprom_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(op_inode_cachep, OP_I(inode)); } +static void openprom_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, openprom_i_callback); +} + static struct inode *openprom_iget(struct super_block *sb, ino_t ino) { struct inode *inode; diff --git a/fs/pipe.c b/fs/pipe.c index 04629f36e397..68f1f8e4e23b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags) goto err; err = -ENOMEM; - path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); if (!path.dentry) goto err_inode; path.mnt = mntget(pipe_mnt); - path.dentry->d_op = &pipefs_dentry_operations; + d_set_d_op(path.dentry, &pipefs_dentry_operations); d_instantiate(path.dentry, inode); err = -ENFILE; @@ -1253,6 +1253,10 @@ out: return ret; } +static const struct super_operations pipefs_ops = { + .destroy_inode = free_inode_nonrcu, +}; + /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need @@ -1262,7 +1266,7 @@ out: static struct dentry *pipefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { @@ -1288,7 +1292,7 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); + mntput_long(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/pnode.c b/fs/pnode.c index 8066b8dd748f..d42514e32380 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -288,7 +288,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } @@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. * - * vfsmount lock must be held for read or write + * vfsmount lock must be held for write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 2758e2afc518..288a49e098bf 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -15,6 +15,7 @@ proc-y += devices.o proc-y += interrupts.o proc-y += loadavg.o proc-y += meminfo.o +proc-y += proc_console.o proc-y += stat.o proc-y += uptime.o proc-y += version.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 182845147fe4..b20962c71a52 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = { #endif +#ifdef CONFIG_SCHED_AUTOGROUP +/* + * Print out autogroup related information: + */ +static int sched_autogroup_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_autogroup_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_autogroup_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct task_struct *p; + char buffer[PROC_NUMBUF]; + long nice; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + err = strict_strtol(strstrip(buffer), 0, &nice); + if (err) + return -EINVAL; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = nice; + err = proc_sched_autogroup_set_nice(p, &err); + if (err) + count = err; + + put_task_struct(p); + + return count; +} + +static int sched_autogroup_open(struct inode *inode, struct file *filp) +{ + int ret; + + ret = single_open(filp, sched_autogroup_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_pid_sched_autogroup_operations = { + .open = sched_autogroup_open, + .read = seq_read, + .write = sched_autogroup_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_AUTOGROUP */ + static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -1719,10 +1795,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat */ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); + struct inode *inode; + struct task_struct *task; const struct cred *cred; + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { @@ -1744,7 +1826,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static int pid_delete_dentry(struct dentry * dentry) +static int pid_delete_dentry(const struct dentry * dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, @@ -1888,12 +1970,19 @@ static int proc_fd_link(struct inode *inode, struct path *path) static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - int fd = proc_fd(inode); + struct inode *inode; + struct task_struct *task; + int fd; struct files_struct *files; const struct cred *cred; + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + fd = proc_fd(inode); + if (task) { files = get_files_struct(task); if (files) { @@ -1969,7 +2058,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2101,11 +2190,13 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -static int proc_fd_permission(struct inode *inode, int mask) +static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) { int rv; - rv = generic_permission(inode, mask, NULL); + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + rv = generic_permission(inode, mask, flags, NULL); if (rv == 0) return 0; if (task_pid(current) == proc_pid(inode)) @@ -2137,7 +2228,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, NULL)) @@ -2196,7 +2287,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) @@ -2563,8 +2654,14 @@ static const struct pid_entry proc_base_stuff[] = { */ static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); + struct inode *inode; + struct task_struct *task; + + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); if (task) { put_task_struct(task); return 1; @@ -2615,7 +2712,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; + d_set_d_op(dentry, &proc_base_dentry_operations); d_add(dentry, inode); error = NULL; out: @@ -2733,6 +2830,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif +#ifdef CONFIG_SCHED_AUTOGROUP + REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), +#endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK INF("syscall", S_IRUSR, proc_pid_syscall), @@ -2926,7 +3026,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -3169,7 +3269,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index dd29f0337661..f766be29d2c7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = { * smarter: we could keep a "volatile" flag in the * inode to indicate which ones to keep. */ -static int proc_delete_dentry(struct dentry * dentry) +static int proc_delete_dentry(const struct dentry * dentry) { return 1; } @@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, out_unlock: if (inode) { - dentry->d_op = &proc_dentry_operations; + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3ddb6068177c..6bcb926b101b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb) return inode; } -static void proc_destroy_inode(struct inode *inode) +static void proc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } +static void proc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, proc_i_callback); +} + static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/proc/proc_console.c b/fs/proc/proc_console.c new file mode 100644 index 000000000000..8a707609f528 --- /dev/null +++ b/fs/proc/proc_console.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2010 Werner Fink, Jiri Slaby + * + * Licensed under GPLv2 + */ + +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/tty_driver.h> + +/* + * This is handler for /proc/consoles + */ +static int show_console_dev(struct seq_file *m, void *v) +{ + static const struct { + short flag; + char name; + } con_flags[] = { + { CON_ENABLED, 'E' }, + { CON_CONSDEV, 'C' }, + { CON_BOOT, 'B' }, + { CON_PRINTBUFFER, 'p' }, + { CON_BRL, 'b' }, + { CON_ANYTIME, 'a' }, + }; + char flags[ARRAY_SIZE(con_flags) + 1]; + struct console *con = v; + unsigned int a; + int len; + dev_t dev = 0; + + if (con->device) { + const struct tty_driver *driver; + int index; + driver = con->device(con, &index); + if (driver) { + dev = MKDEV(driver->major, driver->minor_start); + dev += index; + } + } + + for (a = 0; a < ARRAY_SIZE(con_flags); a++) + flags[a] = (con->flags & con_flags[a].flag) ? + con_flags[a].name : ' '; + flags[a] = 0; + + seq_printf(m, "%s%d%n", con->name, con->index, &len); + len = 21 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', + con->write ? 'W' : '-', con->unblank ? 'U' : '-', + flags); + if (dev) + seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); + + seq_printf(m, "\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + struct console *con; + loff_t off = 0; + + acquire_console_sem(); + for_each_console(con) + if (off++ == *pos) + break; + + return con; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct console *con = v; + ++*pos; + return con->next; +} + +static void c_stop(struct seq_file *m, void *v) +{ + release_console_sem(); +} + +static const struct seq_operations consoles_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_console_dev +}; + +static int consoles_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &consoles_op); +} + +static const struct file_operations proc_consoles_operations = { + .open = consoles_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int register_proc_consoles(void) +{ + proc_create("consoles", 0, NULL, &proc_consoles_operations); + return 0; +} +module_init(register_proc_consoles); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b652cb00906b..09a1f92a34ef 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -5,6 +5,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include <linux/security.h> +#include <linux/namei.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -120,7 +121,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; err = NULL; - dentry->d_op = &proc_sys_dentry_operations; + d_set_d_op(dentry, &proc_sys_dentry_operations); d_add(dentry, inode); out: @@ -201,7 +202,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, dput(child); return -ENOMEM; } else { - child->d_op = &proc_sys_dentry_operations; + d_set_d_op(child, &proc_sys_dentry_operations); d_add(child, inode); } } else { @@ -294,7 +295,7 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask) +static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) { /* * sysctl entries that are not writeable, @@ -304,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask) struct ctl_table *table; int error; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + /* Executable files are not allowed under /proc/sys/ */ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) return -EACCES; @@ -389,23 +393,30 @@ static const struct inode_operations proc_sys_dir_operations = { static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; return !PROC_I(dentry->d_inode)->sysctl->unregistering; } -static int proc_sys_delete(struct dentry *dentry) +static int proc_sys_delete(const struct dentry *dentry) { return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } -static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, - struct qstr *name) +static int proc_sys_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct dentry *dentry = container_of(qstr, struct dentry, d_name); - if (qstr->len != name->len) + /* Although proc doesn't have negative dentries, rcu-walk means + * that inode here can be NULL */ + if (!inode) + return 0; + if (name->len != len) return 1; - if (memcmp(qstr->name, name->name, name->len)) + if (memcmp(name->name, str, len)) return 1; - return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); + return !sysctl_is_seen(PROC_I(inode)->sysctl); } static const struct dentry_operations proc_sys_dentry_operations = { diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 2367fb3f70bc..74802bc5ded9 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void) /* Do some basic Verification. */ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || (ehdr.e_type != ET_CORE) || - !vmcore_elf_check_arch(&ehdr) || + !vmcore_elf64_check_arch(&ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64 || ehdr.e_ident[EI_VERSION] != EV_CURRENT || ehdr.e_version != EV_CURRENT || diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fcada42f1aa3..e63b4171d583 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void qnx4_destroy_inode(struct inode *inode) +static void qnx4_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } +static void qnx4_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, qnx4_i_callback); +} + static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b243117b8752..2575682a9ead 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void reiserfs_destroy_inode(struct inode *inode) +static void reiserfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } +static void reiserfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, reiserfs_i_callback); +} + static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5d04a7828e7a..3cfb2e933644 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -870,11 +870,14 @@ out: return err; } -static int reiserfs_check_acl(struct inode *inode, int mask) +static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct posix_acl *acl; int error = -EAGAIN; /* do regular unix permission checks by default */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); if (acl) { @@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s) return 0; } -int reiserfs_permission(struct inode *inode, int mask) +int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; /* * We don't do permission checks on the internal objects. * Permissions are determined by the "owning" object. @@ -965,13 +970,16 @@ int reiserfs_permission(struct inode *inode, int mask) * Stat data v1 doesn't support ACLs. */ if (get_inode_sd_version(inode) != STAT_DATA_V1) - return generic_permission(inode, mask, reiserfs_check_acl); + return generic_permission(inode, mask, flags, + reiserfs_check_acl); #endif - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; return -EPERM; } @@ -990,7 +998,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - dentry->d_op = &xattr_lookup_poison_ops; + d_set_d_op(dentry, &xattr_lookup_poison_ops); if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6647f90e55cd..2305e3121cb1 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb) /* * return a spent inode to the slab cache */ -static void romfs_destroy_inode(struct inode *inode) +static void romfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } +static void romfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, romfs_i_callback); +} + /* * get filesystem statistics */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 24de30ba34c1..20700b9f2b4c 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb) } -static void squashfs_destroy_inode(struct inode *inode) +static void squashfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); } +static void squashfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, squashfs_i_callback); +} + static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, diff --git a/fs/super.c b/fs/super.c index ca696155cd9a..823e061faa87 100644 --- a/fs/super.c +++ b/fs/super.c @@ -30,6 +30,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <linux/backing-dev.h> +#include <linux/rculist_bl.h> #include "internal.h" @@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_LIST_HEAD(&s->s_files); #endif INIT_LIST_HEAD(&s->s_instances); - INIT_HLIST_HEAD(&s->s_anon); + INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); @@ -1139,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return mnt; err: - mntput(mnt); + mntput_long(mnt); return ERR_PTR(err); } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 7e54bac8c4b0..ea9120a830d8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd) goto repeat; } -static int sysfs_dentry_delete(struct dentry *dentry) +static int sysfs_dentry_delete(const struct dentry *dentry) { struct sysfs_dirent *sd = dentry->d_fsdata; return !!(sd->s_flags & SYSFS_FLAG_REMOVED); @@ -239,9 +239,13 @@ static int sysfs_dentry_delete(struct dentry *dentry) static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *sd; int is_dir; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + sd = dentry->d_fsdata; mutex_lock(&sysfs_mutex); /* The sysfs dirent has been deleted */ @@ -701,7 +705,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - dentry->d_op = &sysfs_dentry_ops; + d_set_d_op(dentry, &sysfs_dentry_ops); dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 442f34ff1af8..c8769dc222d8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -165,10 +165,7 @@ int sysfs_merge_group(struct kobject *kobj, struct attribute *const *attr; int i; - if (grp) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); - else - dir_sd = sysfs_get(kobj->sd); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); if (!dir_sd) return -ENOENT; @@ -195,10 +192,7 @@ void sysfs_unmerge_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; struct attribute *const *attr; - if (grp) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); - else - dir_sd = sysfs_get(kobj->sd); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); if (dir_sd) { for (attr = grp->attrs; *attr; ++attr) sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index cffb1fd8ba33..0a12eb89cd32 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/sysfs.h> #include <linux/xattr.h> #include <linux/security.h> #include "sysfs.h" @@ -348,13 +349,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha return -ENOENT; } -int sysfs_permission(struct inode *inode, int mask) +int sysfs_permission(struct inode *inode, int mask, unsigned int flags) { - struct sysfs_dirent *sd = inode->i_private; + struct sysfs_dirent *sd; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + sd = inode->i_private; mutex_lock(&sysfs_mutex); sysfs_refresh_inode(sd, inode); mutex_unlock(&sysfs_mutex); - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, flags, NULL); } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d9be60a2e956..3d28af31d863 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -9,6 +9,7 @@ */ #include <linux/lockdep.h> +#include <linux/kobject_ns.h> #include <linux/fs.h> struct sysfs_open_dirent; @@ -200,7 +201,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); void sysfs_evict_inode(struct inode *inode); int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); -int sysfs_permission(struct inode *inode, int mask); +int sysfs_permission(struct inode *inode, int mask, unsigned int flags); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index de44d067b9e6..0630eb969a28 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb) return &si->vfs_inode; } -static void sysv_destroy_inode(struct inode *inode) +static void sysv_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } +static void sysv_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, sysv_i_callback); +} + static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 11e7f7d11cd0..b5e68da2db32 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int sysv_hash(struct dentry *dentry, struct qstr *qstr) +static int sysv_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *qstr) { /* Truncate the name in place, avoids having to define a compare function. */ @@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st struct inode * inode = NULL; ino_t ino; - dentry->d_op = dir->i_sb->s_root->d_op; + d_set_d_op(dentry, dir->i_sb->s_root->d_op); if (dentry->d_name.len > SYSV_NAMELEN) return ERR_PTR(-ENAMETOOLONG); ino = sysv_inode_by_name(dentry); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 3d9c62be0c10..76712aefc4ab 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size) if (sbi->s_forced_ro) sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) - sb->s_root->d_op = &sysv_dentry_operations; + d_set_d_op(sb->s_root, &sysv_dentry_operations); return 1; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 91fac54c70e3..6e11c2975dcf 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) return &ui->vfs_inode; }; +static void ubifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ubifs_inode *ui = ubifs_inode(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ubifs_inode_slab, ui); +} + static void ubifs_destroy_inode(struct inode *inode) { struct ubifs_inode *ui = ubifs_inode(inode); kfree(ui->data); - kmem_cache_free(ubifs_inode_slab, inode); + call_rcu(&inode->i_rcu, ubifs_i_callback); } /* diff --git a/fs/udf/super.c b/fs/udf/super.c index 4a5c7c61836a..b539d53320fb 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void udf_destroy_inode(struct inode *inode) +static void udf_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } +static void udf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, udf_i_callback); +} + static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2c47daed56da..2c61ac5d4e48 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ufs_destroy_inode(struct inode *inode) +static void ufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } +static void ufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ufs_i_callback); +} + static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index b2771862fd3d..39f4f809bb68 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } int -xfs_check_acl(struct inode *inode, int mask) +xfs_check_acl(struct inode *inode, int mask, unsigned int flags) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip; struct posix_acl *acl; int error = -EAGAIN; + ip = XFS_I(inode); trace_xfs_check_acl(ip); /* @@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask) if (!XFS_IFORK_Q(ip)) return -EAGAIN; + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 0135e2a669d7..11dd72070cbb 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -42,7 +42,7 @@ struct xfs_acl { #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) #ifdef CONFIG_XFS_POSIX_ACL -extern int xfs_check_acl(struct inode *inode, int mask); +extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); extern int xfs_acl_chmod(struct inode *inode); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0cdd26932d8e..d7de5a3f7867 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,17 @@ xfs_inode_alloc( return ip; } +STATIC void +xfs_inode_free_callback( + struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct xfs_inode *ip = XFS_I(inode); + + INIT_LIST_HEAD(&inode->i_dentry); + kmem_zone_free(xfs_inode_zone, ip); +} + void xfs_inode_free( struct xfs_inode *ip) @@ -134,7 +145,7 @@ xfs_inode_free( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); + call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); } /* diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 45ce15dc5b2b..edfa178bafb6 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -408,7 +408,7 @@ xfs_mru_cache_flush( spin_lock(&mru->lock); if (mru->queued) { spin_unlock(&mru->lock); - cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); + cancel_delayed_work_sync(&mru->work); spin_lock(&mru->lock); } |