diff options
Diffstat (limited to 'fs')
244 files changed, 6148 insertions, 5235 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 9f7270f36b2a..525da2e8f73b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -62,6 +62,16 @@ source "fs/autofs/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" +config CUSE + tristate "Character device in Userpace support" + depends on FUSE_FS + help + This FUSE extension allows character devices to be + implemented in userspace. + + If you want to develop or use userspace character device + based on CUSE, answer Y or M. + config GENERIC_ACL bool select FS_POSIX_ACL diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index e0a85dbeeb88..a6665f37f456 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -53,6 +53,7 @@ struct adfs_dir_ops { int (*update)(struct adfs_dir *dir, struct object_info *obj); int (*create)(struct adfs_dir *dir, struct object_info *obj); int (*remove)(struct adfs_dir *dir, struct object_info *obj); + int (*sync)(struct adfs_dir *dir); void (*free)(struct adfs_dir *dir); }; @@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations; extern struct adfs_dir_ops adfs_f_dir_ops; extern struct adfs_dir_ops adfs_fplus_dir_ops; -extern int adfs_dir_update(struct super_block *sb, struct object_info *obj); +extern int adfs_dir_update(struct super_block *sb, struct object_info *obj, + int wait); /* file.c */ extern const struct inode_operations adfs_file_inode_operations; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index e867ccf37246..4d4073447d1a 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -83,7 +83,7 @@ out: } int -adfs_dir_update(struct super_block *sb, struct object_info *obj) +adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait) { int ret = -EINVAL; #ifdef CONFIG_ADFS_FS_RW @@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj) ret = ops->update(&dir, obj); write_unlock(&adfs_dir_lock); + if (wait) { + int err = ops->sync(&dir); + if (!ret) + ret = err; + } + ops->free(&dir); out: #endif @@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .readdir = adfs_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, }; static int diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index ea7df2146921..31df6adf0de6 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -437,6 +437,22 @@ bad_dir: #endif } +static int +adfs_f_sync(struct adfs_dir *dir) +{ + int err = 0; + int i; + + for (i = dir->nr_buffers - 1; i >= 0; i--) { + struct buffer_head *bh = dir->bh[i]; + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } + + return err; +} + static void adfs_f_free(struct adfs_dir *dir) { @@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = { .setpos = adfs_f_setpos, .getnext = adfs_f_getnext, .update = adfs_f_update, + .sync = adfs_f_sync, .free = adfs_f_free }; diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 1ec644e32df9..139e0f345f18 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -161,6 +161,22 @@ out: return ret; } +static int +adfs_fplus_sync(struct adfs_dir *dir) +{ + int err = 0; + int i; + + for (i = dir->nr_buffers - 1; i >= 0; i--) { + struct buffer_head *bh = dir->bh[i]; + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } + + return err; +} + static void adfs_fplus_free(struct adfs_dir *dir) { @@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = { .read = adfs_fplus_read, .setpos = adfs_fplus_setpos, .getnext = adfs_fplus_getnext, + .sync = adfs_fplus_sync, .free = adfs_fplus_free }; diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 36e381c6a99a..8224d54a2afb 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, - .fsync = file_fsync, + .fsync = simple_fsync, .write = do_sync_write, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index e647200262a2..05b3a677201d 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -376,7 +376,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int unused) +int adfs_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj); + ret = adfs_dir_update(sb, &obj, wait); unlock_kernel(); return ret; } diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 92ab4fbc2031..568081b93f73 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock); #define GET_FRAG_ID(_map,_start,_idmask) \ ({ \ unsigned char *_m = _map + (_start >> 3); \ - u32 _frag = get_unaligned((u32 *)_m); \ + u32 _frag = get_unaligned_le32(_m); \ _frag >>= (_start & 7); \ _frag & _idmask; \ }) diff --git a/fs/adfs/super.c b/fs/adfs/super.c index dd9becca4241..0ec5aaf47aa7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb) int i; struct adfs_sb_info *asb = ADFS_SB(sb); + lock_kernel(); + for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); kfree(asb); sb->s_fs_info = NULL; + + unlock_kernel(); } static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 1a2d5e3c7f4e..e511dc621a2e 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -182,6 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); +int affs_file_fsync(struct file *, struct dentry *, int); /* dir.c */ diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 7b36904dbeac..8ca8f3a55599 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .readdir = affs_readdir, - .fsync = file_fsync, + .fsync = affs_file_fsync, }; /* diff --git a/fs/affs/file.c b/fs/affs/file.c index 9246cb4aa018..184e55c1c9ba 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = { .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, - .fsync = file_fsync, + .fsync = affs_file_fsync, .splice_read = generic_file_splice_read, }; @@ -915,3 +915,15 @@ affs_truncate(struct inode *inode) } affs_free_prealloc(inode); } + +int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode * inode = dentry->d_inode; + int ret, err; + + ret = write_inode_now(inode, 0); + err = sync_blockdev(inode->i_sb->s_bdev); + if (!ret) + ret = err; + return ret; +} diff --git a/fs/affs/super.c b/fs/affs/super.c index 63f5183f263b..104fdcb3a7fc 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -16,6 +16,7 @@ #include <linux/parser.h> #include <linux/magic.h> #include <linux/sched.h> +#include <linux/smp_lock.h> #include "affs.h" extern struct timezone sys_tz; @@ -24,49 +25,67 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void +affs_commit_super(struct super_block *sb, int clean) +{ + struct affs_sb_info *sbi = AFFS_SB(sb); + struct buffer_head *bh = sbi->s_root_bh; + struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); + + tail->bm_flag = cpu_to_be32(clean); + secs_to_datestamp(get_seconds(), &tail->disk_change); + affs_fix_checksum(sb, bh); + mark_buffer_dirty(bh); +} + +static void affs_put_super(struct super_block *sb) { struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); - if (!(sb->s_flags & MS_RDONLY)) { - AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1); - secs_to_datestamp(get_seconds(), - &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change); - affs_fix_checksum(sb, sbi->s_root_bh); - mark_buffer_dirty(sbi->s_root_bh); - } + lock_kernel(); + + if (!(sb->s_flags & MS_RDONLY)) + affs_commit_super(sb, 1); kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); kfree(sbi); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } static void affs_write_super(struct super_block *sb) { int clean = 2; - struct affs_sb_info *sbi = AFFS_SB(sb); + lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) { // if (sbi->s_bitmap[i].bm_bh) { // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { // clean = 0; - AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean); - secs_to_datestamp(get_seconds(), - &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change); - affs_fix_checksum(sb, sbi->s_root_bh); - mark_buffer_dirty(sbi->s_root_bh); + affs_commit_super(sb, clean); sb->s_dirt = !clean; /* redo until bitmap synced */ } else sb->s_dirt = 0; + unlock_super(sb); pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); } +static int +affs_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); + affs_commit_super(sb, 2); + sb->s_dirt = 0; + unlock_super(sb); + return 0; +} + static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) @@ -124,6 +143,7 @@ static const struct super_operations affs_sops = { .clear_inode = affs_clear_inode, .put_super = affs_put_super, .write_super = affs_write_super, + .sync_fs = affs_sync_fs, .statfs = affs_statfs, .remount_fs = affs_remount, .show_options = generic_show_options, @@ -507,6 +527,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } + lock_kernel(); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; @@ -514,8 +535,10 @@ affs_remount(struct super_block *sb, int *flags, char *data) sbi->s_uid = uid; sbi->s_gid = gid; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) { sb->s_dirt = 1; while (sb->s_dirt) @@ -524,6 +547,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) } else res = affs_init_bitmap(sb, flags); + unlock_kernel(); return res; } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2b9e2d03a390..c52be53f6946 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/afs/super.c b/fs/afs/super.c index 76828e5f8a39..ad0514d0115f 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb) _enter(""); + lock_kernel(); + afs_put_volume(as->volume); + unlock_kernel(); + _leave(""); } diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 4eb4d8dfb2f1..2316e944a109 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, } path.mnt = mnt; path_get(&path); - if (!follow_down(&path.mnt, &path.dentry)) { + if (!follow_down(&path)) { path_put(&path); DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(path.dentry) && - follow_down(&path.mnt, &path.dentry)) + while (d_mountpoint(path.dentry) && follow_down(&path)); ; umount_ok = may_umount(path.mnt); path_put(&path); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index b7ff33c63101..8f7cdde41733 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); -static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static inline int autofs4_follow_mount(struct path *path) { int res = 0; - while (d_mountpoint(*dentry)) { - int followed = follow_down(mnt, dentry); + while (d_mountpoint(path->dentry)) { + int followed = follow_down(path); if (!followed) break; res = 1; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 84168c0dcc2d..f3da2eb51f56 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp, return 0; } -/* - * Walk down the mount stack looking for an autofs mount that - * has the requested device number (aka. new_encode_dev(sb->s_dev). - */ -static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno) +static int find_autofs_mount(const char *pathname, + struct path *res, + int test(struct path *path, void *data), + void *data) { - struct dentry *dentry; - struct inode *inode; - struct super_block *sb; - dev_t s_dev; - unsigned int err; - + struct path path; + int err = kern_path(pathname, 0, &path); + if (err) + return err; err = -ENOENT; - - /* Lookup the dentry name at the base of our mount point */ - dentry = d_lookup(nd->path.dentry, &nd->last); - if (!dentry) - goto out; - - dput(nd->path.dentry); - nd->path.dentry = dentry; - - /* And follow the mount stack looking for our autofs mount */ - while (follow_down(&nd->path.mnt, &nd->path.dentry)) { - inode = nd->path.dentry->d_inode; - if (!inode) - break; - - sb = inode->i_sb; - s_dev = new_encode_dev(sb->s_dev); - if (devno == s_dev) { - if (sb->s_magic == AUTOFS_SUPER_MAGIC) { + while (path.dentry == path.mnt->mnt_root) { + if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) { + if (test(&path, data)) { + path_get(&path); + if (!err) /* already found some */ + path_put(res); + *res = path; err = 0; - break; } } + if (!follow_up(&path)) + break; } -out: + path_put(&path); return err; } -/* - * Walk down the mount stack looking for an autofs mount that - * has the requested mount type (ie. indirect, direct or offset). - */ -static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type) +static int test_by_dev(struct path *path, void *p) { - struct dentry *dentry; - struct autofs_info *ino; - unsigned int err; - - err = -ENOENT; - - /* Lookup the dentry name at the base of our mount point */ - dentry = d_lookup(nd->path.dentry, &nd->last); - if (!dentry) - goto out; - - dput(nd->path.dentry); - nd->path.dentry = dentry; + return path->mnt->mnt_sb->s_dev == *(dev_t *)p; +} - /* And follow the mount stack looking for our autofs mount */ - while (follow_down(&nd->path.mnt, &nd->path.dentry)) { - ino = autofs4_dentry_ino(nd->path.dentry); - if (ino && ino->sbi->type & type) { - err = 0; - break; - } - } -out: - return err; +static int test_by_type(struct path *path, void *p) +{ + struct autofs_info *ino = autofs4_dentry_ino(path->dentry); + return ino && ino->sbi->type & *(unsigned *)p; } static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) @@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). */ -static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) +static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { - struct file *filp; - struct nameidata nd; int err, fd; fd = get_unused_fd(); if (likely(fd >= 0)) { - /* Get nameidata of the parent directory */ - err = path_lookup(path, LOOKUP_PARENT, &nd); + struct file *filp; + struct path path; + + err = find_autofs_mount(name, &path, test_by_dev, &devid); if (err) goto out; /* - * Search down, within the parent, looking for an - * autofs super block that has the device number + * Find autofs super block that has the device number * corresponding to the autofs fs we want to open. */ - err = autofs_dev_ioctl_find_super(&nd, devid); - if (err) { - path_put(&nd.path); - goto out; - } - filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, + filp = dentry_open(path.dentry, path.mnt, O_RDONLY, current_cred()); if (IS_ERR(filp)) { err = PTR_ERR(filp); @@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp, param->ioctlfd = -1; path = param->path; - devid = param->openmount.devid; + devid = new_decode_dev(param->openmount.devid); err = 0; fd = autofs_dev_ioctl_open_mountpoint(path, devid); @@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, struct autofs_dev_ioctl *param) { struct autofs_info *ino; - struct nameidata nd; - const char *path; + struct path path; dev_t devid; int err = -ENOENT; @@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp, goto out; } - path = param->path; - devid = new_encode_dev(sbi->sb->s_dev); + devid = sbi->sb->s_dev; param->requester.uid = param->requester.gid = -1; - /* Get nameidata of the parent directory */ - err = path_lookup(path, LOOKUP_PARENT, &nd); + err = find_autofs_mount(param->path, &path, test_by_dev, &devid); if (err) goto out; - err = autofs_dev_ioctl_find_super(&nd, devid); - if (err) - goto out_release; - - ino = autofs4_dentry_ino(nd.path.dentry); + ino = autofs4_dentry_ino(path.dentry); if (ino) { err = 0; - autofs4_expire_wait(nd.path.dentry); + autofs4_expire_wait(path.dentry); spin_lock(&sbi->fs_lock); param->requester.uid = ino->uid; param->requester.gid = ino->gid; spin_unlock(&sbi->fs_lock); } - -out_release: - path_put(&nd.path); + path_put(&path); out: return err; } @@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - struct nameidata nd; - const char *path; + struct path path; + const char *name; unsigned int type; unsigned int devid, magic; int err = -ENOENT; @@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, goto out; } - path = param->path; + name = param->path; type = param->ismountpoint.in.type; param->ismountpoint.out.devid = devid = 0; param->ismountpoint.out.magic = magic = 0; if (!fp || param->ioctlfd == -1) { - if (autofs_type_any(type)) { - struct super_block *sb; - - err = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (err) - goto out; - - sb = nd.path.dentry->d_sb; - devid = new_encode_dev(sb->s_dev); - } else { - struct autofs_info *ino; - - err = path_lookup(path, LOOKUP_PARENT, &nd); - if (err) - goto out; - - err = autofs_dev_ioctl_find_sbi_type(&nd, type); - if (err) - goto out_release; - - ino = autofs4_dentry_ino(nd.path.dentry); - devid = autofs4_get_dev(ino->sbi); - } - + if (autofs_type_any(type)) + err = kern_path(name, LOOKUP_FOLLOW, &path); + else + err = find_autofs_mount(name, &path, test_by_type, &type); + if (err) + goto out; + devid = new_encode_dev(path.mnt->mnt_sb->s_dev); err = 0; - if (nd.path.dentry->d_inode && - nd.path.mnt->mnt_root == nd.path.dentry) { + if (path.dentry->d_inode && + path.mnt->mnt_root == path.dentry) { err = 1; - magic = nd.path.dentry->d_inode->i_sb->s_magic; + magic = path.dentry->d_inode->i_sb->s_magic; } } else { - dev_t dev = autofs4_get_dev(sbi); + dev_t dev = sbi->sb->s_dev; - err = path_lookup(path, LOOKUP_PARENT, &nd); + err = find_autofs_mount(name, &path, test_by_dev, &dev); if (err) goto out; - err = autofs_dev_ioctl_find_super(&nd, dev); - if (err) - goto out_release; - - devid = dev; + devid = new_encode_dev(dev); - err = have_submounts(nd.path.dentry); + err = have_submounts(path.dentry); - if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { - if (follow_down(&nd.path.mnt, &nd.path.dentry)) { - struct inode *inode = nd.path.dentry->d_inode; - magic = inode->i_sb->s_magic; - } + if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) { + if (follow_down(&path)) + magic = path.mnt->mnt_sb->s_magic; } } param->ismountpoint.out.devid = devid; param->ismountpoint.out.magic = magic; - -out_release: - path_put(&nd.path); + path_put(&path); out: return err; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 3077d8f16523..aa39ae83f019 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry, static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { struct dentry *top = dentry; + struct path path = {.mnt = mnt, .dentry = dentry}; int status = 1; DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - mntget(mnt); - dget(dentry); + path_get(&path); - if (!follow_down(&mnt, &dentry)) + if (!follow_down(&path)) goto done; - if (is_autofs4_dentry(dentry)) { - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + if (is_autofs4_dentry(path.dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) @@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) * Otherwise it's an offset mount and we need to check * if we can umount its mount, if there is one. */ - if (!d_mountpoint(dentry)) { + if (!d_mountpoint(path.dentry)) { status = 0; goto done; } @@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) status = 0; done: DPRINTK("returning = %d", status); - dput(dentry); - mntput(mnt); + path_put(&path); return status; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e383bf0334f1..b96a3c57359d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) nd->flags); /* * For an expire of a covered direct or offset mount we need - * to beeak out of follow_down() at the autofs mount trigger + * to break out of follow_down() at the autofs mount trigger * (d_mounted--), so we can see the expiring flag, and manage * the blocking and following here until the expire is completed. */ @@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); /* Follow down to our covering mount. */ - if (!follow_down(&nd->path.mnt, &nd->path.dentry)) + if (!follow_down(&nd->path)) goto done; goto follow; } @@ -230,8 +230,7 @@ follow: * to follow it. */ if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->path.mnt, - &nd->path.dentry)) { + if (!autofs4_follow_mount(&nd->path)) { status = -ENOENT; goto out_error; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 76afd0d6b86c..9367b6297d84 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { + lock_kernel(); + kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } /* Allocate private field of the superblock, fill it. diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 4dd1b623f937..54bd07d44e68 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .readdir = bfs_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, .llseek = generic_file_llseek, }; @@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink = 1; } de->ino = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); inode->i_ctime = dir->i_ctime; @@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_ctime = CURRENT_TIME_SEC; inode_dec_link_count(new_inode); } - mark_buffer_dirty(old_bh); + mark_buffer_dirty_inode(old_bh, old_dir); error = 0; end_rename: @@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name, for (i = 0; i < BFS_NAMELEN; i++) de->name[i] = (i < namelen) ? name[i] : 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); brelse(bh); return 0; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index cc4062d12ca2..6f60336c6628 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -30,6 +30,7 @@ MODULE_LICENSE("GPL"); #define dprintf(x...) #endif +static void bfs_write_super(struct super_block *s); void dump_imap(const char *prefix, struct super_block *s); struct inode *bfs_iget(struct super_block *sb, unsigned long ino) @@ -97,14 +98,15 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int unused) +static int bfs_write_inode(struct inode *inode, int wait) { + struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; unsigned long i_sblock; struct bfs_inode *di; struct buffer_head *bh; int block, off; - struct bfs_sb_info *info = BFS_SB(inode->i_sb); + int err = 0; dprintf("ino=%08x\n", ino); @@ -145,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); + if (wait) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } brelse(bh); mutex_unlock(&info->bfs_lock); - return 0; + return err; } static void bfs_delete_inode(struct inode *inode) @@ -209,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode) clear_inode(inode); } +static int bfs_sync_fs(struct super_block *sb, int wait) +{ + struct bfs_sb_info *info = BFS_SB(sb); + + mutex_lock(&info->bfs_lock); + mark_buffer_dirty(info->si_sbh); + sb->s_dirt = 0; + mutex_unlock(&info->bfs_lock); + + return 0; +} + +static void bfs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + bfs_sync_fs(sb, 1); + else + sb->s_dirt = 0; +} + static void bfs_put_super(struct super_block *s) { struct bfs_sb_info *info = BFS_SB(s); @@ -216,11 +243,18 @@ static void bfs_put_super(struct super_block *s) if (!info) return; + lock_kernel(); + + if (s->s_dirt) + bfs_write_super(s); + brelse(info->si_sbh); mutex_destroy(&info->bfs_lock); kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; + + unlock_kernel(); } static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -240,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static void bfs_write_super(struct super_block *s) -{ - struct bfs_sb_info *info = BFS_SB(s); - - mutex_lock(&info->bfs_lock); - if (!(s->s_flags & MS_RDONLY)) - mark_buffer_dirty(info->si_sbh); - s->s_dirt = 0; - mutex_unlock(&info->bfs_lock); -} - static struct kmem_cache *bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) @@ -298,6 +321,7 @@ static const struct super_operations bfs_sops = { .delete_inode = bfs_delete_inode, .put_super = bfs_put_super, .write_super = bfs_write_super, + .sync_fs = bfs_sync_fs, .statfs = bfs_statfs, }; @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/mempool.h> #include <linux/workqueue.h> -#include <linux/blktrace_api.h> #include <scsi/sg.h> /* for struct sg_iovec */ #include <trace/events/block.h> @@ -358,9 +357,9 @@ static void bio_kmalloc_destructor(struct bio *bio) * * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate * a bio. This is due to the mempool guarantees. To make this work, callers - * must never allocate more than 1 bio at the time from this pool. Callers + * must never allocate more than 1 bio at a time from this pool. Callers * that need to allocate more than 1 bio must always submit the previously - * allocate bio for IO before attempting to allocate a new one. Failure to + * allocated bio for IO before attempting to allocate a new one. Failure to * do so can cause livelocks under memory pressure. * **/ diff --git a/fs/block_dev.c b/fs/block_dev.c index 931f6b8c4b2f..3a6d4fb2a329 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } +int __sync_blockdev(struct block_device *bdev, int wait) +{ + if (!bdev) + return 0; + if (!wait) + return filemap_flush(bdev->bd_inode->i_mapping); + return filemap_write_and_wait(bdev->bd_inode->i_mapping); +} + /* * Write out and wait upon all the dirty data associated with a block * device via its mapping. Does not take the superblock lock. */ int sync_blockdev(struct block_device *bdev) { - int ret = 0; - - if (bdev) - ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); - return ret; + return __sync_blockdev(bdev, 1); } EXPORT_SYMBOL(sync_blockdev); @@ -199,7 +204,7 @@ int fsync_bdev(struct block_device *bdev) { struct super_block *sb = get_super(bdev); if (sb) { - int res = fsync_super(sb); + int res = sync_filesystem(sb); drop_super(sb); return res; } @@ -241,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - __fsync_super(sb); + sync_filesystem(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d50d49d990a..d28d29c95f7c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -42,6 +42,8 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); + /* * end_io_wq structs are used to do processing in task context when an IO is * complete. This is used during reads to verify checksums, and it is used @@ -1342,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) free_extent_map(em); } +/* + * If this fails, caller must call bdi_destroy() to get rid of the + * bdi again. + */ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { - bdi_init(bdi); + int err; + + bdi->capabilities = BDI_CAP_MAP_COPY; + err = bdi_init(bdi); + if (err) + return err; + + err = bdi_register(bdi, NULL, "btrfs-%d", + atomic_inc_return(&btrfs_bdi_num)); + if (err) + return err; + bdi->ra_pages = default_backing_dev_info.ra_pages; - bdi->state = 0; - bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; bdi->unplug_io_data = info; bdi->congested_fn = btrfs_congested_fn; @@ -1569,7 +1584,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - setup_bdi(fs_info, &fs_info->bdi); + if (setup_bdi(fs_info, &fs_info->bdi)) + goto fail_bdi; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; @@ -1946,8 +1962,8 @@ fail_iput: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); +fail_bdi: bdi_destroy(&fs_info->bdi); - fail: kfree(extent_root); kfree(tree_root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5b68330f8585..8612b3a09811 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2322,7 +2322,6 @@ err: btrfs_update_inode(trans, root, dir); btrfs_drop_nlink(inode); ret = btrfs_update_inode(trans, root, inode); - dir->i_sb->s_dirt = 1; out: return ret; } @@ -2806,7 +2805,6 @@ error: pending_del_nr); } btrfs_free_path(path); - inode->i_sb->s_dirt = 1; return ret; } @@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) drop_inode = 1; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); @@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); @@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); if (drop_inode) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 708ac06b953b..9f179d4832d5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -394,10 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_root *root = btrfs_sb(sb); int ret; - if (sb->s_flags & MS_RDONLY) - return 0; - - sb->s_dirt = 0; if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; @@ -408,7 +404,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); - sb->s_dirt = 0; return ret; } @@ -454,11 +449,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - static int btrfs_test_super(struct super_block *s, void *data) { struct btrfs_fs_devices *test_fs_devices = data; @@ -689,7 +679,6 @@ static int btrfs_unfreeze(struct super_block *sb) static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, - .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .write_inode = btrfs_write_inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2e177d7f4bb9..4e83457ea253 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -543,13 +543,13 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, btrfs_free_log(trans, root); btrfs_update_reloc_root(trans, root); - if (root->commit_root == root->node) - continue; - - free_extent_buffer(root->commit_root); - root->commit_root = btrfs_root_node(root); + if (root->commit_root != root->node) { + free_extent_buffer(root->commit_root); + root->commit_root = btrfs_root_node(root); + btrfs_set_root_node(&root->root_item, + root->node); + } - btrfs_set_root_node(&root->root_item, root->node); err = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 1e962348d111..431accd475a7 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) /* make sure all pages pinned by operations on behalf of the netfs are * written to disc */ cachefiles_begin_secure(cache, &saved_cred); - ret = fsync_super(cache->mnt->mnt_sb); + down_read(&cache->mnt->mnt_sb->s_umount); + ret = sync_filesystem(cache->mnt->mnt_sb); + up_read(&cache->mnt->mnt_sb->s_umount); cachefiles_end_secure(cache, saved_cred); if (ret == -EIO) diff --git a/fs/char_dev.c b/fs/char_dev.c index 38f71222a552..b7c9d5187a75 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp) p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; - inode->i_cindex = idx; list_add(&inode->i_devices, &p->list); new = NULL; } else if (!cdev_get(p)) @@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; } +int cdev_index(struct inode *inode) +{ + int idx; + struct kobject *kobj; + + kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); + if (!kobj) + return -1; + kobject_put(kobj); + return idx; +} + void cd_forget(struct inode *inode) { spin_lock(&cdev_lock); @@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); +EXPORT_SYMBOL(cdev_index); EXPORT_SYMBOL(register_chrdev); EXPORT_SYMBOL(unregister_chrdev); EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 83d62759c7c7..3bb11be8b6a8 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0a10a59b6392..0d92114195ab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb) cFYI(1, ("Empty cifs superblock info passed to unmount")); return; } + + lock_kernel(); + rc = cifs_umount(sb, cifs_sb); if (rc) cERROR(1, ("cifs_umount failed with return code %d", rc)); @@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb) unload_nls(cifs_sb->local_nls); kfree(cifs_sb); - return; + + unlock_kernel(); } static int diff --git a/fs/compat.c b/fs/compat.c index bb2a9b2e8173..cdd51a3a7c53 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, ret = sys_fcntl(fd, cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK && ret == 0) { - /* GETLK was successfule and we need to return the data... + /* GETLK was successful and we need to return the data... * but it needs to fit in the compat structure. * l_start shouldn't be too big, unless the original * start + end is greater than COMPAT_OFF_T_MAX, in which @@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, } } - lock_kernel(); retval = do_mount((char*)dev_page, dir_page, (char*)type_page, flags, (void*)data_page); - unlock_kernel(); out4: free_page(data_page); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index b83f6bcfa51a..0aac371bff0b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1765,7 +1765,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 762d287123ca..da6061a6df40 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -39,6 +39,9 @@ struct configfs_dirent { umode_t s_mode; struct dentry * s_dentry; struct iattr * s_iattr; +#ifdef CONFIG_LOCKDEP + int s_depth; +#endif }; #define CONFIGFS_ROOT 0x0001 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 05373db21a4e..8e48b52205aa 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = { .d_delete = configfs_d_delete, }; +#ifdef CONFIG_LOCKDEP + +/* + * Helpers to make lockdep happy with our recursive locking of default groups' + * inodes (see configfs_attach_group() and configfs_detach_group()). + * We put default groups i_mutexes in separate classes according to their depth + * from the youngest non-default group ancestor. + * + * For a non-default group A having default groups A/B, A/C, and A/C/D, default + * groups A/B and A/C will have their inode's mutex in class + * default_group_class[0], and default group A/C/D will be in + * default_group_class[1]. + * + * The lock classes are declared and assigned in inode.c, according to the + * s_depth value. + * The s_depth value is initialized to -1, adjusted to >= 0 when attaching + * default groups, and reset to -1 when all default groups are attached. During + * attachment, if configfs_create() sees s_depth > 0, the lock class of the new + * inode's mutex is set to default_group_class[s_depth - 1]. + */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ + sd->s_depth = -1; +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ + int parent_depth = parent_sd->s_depth; + + if (parent_depth >= 0) + sd->s_depth = parent_depth + 1; +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ + /* + * item's i_mutex class is already setup, so s_depth is now only + * used to set new sub-directories s_depth, which is always done + * with item's i_mutex locked. + */ + /* + * sd->s_depth == -1 iff we are a non default group. + * else (we are a default group) sd->s_depth > 0 (see + * create_dir()). + */ + if (sd->s_depth == -1) + /* + * We are a non default group and we are going to create + * default groups. + */ + sd->s_depth = 0; +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ + /* We will not create default groups anymore. */ + sd->s_depth = -1; +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ +} + +#endif /* CONFIG_LOCKDEP */ + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ -static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd, - void * element) +static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, + void *element, int type) { struct configfs_dirent * sd; @@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; + sd->s_type = type; + configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); @@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element); + sd = configfs_new_dirent(parent_sd, element, type); if (IS_ERR(sd)) return PTR_ERR(sd); sd->s_mode = mode; - sd->s_type = type; sd->s_dentry = dentry; if (dentry) { dentry->d_fsdata = configfs_get(sd); @@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_make_dirent(p->d_fsdata, d, k, mode, CONFIGFS_DIR | CONFIGFS_USET_CREATING); if (!error) { + configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata); error = configfs_create(d, mode, init_dir); if (!error) { inc_nlink(p->d_inode); @@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item, * error, as rmdir() would. */ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + configfs_adjust_dir_dirent_depth_before_populate(sd); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; } + configfs_adjust_dir_dirent_depth_after_populate(sd); mutex_unlock(&dentry->d_inode->i_mutex); if (ret) d_delete(dentry); @@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * Note, btw, that this can be called at *any* time, even when a configfs * subsystem isn't registered, or when configfs is loading or unloading. * Just like configfs_register_subsystem(). So we take the same - * precautions. We pin the filesystem. We lock each i_mutex _in_order_ - * on our way down the tree. If we can find the target item in the + * precautions. We pin the filesystem. We lock configfs_dirent_lock. + * If we can find the target item in the * configfs tree, it must be part of the subsystem tree as well, so we - * do not need the subsystem semaphore. Holding the i_mutex chain locks - * out mkdir() and rmdir(), who might be racing us. + * do not need the subsystem semaphore. Holding configfs_dirent_lock helps + * locking out mkdir() and rmdir(), who might be racing us. */ /* @@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * do that so we can unlock it if we find nothing. * * Here we do a depth-first search of the dentry hierarchy looking for - * our object. We take i_mutex on each step of the way down. IT IS - * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, - * we'll drop the i_mutex. + * our object. + * We deliberately ignore items tagged as dropping since they are virtually + * dead, as well as items in the middle of attachment since they virtually + * do not exist yet. This completes the locking out of racing mkdir() and + * rmdir(). + * Note: subdirectories in the middle of attachment start with s_type = + * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When + * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of + * s_type is in configfs_new_dirent(), which has configfs_dirent_lock. * - * If the target is not found, -ENOENT is bubbled up and we have released - * all locks. If the target was found, the locks will be cleared by - * configfs_depend_rollback(). + * If the target is not found, -ENOENT is bubbled up. * * This adds a requirement that all config_items be unique! * - * This is recursive because the locking traversal is tricky. There isn't + * This is recursive. There isn't * much on the stack, though, so folks that need this function - be careful * about your stack! Patches will be accepted to make it iterative. */ @@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin, BUG_ON(!origin || !sd); - /* Lock this guy on the way down */ - mutex_lock(&sd->s_dentry->d_inode->i_mutex); if (sd->s_element == target) /* Boo-yah */ goto out; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { - if (child_sd->s_type & CONFIGFS_DIR) { + if ((child_sd->s_type & CONFIGFS_DIR) && + !(child_sd->s_type & CONFIGFS_USET_DROPPING) && + !(child_sd->s_type & CONFIGFS_USET_CREATING)) { ret = configfs_depend_prep(child_sd->s_dentry, target); if (!ret) @@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin, } /* We looped all our children and didn't find target */ - mutex_unlock(&sd->s_dentry->d_inode->i_mutex); ret = -ENOENT; out: return ret; } -/* - * This is ONLY called if configfs_depend_prep() did its job. So we can - * trust the entire path from item back up to origin. - * - * We walk backwards from item, unlocking each i_mutex. We finish by - * unlocking origin. - */ -static void configfs_depend_rollback(struct dentry *origin, - struct config_item *item) -{ - struct dentry *dentry = item->ci_dentry; - - while (dentry != origin) { - mutex_unlock(&dentry->d_inode->i_mutex); - dentry = dentry->d_parent; - } - - mutex_unlock(&origin->d_inode->i_mutex); -} - int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target) { @@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys, /* Ok, now we can trust subsys/s_item */ - /* Scan the tree, locking i_mutex recursively, return 0 if found */ + spin_lock(&configfs_dirent_lock); + /* Scan the tree, return 0 if found */ ret = configfs_depend_prep(subsys_sd->s_dentry, target); if (ret) - goto out_unlock_fs; + goto out_unlock_dirent_lock; - /* We hold all i_mutexes from the subsystem down to the target */ + /* + * We are sure that the item is not about to be removed by rmdir(), and + * not in the middle of attachment by mkdir(). + */ p = target->ci_dentry->d_fsdata; p->s_dependent_count += 1; - configfs_depend_rollback(subsys_sd->s_dentry, target); - +out_unlock_dirent_lock: + spin_unlock(&configfs_dirent_lock); out_unlock_fs: mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); @@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, struct configfs_dirent *sd; /* - * Since we can trust everything is pinned, we just need i_mutex - * on the item. + * Since we can trust everything is pinned, we just need + * configfs_dirent_lock. */ - mutex_lock(&target->ci_dentry->d_inode->i_mutex); + spin_lock(&configfs_dirent_lock); sd = target->ci_dentry->d_fsdata; BUG_ON(sd->s_dependent_count < 1); @@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, * After this unlock, we cannot trust the item to stay alive! * DO NOT REFERENCE item after this unlock. */ - mutex_unlock(&target->ci_dentry->d_inode->i_mutex); + spin_unlock(&configfs_dirent_lock); } EXPORT_SYMBOL(configfs_undepend_item); @@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; - /* - * Here's where we check for dependents. We're protected by - * i_mutex. - */ - if (sd->s_dependent_count) - return -EBUSY; - /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; @@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); - ret = configfs_detach_prep(dentry, &wait_mutex); - if (ret) - configfs_detach_rollback(dentry); + /* + * Here's where we check for dependents. We're protected by + * configfs_dirent_lock. + * If no dependent, atomically tag the item as dropping. + */ + ret = sd->s_dependent_count ? -EBUSY : 0; + if (!ret) { + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) + configfs_detach_rollback(dentry); + } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); @@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5d349d38e056..4921e7426d95 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -33,10 +33,15 @@ #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/sched.h> +#include <linux/lockdep.h> #include <linux/configfs.h> #include "configfs_internal.h" +#ifdef CONFIG_LOCKDEP +static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; +#endif + extern struct super_block * configfs_sb; static const struct address_space_operations configfs_aops = { @@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) return inode; } +#ifdef CONFIG_LOCKDEP + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ + int depth = sd->s_depth; + + if (depth > 0) { + if (depth <= ARRAY_SIZE(default_group_class)) { + lockdep_set_class(&inode->i_mutex, + &default_group_class[depth - 1]); + } else { + /* + * In practice the maximum level of locking depth is + * already reached. Just inform about possible reasons. + */ + printk(KERN_INFO "configfs: Too many levels of inodes" + " for the locking correctness validator.\n"); + printk(KERN_INFO "Spurious warnings may appear.\n"); + } + } +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ +} + +#endif /* CONFIG_LOCKDEP */ + int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) { int error = 0; @@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode * struct inode *p_inode = dentry->d_parent->d_inode; p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; } + configfs_set_inode_lock_class(sd, inode); goto Proceed; } else diff --git a/fs/dcache.c b/fs/dcache.c index 75659a6fd1f8..9e5cd3c3a6ba 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root, spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; @@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) spin_lock(&dcache_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, "//deleted", 9) != 0)) goto Elong; if (buflen < 1) @@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) read_unlock(¤t->fs->lock); error = -ENOENT; - /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { + if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 858fba14aaa6..c4dfa1dcc86f 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) spin_unlock(&ls->ls_recover_list_lock); if (!found) - de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + len, + ls->ls_allocation); return de; } @@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls) dlm_dir_clear(ls); - last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); + last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); if (!last_name) goto out; @@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, if (namelen > DLM_RESNAME_MAXLEN) return -EINVAL; - de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); + de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); if (!de) return -ENOMEM; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index cd8e2df3c295..d489fcc86713 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -384,7 +384,7 @@ static void threads_stop(void) dlm_astd_stop(); } -static int new_lockspace(char *name, int namelen, void **lockspace, +static int new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { struct dlm_ls *ls; @@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace, break; } ls->ls_create_count++; - module_put(THIS_MODULE); - error = 1; /* not an error, return 0 */ + *lockspace = ls; + error = 1; break; } spin_unlock(&lslist_lock); - if (error < 0) - goto out; if (error) - goto ret_zero; + goto out; error = -ENOMEM; @@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace, dlm_create_debug_file(ls); log_debug(ls, "join complete"); - ret_zero: *lockspace = ls; return 0; @@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, return error; } -int dlm_new_lockspace(char *name, int namelen, void **lockspace, +int dlm_new_lockspace(const char *name, int namelen, void **lockspace, uint32_t flags, int lvblen) { int error = 0; @@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, error = new_lockspace(name, namelen, lockspace, flags, lvblen); if (!error) ls_count++; - else if (!ls_count) + if (error > 0) + error = 0; + if (!ls_count) threads_stop(); out: mutex_unlock(&ls_lock); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 609108a83267..cdb580a9c7a2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk) lowcomms_write_space(sk); } +int dlm_lowcomms_connect_node(int nodeid) +{ + struct connection *con; + + if (nodeid == dlm_our_nodeid()) + return 0; + + con = nodeid2con(nodeid, GFP_NOFS); + if (!con) + return -ENOMEM; + lowcomms_connect_sock(con); + return 0; +} + /* Make a socket active */ static int add_sock(struct socket *sock, struct connection *con) { @@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con, return; } - new_con = nodeid2con(nodeid, GFP_KERNEL); + new_con = nodeid2con(nodeid, GFP_NOFS); if (!new_con) return; @@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con) * the same time and the connections cross on the wire. * In this case we store the incoming one in "othercon" */ - newcon = nodeid2con(nodeid, GFP_KERNEL); + newcon = nodeid2con(nodeid, GFP_NOFS); if (!newcon) { result = -ENOMEM; goto accept_err; @@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con) struct connection *othercon = newcon->othercon; if (!othercon) { - othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL); + othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); if (!othercon) { log_print("failed to allocate incoming socket"); mutex_unlock(&newcon->sock_mutex); @@ -1421,7 +1435,7 @@ static int work_start(void) static void stop_conn(struct connection *con) { con->flags |= 0x0F; - if (con->sock) + if (con->sock && con->sock->sk) con->sock->sk->sk_user_data = NULL; } diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index a9a9618c0d3f..1311e6426287 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void); int dlm_lowcomms_close(int nodeid); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void dlm_lowcomms_commit_buffer(void *mh); +int dlm_lowcomms_connect_node(int nodeid); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 26133f05ae3a..b128775913b2 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -17,6 +17,7 @@ #include "recover.h" #include "rcom.h" #include "config.h" +#include "lowcomms.h" static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { @@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) static int dlm_add_member(struct dlm_ls *ls, int nodeid) { struct dlm_member *memb; - int w; + int w, error; - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; @@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) return w; } + error = dlm_lowcomms_connect_node(nodeid); + if (error < 0) { + kfree(memb); + return error; + } + memb->nodeid = nodeid; memb->weight = w; add_ordered_member(ls, memb); @@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls) ls->ls_total_weight = total; - array = kmalloc(sizeof(int) * total, GFP_KERNEL); + array = kmalloc(sizeof(int) * total, ls->ls_allocation); if (!array) return; @@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) continue; log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); - memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); if (!memb) return -ENOMEM; memb->nodeid = rv->new[i]; @@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls) int *ids = NULL, *new = NULL; int error, ids_count = 0, new_count = 0; - rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); + rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); if (!rv) return -ENOMEM; diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index daa4183fbb84..7a2307c08911 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) struct rq_entry *e; int length = ms->m_header.h_length - sizeof(struct dlm_message); - e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); + e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); if (!e) { log_print("dlm_add_requestqueue: out of memory len %d", length); return; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fa4c7e7d15d9..12d649602d3a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -27,6 +27,7 @@ #include <linux/mount.h> #include <linux/key.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include <linux/file.h> #include <linux/crypto.h> #include "ecryptfs_kernel.h" @@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb) { struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); + lock_kernel(); + ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); + + unlock_kernel(); } /** diff --git a/fs/eventfd.c b/fs/eventfd.c index 2a701d593d35..3f0e1974abdc 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -16,6 +16,7 @@ #include <linux/anon_inodes.h> #include <linux/eventfd.h> #include <linux/syscalls.h> +#include <linux/module.h> struct eventfd_ctx { wait_queue_head_t wqh; @@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n) return n; } +EXPORT_SYMBOL_GPL(eventfd_signal); static int eventfd_release(struct inode *inode, struct file *file) { @@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd) return file; } +EXPORT_SYMBOL_GPL(eventfd_fget); SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { diff --git a/fs/exofs/common.h b/fs/exofs/common.h index b1512c4bb8c7..24667eedc023 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h @@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or, int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr); -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void *buff, u64 len); - #endif /*ifndef __EXOFS_COM_H__*/ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ba8d9fab4693..77d0a295eb1c 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, struct inode *inode) { struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; - struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; pcol->sbi = sbi; - pcol->req_q = req_q; + pcol->req_q = osd_request_queue(sbi->s_dev); pcol->inode = inode; pcol->expected_pages = expected_pages; @@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync) goto err; } - osd_req_read(or, &obj, pcol->bio, i_start); + osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); if (is_sync) { exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); @@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; - osd_req_write(or, &obj, pcol_copy->bio, i_start); + pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ + osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); if (unlikely(ret)) { EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c index 06ca92672eb5..b3d2ccb87aaa 100644 --- a/fs/exofs/osd.c +++ b/fs/exofs/osd.c @@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) return -EIO; } - -int osd_req_read_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_read(or, obj, bio, offset); - return 0; -} - -int osd_req_write_kern(struct osd_request *or, - const struct osd_obj_id *obj, u64 offset, void* buff, u64 len) -{ - struct request_queue *req_q = or->osd_dev->scsi_device->request_queue; - struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL); - - if (!bio) - return -ENOMEM; - - osd_req_write(or, obj, bio, offset); - return 0; -} diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 9f1985e857e2..8216c5b77b53 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -200,20 +200,21 @@ static const struct export_operations exofs_export_ops; /* * Write the superblock to the OSD */ -static void exofs_write_super(struct super_block *sb) +static int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; struct osd_request *or; struct osd_obj_id obj; - int ret; + int ret = -ENOMEM; fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); if (!fscb) { EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); - return; + return -ENOMEM; } + lock_super(sb); lock_kernel(); sbi = sb->s_fs_info; fscb->s_nextid = cpu_to_le64(sbi->s_nextid); @@ -246,7 +247,17 @@ out: if (or) osd_end_request(or); unlock_kernel(); + unlock_super(sb); kfree(fscb); + return ret; +} + +static void exofs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + exofs_sync_fs(sb, 1); + else + sb->s_dirt = 0; } /* @@ -258,6 +269,11 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; + lock_kernel(); + + if (sb->s_dirt) + exofs_write_super(sb); + /* make sure there are no pending commands */ for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; num_pend = atomic_read(&sbi->s_curr_pending)) { @@ -271,6 +287,8 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } /* @@ -484,6 +502,7 @@ static const struct super_operations exofs_sops = { .delete_inode = exofs_delete_inode, .put_super = exofs_put_super, .write_super = exofs_write_super, + .sync_fs = exofs_sync_fs, .statfs = exofs_statfs, }; diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index e0b2b43c1fdb..f42af45cfd88 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT2_FS) += ext2.o -ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \ +ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2999d72153b7..003500498c22 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif - .fsync = ext2_sync_file, + .fsync = simple_fsync, }; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 3203042b36ef..f2e5811936d0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -27,7 +27,7 @@ struct ext2_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); -/* fsync.c */ -extern int ext2_sync_file (struct file *, struct dentry *, int); - /* ialloc.c */ extern struct inode * ext2_new_inode (struct inode *, int); extern void ext2_free_inode (struct inode *); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 45ed07122182..2b9e47dc9222 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = { .mmap = xip_file_mmap, .open = generic_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, + .fsync = simple_fsync, }; #endif diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c deleted file mode 100644 index fc66c93fcb5c..000000000000 --- a/fs/ext2/fsync.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * linux/fs/ext2/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * - * ext2fs fsync primitive - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. - * Andi Kleen, 1997 - * - * Major simplications and cleanup - we only need to do the metadata, because - * we can depend on generic_block_fdatasync() to sync the data blocks. - */ - -#include "ext2.h" -#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ - - -/* - * File may be NULL when we are called. Perhaps we shouldn't - * even pass file to fsync ? - */ - -int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = ext2_sync_inode(inode); - if (ret == 0) - ret = err; - return ret; -} diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index acf678831103..29ed682061f6 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); -static int ext2_update_inode(struct inode * inode, int do_sync); - /* * Test whether an inode is a fast symlink. */ @@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_update_inode(inode, inode_needs_sync(inode)); + ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1337,7 +1335,7 @@ bad_inode: return ERR_PTR(ret); } -static int ext2_update_inode(struct inode * inode, int do_sync) +int ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync) return err; } -int ext2_write_inode(struct inode *inode, int wait) -{ - return ext2_update_inode(inode, wait); -} - int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e3c748faf2db..458999638c3d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); +static int ext2_sync_fs(struct super_block *sb, int wait); void ext2_error (struct super_block * sb, const char * function, const char * fmt, ...) @@ -114,6 +115,11 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + ext2_write_super(sb); + ext2_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; @@ -135,7 +141,7 @@ static void ext2_put_super (struct super_block * sb) kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * ext2_inode_cachep; @@ -304,6 +310,7 @@ static const struct super_operations ext2_sops = { .delete_inode = ext2_delete_inode, .put_super = ext2_put_super, .write_super = ext2_write_super, + .sync_fs = ext2_sync_fs, .statfs = ext2_statfs, .remount_fs = ext2_remount, .clear_inode = ext2_clear_inode, @@ -1127,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) * set s_state to EXT2_VALID_FS after some corrections. */ -void ext2_write_super (struct super_block * sb) +static int ext2_sync_fs(struct super_block *sb, int wait) { - struct ext2_super_block * es; + struct ext2_super_block *es = EXT2_SB(sb)->s_es; + lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { - es = EXT2_SB(sb)->s_es; - - if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { - ext2_debug ("setting valid to 0\n"); - es->s_state &= cpu_to_le16(~EXT2_VALID_FS); - es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_mtime = cpu_to_le32(get_seconds()); - ext2_sync_super(sb, es); - } else - ext2_commit_super (sb, es); + if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { + ext2_debug("setting valid to 0\n"); + es->s_state &= cpu_to_le16(~EXT2_VALID_FS); + es->s_free_blocks_count = + cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = + cpu_to_le32(ext2_count_free_inodes(sb)); + es->s_mtime = cpu_to_le32(get_seconds()); + ext2_sync_super(sb, es); + } else { + ext2_commit_super(sb, es); } sb->s_dirt = 0; unlock_kernel(); + + return 0; +} + + +void ext2_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ext2_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static int ext2_remount (struct super_block * sb, int * flags, char * data) @@ -1157,6 +1175,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; + lock_kernel(); + /* Store the old options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; @@ -1192,12 +1212,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || - !(sbi->s_mount_state & EXT2_VALID_FS)) + !(sbi->s_mount_state & EXT2_VALID_FS)) { + unlock_kernel(); return 0; + } /* * OK, we are remounting a valid rw partition rdonly, so set * the rdonly flag and then mark the partition as valid again. @@ -1224,12 +1248,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sb->s_flags &= ~MS_RDONLY; } ext2_sync_super(sb, es); + unlock_kernel(); return 0; restore_opts: sbi->s_mount_opt = old_opts.s_mount_opt; sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sb->s_flags = old_sb_flags; + unlock_kernel(); return err; } diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 225202db8974..27967f92e820 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -649,7 +649,7 @@ do_more: count = overflow; goto do_more; } - sb->s_dirt = 1; + error_return: brelse(bitmap_bh); ext3_std_error(sb, err); @@ -1708,7 +1708,6 @@ allocated: if (!fatal) fatal = err; - sb->s_dirt = 1; if (fatal) goto out; diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index dd13d60d524b..b39991285136 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) err = ext3_journal_dirty_metadata(handle, bitmap_bh); if (!fatal) fatal = err; - sb->s_dirt = 1; + error_return: brelse(bitmap_bh); ext3_std_error(sb, fatal); @@ -537,7 +537,6 @@ got: percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); - sb->s_dirt = 1; inode->i_uid = current_fsuid(); if (test_opt (sb, GRPID)) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fcfa24361856..b0248c6d5d4c 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle, ext3_update_dynamic_rev(sb); EXT3_SET_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_LARGE_FILE); - sb->s_dirt = 1; handle->h_sync = 1; err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 78fdf3836370..8a0b26340b54 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) EXT3_INODES_PER_GROUP(sb)); ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; exit_journal: unlock_super(sb); @@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, } es->s_blocks_count = cpu_to_le32(o_blocks_count + add); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - sb->s_dirt = 1; unlock_super(sb); ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c70d52afb10..26aa64dee6aa 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, static int ext3_remount (struct super_block * sb, int * flags, char * data); static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext3_unfreeze(struct super_block *sb); -static void ext3_write_super (struct super_block * sb); static int ext3_freeze(struct super_block *sb); /* @@ -399,6 +398,8 @@ static void ext3_put_super (struct super_block * sb) struct ext3_super_block *es = sbi->s_es; int i, err; + lock_kernel(); + ext3_xattr_put_super(sb); err = journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -447,7 +448,8 @@ static void ext3_put_super (struct super_block * sb) sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + + unlock_kernel(); } static struct kmem_cache *ext3_inode_cachep; @@ -761,7 +763,6 @@ static const struct super_operations ext3_sops = { .dirty_inode = ext3_dirty_inode, .delete_inode = ext3_delete_inode, .put_super = ext3_put_super, - .write_super = ext3_write_super, .sync_fs = ext3_sync_fs, .freeze_fs = ext3_freeze, .unfreeze_fs = ext3_unfreeze, @@ -1785,7 +1786,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) #else es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif - sb->s_dirt = 1; } if (sbi->s_blocks_per_group > blocksize * 8) { @@ -2265,7 +2265,6 @@ static int ext3_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ ext3_commit_super(sb, es, 1); @@ -2308,7 +2307,6 @@ static int ext3_create_journal(struct super_block * sb, EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); es->s_journal_inum = cpu_to_le32(journal_inum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ ext3_commit_super(sb, es, 1); @@ -2354,7 +2352,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb, if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; ext3_commit_super(sb, es, 1); } unlock_super(sb); @@ -2413,29 +2410,14 @@ int ext3_force_commit(struct super_block *sb) return 0; journal = EXT3_SB(sb)->s_journal; - sb->s_dirt = 0; ret = ext3_journal_force_commit(journal); return ret; } -/* - * Ext3 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point. (We can probably nuke this function altogether, and remove - * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...) - */ -static void ext3_write_super (struct super_block * sb) -{ - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; -} - static int ext3_sync_fs(struct super_block *sb, int wait) { tid_t target; - sb->s_dirt = 0; if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); @@ -2451,7 +2433,6 @@ static int ext3_freeze(struct super_block *sb) { int error = 0; journal_t *journal; - sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { journal = EXT3_SB(sb)->s_journal; @@ -2509,7 +2490,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) int i; #endif + lock_kernel(); + /* Store the original options */ + lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_resuid = sbi->s_resuid; @@ -2617,6 +2601,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); #endif + unlock_super(sb); + unlock_kernel(); return 0; restore_opts: sb->s_flags = old_sb_flags; @@ -2633,6 +2619,8 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif + unlock_super(sb); + unlock_kernel(); return err; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 83b7be849bd5..545e37c4b91e 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle, if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f016707597a7..012c4251397e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -576,6 +576,11 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + lock_super(sb); + lock_kernel(); + if (sb->s_dirt) + ext4_commit_super(sb, 1); + ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -642,8 +647,6 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - lock_super(sb); - lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); } @@ -3333,7 +3336,9 @@ int ext4_force_commit(struct super_block *sb) static void ext4_write_super(struct super_block *sb) { + lock_super(sb); ext4_commit_super(sb, 1); + unlock_super(sb); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -3417,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int i; #endif + lock_kernel(); + /* Store the original options */ + lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_resuid = sbi->s_resuid; @@ -3551,6 +3559,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); #endif + unlock_super(sb); + unlock_kernel(); return 0; restore_opts: @@ -3570,6 +3580,8 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif + unlock_super(sb); + unlock_kernel(); return err; } diff --git a/fs/fat/cache.c b/fs/fat/cache.c index b42602298087..923990e4f16e 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -241,7 +241,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) while (*fclus < cluster) { /* prevent the infinite loop of cluster chain */ if (*fclus > limit) { - fat_fs_panic(sb, "%s: detected the cluster chain loop" + fat_fs_error(sb, "%s: detected the cluster chain loop" " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; @@ -252,7 +252,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus) if (nr < 0) goto out; else if (nr == FAT_ENT_FREE) { - fat_fs_panic(sb, "%s: invalid cluster chain" + fat_fs_error(sb, "%s: invalid cluster chain" " (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); nr = -EIO; @@ -285,7 +285,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) if (ret < 0) return ret; else if (ret == FAT_ENT_EOF) { - fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", + fat_fs_error(sb, "%s: request beyond EOF (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); return -EIO; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3a7f603b6982..3b8e71b412fd 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = fat_compat_dir_ioctl, #endif - .fsync = file_fsync, + .fsync = fat_file_fsync, }; static int fat_get_short_entry(struct inode *dir, loff_t *pos, @@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) de++; nr_slots--; } - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) de--; nr_slots--; } - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, } memset(bhs[n]->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bhs[n]); - mark_buffer_dirty(bhs[n]); + mark_buffer_dirty_inode(bhs[n], dir); n++; blknr++; @@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].size = de[1].size = 0; memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); set_buffer_uptodate(bhs[0]); - mark_buffer_dirty(bhs[0]); + mark_buffer_dirty_inode(bhs[0], dir); err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); if (err) @@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, slots += copy; size -= copy; set_buffer_uptodate(bhs[n]); - mark_buffer_dirty(bhs[n]); + mark_buffer_dirty_inode(bhs[n], dir); if (!size) break; n++; @@ -1293,7 +1293,7 @@ found: for (i = 0; i < long_bhs; i++) { int copy = min_t(int, sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty(bhs[i]); + mark_buffer_dirty_inode(bhs[i], dir); offset = 0; slots += copy; size -= copy; @@ -1304,7 +1304,7 @@ found: /* Fill the short name slot. */ int copy = min_t(int, sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty(bhs[i]); + mark_buffer_dirty_inode(bhs[i], dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bhs[i]); } @@ -1334,7 +1334,7 @@ found: goto error_remove; } if (dir->i_size & (sbi->cluster_size - 1)) { - fat_fs_panic(sb, "Odd directory size"); + fat_fs_error(sb, "Odd directory size"); dir->i_size = (dir->i_size + sbi->cluster_size - 1) & ~((loff_t)sbi->cluster_size - 1); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index ea440d65819c..adb0e72a176d 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -17,6 +17,10 @@ #define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ #define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ +#define FAT_ERRORS_CONT 1 /* ignore error and continue */ +#define FAT_ERRORS_PANIC 2 /* panic on error */ +#define FAT_ERRORS_RO 3 /* remount r/o on error */ + struct fat_mount_options { uid_t fs_uid; gid_t fs_gid; @@ -26,6 +30,7 @@ struct fat_mount_options { char *iocharset; /* Charset used for filename input/display */ unsigned short shortname; /* flags for shortname display/create rule */ unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned char errors; /* On error: continue, panic, remount-ro */ unsigned short allow_utime;/* permission for setting the [am]time */ unsigned quiet:1, /* set = fake successful chmods and chowns */ showexec:1, /* set = only set x bit for com/exe/bat */ @@ -74,6 +79,7 @@ struct msdos_sb_info { int fatent_shift; struct fatent_operations *fatent_ops; + struct inode *fat_inode; spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[FAT_HASH_SIZE]; @@ -251,6 +257,7 @@ struct fat_entry { } u; int nr_bhs; struct buffer_head *bhs[2]; + struct inode *fat_inode; }; static inline void fatent_init(struct fat_entry *fatent) @@ -259,6 +266,7 @@ static inline void fatent_init(struct fat_entry *fatent) fatent->entry = 0; fatent->u.ent32_p = NULL; fatent->bhs[0] = fatent->bhs[1] = NULL; + fatent->fat_inode = NULL; } static inline void fatent_set_entry(struct fat_entry *fatent, int entry) @@ -275,6 +283,7 @@ static inline void fatent_brelse(struct fat_entry *fatent) brelse(fatent->bhs[i]); fatent->nr_bhs = 0; fatent->bhs[0] = fatent->bhs[1] = NULL; + fatent->fat_inode = NULL; } extern void fat_ent_access_init(struct super_block *sb); @@ -296,6 +305,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); +extern int fat_file_fsync(struct file *file, struct dentry *dentry, + int datasync); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); @@ -310,7 +321,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); /* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) +extern void fat_fs_error(struct super_block *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))) __cold; extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index da6eea47872f..a81037721a6f 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, struct buffer_head **bhs = fatent->bhs; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); + fatent->fat_inode = MSDOS_SB(sb)->fat_inode; + bhs[0] = sb_bread(sb, blocknr); if (!bhs[0]) goto err; @@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); + fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", @@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) } spin_unlock(&fat12_entry_lock); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); if (fatent->nr_bhs == 2) - mark_buffer_dirty(fatent->bhs[1]); + mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); } static void fat16_ent_put(struct fat_entry *fatent, int new) @@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static void fat32_ent_put(struct fat_entry *fatent, int new) @@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new) WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static int fat12_ent_next(struct fat_entry *fatent) @@ -345,7 +348,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) if (entry < FAT_START_ENT || sbi->max_cluster <= entry) { fatent_brelse(fatent); - fat_fs_panic(sb, "invalid access to FAT (entry 0x%08x)", entry); + fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } @@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, } memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); - mark_buffer_dirty(c_bh); + mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & MS_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); @@ -557,7 +560,7 @@ int fat_free_clusters(struct inode *inode, int cluster) err = cluster; goto error; } else if (cluster == FAT_ENT_FREE) { - fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", + fat_fs_error(sb, "%s: deleting FAT entry beyond EOF", __func__); err = -EIO; goto error; diff --git a/fs/fat/file.c b/fs/fat/file.c index 0a7f4a9918b3..b28ea646ff60 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -18,106 +18,112 @@ #include <linux/security.h> #include "fat.h" -int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) +{ + u32 attr; + + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); + + return put_user(attr, user_attr); +} + +static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) { + struct inode *inode = file->f_path.dentry->d_inode; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); - u32 __user *user_attr = (u32 __user *)arg; + int is_dir = S_ISDIR(inode->i_mode); + u32 attr, oldattr; + struct iattr ia; + int err; - switch (cmd) { - case FAT_IOCTL_GET_ATTRIBUTES: - { - u32 attr; + err = get_user(attr, user_attr); + if (err) + goto out; - mutex_lock(&inode->i_mutex); - attr = fat_make_attrs(inode); - mutex_unlock(&inode->i_mutex); + mutex_lock(&inode->i_mutex); + err = mnt_want_write(file->f_path.mnt); + if (err) + goto out_unlock_inode; - return put_user(attr, user_attr); + /* + * ATTR_VOLUME and ATTR_DIR cannot be changed; this also + * prevents the user from turning us into a VFAT + * longname entry. Also, we obviously can't set + * any of the NTFS attributes in the high 24 bits. + */ + attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); + /* Merge in ATTR_VOLUME and ATTR_DIR */ + attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | + (is_dir ? ATTR_DIR : 0); + oldattr = fat_make_attrs(inode); + + /* Equivalent to a chmod() */ + ia.ia_valid = ATTR_MODE | ATTR_CTIME; + ia.ia_ctime = current_fs_time(inode->i_sb); + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } - case FAT_IOCTL_SET_ATTRIBUTES: - { - u32 attr, oldattr; - int err, is_dir = S_ISDIR(inode->i_mode); - struct iattr ia; - err = get_user(attr, user_attr); - if (err) - return err; + /* The root directory has no attributes */ + if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { + err = -EINVAL; + goto out_drop_write; + } - mutex_lock(&inode->i_mutex); - - err = mnt_want_write(filp->f_path.mnt); - if (err) - goto up_no_drop_write; - - /* - * ATTR_VOLUME and ATTR_DIR cannot be changed; this also - * prevents the user from turning us into a VFAT - * longname entry. Also, we obviously can't set - * any of the NTFS attributes in the high 24 bits. - */ - attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); - /* Merge in ATTR_VOLUME and ATTR_DIR */ - attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | - (is_dir ? ATTR_DIR : 0); - oldattr = fat_make_attrs(inode); - - /* Equivalent to a chmod() */ - ia.ia_valid = ATTR_MODE | ATTR_CTIME; - ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) - ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); - else { - ia.ia_mode = fat_make_mode(sbi, attr, - S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); - } + if (sbi->options.sys_immutable && + ((attr | oldattr) & ATTR_SYS) && + !capable(CAP_LINUX_IMMUTABLE)) { + err = -EPERM; + goto out_drop_write; + } - /* The root directory has no attributes */ - if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { - err = -EINVAL; - goto up; - } + /* + * The security check is questionable... We single + * out the RO attribute for checking by the security + * module, just because it maps to a file mode. + */ + err = security_inode_setattr(file->f_path.dentry, &ia); + if (err) + goto out_drop_write; - if (sbi->options.sys_immutable) { - if ((attr | oldattr) & ATTR_SYS) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto up; - } - } - } + /* This MUST be done before doing anything irreversible... */ + err = fat_setattr(file->f_path.dentry, &ia); + if (err) + goto out_drop_write; + + fsnotify_change(file->f_path.dentry, ia.ia_valid); + if (sbi->options.sys_immutable) { + if (attr & ATTR_SYS) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= S_IMMUTABLE; + } - /* - * The security check is questionable... We single - * out the RO attribute for checking by the security - * module, just because it maps to a file mode. - */ - err = security_inode_setattr(filp->f_path.dentry, &ia); - if (err) - goto up; - - /* This MUST be done before doing anything irreversible... */ - err = fat_setattr(filp->f_path.dentry, &ia); - if (err) - goto up; - - fsnotify_change(filp->f_path.dentry, ia.ia_valid); - if (sbi->options.sys_immutable) { - if (attr & ATTR_SYS) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= S_IMMUTABLE; - } + fat_save_attrs(inode, attr); + mark_inode_dirty(inode); +out_drop_write: + mnt_drop_write(file->f_path.mnt); +out_unlock_inode: + mutex_unlock(&inode->i_mutex); +out: + return err; +} - fat_save_attrs(inode, attr); - mark_inode_dirty(inode); -up: - mnt_drop_write(filp->f_path.mnt); -up_no_drop_write: - mutex_unlock(&inode->i_mutex); - return err; - } +int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + u32 __user *user_attr = (u32 __user *)arg; + + switch (cmd) { + case FAT_IOCTL_GET_ATTRIBUTES: + return fat_ioctl_get_attributes(inode, user_attr); + case FAT_IOCTL_SET_ATTRIBUTES: + return fat_ioctl_set_attributes(filp, user_attr); default: return -ENOTTY; /* Inappropriate ioctl for device */ } @@ -133,6 +139,18 @@ static int fat_file_release(struct inode *inode, struct file *filp) return 0; } +int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int res, err; + + res = simple_fsync(filp, dentry, datasync); + err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); + + return res ? res : err; +} + + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -142,7 +160,7 @@ const struct file_operations fat_file_operations = { .mmap = generic_file_mmap, .release = fat_file_release, .ioctl = fat_generic_ioctl, - .fsync = file_fsync, + .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, }; @@ -213,7 +231,7 @@ static int fat_free(struct inode *inode, int skip) fatent_brelse(&fatent); return 0; } else if (ret == FAT_ENT_FREE) { - fat_fs_panic(sb, + fat_fs_error(sb, "%s: invalid cluster chain (i_pos %lld)", __func__, MSDOS_I(inode)->i_pos); ret = -EIO; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 296785a0dec8..304b411cb8bc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -76,7 +76,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, return 0; if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { - fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", + fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)", MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); return -EIO; } @@ -441,16 +441,35 @@ static void fat_clear_inode(struct inode *inode) static void fat_write_super(struct super_block *sb) { + lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) fat_clusters_flush(sb); + unlock_super(sb); +} + +static int fat_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); + fat_clusters_flush(sb); + sb->s_dirt = 0; + unlock_super(sb); + + return 0; } static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + fat_write_super(sb); + + iput(sbi->fat_inode); + if (sbi->nls_disk) { unload_nls(sbi->nls_disk); sbi->nls_disk = NULL; @@ -467,6 +486,8 @@ static void fat_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } static struct kmem_cache *fat_inode_cachep; @@ -632,6 +653,7 @@ static const struct super_operations fat_sops = { .delete_inode = fat_delete_inode, .put_super = fat_put_super, .write_super = fat_write_super, + .sync_fs = fat_sync_fs, .statfs = fat_statfs, .clear_inode = fat_clear_inode, .remount_fs = fat_remount, @@ -834,6 +856,12 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); + if (opts->errors == FAT_ERRORS_CONT) + seq_puts(m, ",errors=continue"); + else if (opts->errors == FAT_ERRORS_PANIC) + seq_puts(m, ",errors=panic"); + else + seq_puts(m, ",errors=remount-ro"); return 0; } @@ -846,7 +874,8 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, + Opt_err_panic, Opt_err_ro, Opt_err, }; static const match_table_t fat_tokens = { @@ -869,6 +898,11 @@ static const match_table_t fat_tokens = { {Opt_showexec, "showexec"}, {Opt_debug, "debug"}, {Opt_immutable, "sys_immutable"}, + {Opt_flush, "flush"}, + {Opt_tz_utc, "tz=UTC"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, {Opt_obsolate, "conv=binary"}, {Opt_obsolate, "conv=text"}, {Opt_obsolate, "conv=auto"}, @@ -880,8 +914,6 @@ static const match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_flush, "flush"}, - {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; static const match_table_t msdos_tokens = { @@ -951,6 +983,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->numtail = 1; opts->usefree = opts->nocase = 0; opts->tz_utc = 0; + opts->errors = FAT_ERRORS_RO; *debug = 0; if (!options) @@ -1043,6 +1076,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_tz_utc: opts->tz_utc = 1; break; + case Opt_err_cont: + opts->errors = FAT_ERRORS_CONT; + break; + case Opt_err_panic: + opts->errors = FAT_ERRORS_PANIC; + break; + case Opt_err_ro: + opts->errors = FAT_ERRORS_RO; + break; /* msdos specific */ case Opt_dots: @@ -1174,7 +1216,7 @@ static int fat_read_root(struct inode *inode) int fat_fill_super(struct super_block *sb, void *data, int silent, const struct inode_operations *fs_dir_inode_ops, int isvfat) { - struct inode *root_inode = NULL; + struct inode *root_inode = NULL, *fat_inode = NULL; struct buffer_head *bh; struct fat_boot_sector *b; struct msdos_sb_info *sbi; @@ -1414,6 +1456,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, } error = -ENOMEM; + fat_inode = new_inode(sb); + if (!fat_inode) + goto out_fail; + MSDOS_I(fat_inode)->i_pos = 0; + sbi->fat_inode = fat_inode; root_inode = new_inode(sb); if (!root_inode) goto out_fail; @@ -1439,6 +1486,8 @@ out_invalid: " on dev %s.\n", sb->s_id); out_fail: + if (fat_inode) + iput(fat_inode); if (root_inode) iput(root_inode); if (sbi->nls_io) diff --git a/fs/fat/misc.c b/fs/fat/misc.c index ac39ebcc1496..a6c20473dfd7 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -12,14 +12,19 @@ #include "fat.h" /* - * fat_fs_panic reports a severe file system problem and sets the file system - * read-only. The file system can be made writable again by remounting it. + * fat_fs_error reports a file system problem that might indicate fa data + * corruption/inconsistency. Depending on 'errors' mount option the + * panic() is called, or error message is printed FAT and nothing is done, + * or filesystem is remounted read-only (default behavior). + * In case the file system is remounted read-only, it can be made writable + * again by remounting it. */ -void fat_fs_panic(struct super_block *s, const char *fmt, ...) +void fat_fs_error(struct super_block *s, const char *fmt, ...) { + struct fat_mount_options *opts = &MSDOS_SB(s)->options; va_list args; - printk(KERN_ERR "FAT: Filesystem panic (dev %s)\n", s->s_id); + printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); printk(KERN_ERR " "); va_start(args, fmt); @@ -27,13 +32,14 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...) va_end(args); printk("\n"); - if (!(s->s_flags & MS_RDONLY)) { + if (opts->errors == FAT_ERRORS_PANIC) + panic(" FAT fs panic from previous error\n"); + else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { s->s_flags |= MS_RDONLY; printk(KERN_ERR " File system has been set read-only\n"); } } - -EXPORT_SYMBOL_GPL(fat_fs_panic); +EXPORT_SYMBOL_GPL(fat_fs_error); /* Flushes the number of free clusters on FAT32 */ /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ @@ -124,7 +130,7 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + fat_fs_error(sb, "clusters badly computed (%d != %llu)", new_fclus, (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index da3f361a37dd..82f88733b681 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, int start = MSDOS_I(new_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) @@ -586,7 +586,7 @@ error_dotdot: int start = MSDOS_I(old_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } error_inode: @@ -608,7 +608,7 @@ error_inode: sinfo.bh = NULL; } if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, + fat_fs_error(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", __func__, sinfo.i_pos); } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a0e00e3a46e9..8d6fdcfd41df 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int start = MSDOS_I(new_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) @@ -1009,7 +1009,7 @@ error_dotdot: int start = MSDOS_I(old_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } error_inode: @@ -1030,7 +1030,7 @@ error_inode: sinfo.bh = NULL; } if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, + fat_fs_error(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", __func__, sinfo.i_pos); } diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe48840..334ce39881f8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { file_take_write(file); - error = mnt_want_write(mnt); + error = mnt_clone_write(mnt); WARN_ON(error); } return error; @@ -399,6 +399,44 @@ too_bad: return 0; } +/** + * mark_files_ro - mark all files read-only + * @sb: superblock in question + * + * All files are marked read-only. We don't care about pending + * delete files so this should be used in 'force' mode only. + */ +void mark_files_ro(struct super_block *sb) +{ + struct file *f; + +retry: + file_list_lock(); + list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + f->f_mode &= ~FMODE_WRITE; + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + file_list_unlock(); + /* + * This can sleep, so we can't hold + * the file_list_lock() spinlock. + */ + mnt_drop_write(mnt); + mntput(mnt); + goto retry; + } + file_list_unlock(); +} + void __init files_init(unsigned long mempages) { int n; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1dacda831577..cdbd1654e4cd 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); + lock_kernel(); + vxfs_put_fake_inode(infp->vsi_fship); vxfs_put_fake_inode(infp->vsi_ilist); vxfs_put_fake_inode(infp->vsi_stilist); brelse(infp->vsi_bp); kfree(infp); + + unlock_kernel(); } /** diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 91013ff7dd53..40308e98c6a4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi) clear_bit(BDI_pdflush, &bdi->state); } +static noinline void block_dump___mark_inode_dirty(struct inode *inode) +{ + if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { + struct dentry *dentry; + const char *name = "?"; + + dentry = d_find_alias(inode); + if (dentry) { + spin_lock(&dentry->d_lock); + name = (const char *) dentry->d_name.name; + } + printk(KERN_DEBUG + "%s(%d): dirtied inode %lu (%s) on %s\n", + current->comm, task_pid_nr(current), inode->i_ino, + name, inode->i_sb->s_id); + if (dentry) { + spin_unlock(&dentry->d_lock); + dput(dentry); + } + } +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return; - if (unlikely(block_dump)) { - struct dentry *dentry = NULL; - const char *name = "?"; - - if (!list_empty(&inode->i_dentry)) { - dentry = list_entry(inode->i_dentry.next, - struct dentry, d_alias); - if (dentry && dentry->d_name.name) - name = (const char *) dentry->d_name.name; - } - - if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) - printk(KERN_DEBUG - "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, task_pid_nr(current), inode->i_ino, - name, inode->i_sb->s_id); - } + if (unlikely(block_dump)) + block_dump___mark_inode_dirty(inode); spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { @@ -289,7 +296,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) int ret; BUG_ON(inode->i_state & I_SYNC); - WARN_ON(inode->i_state & I_NEW); /* Set I_SYNC, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; @@ -314,7 +320,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } spin_lock(&inode_lock); - WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (!(inode->i_state & I_DIRTY) && @@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait) } /** - * sync_inodes - writes all inodes to disk - * @wait: wait for completion - * - * sync_inodes() goes through each super block's dirty inode list, writes the - * inodes out, waits on the writeout and puts the inodes back on the normal - * list. - * - * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle - * part of the sync functions is that the blockdev "superblock" is processed - * last. This is because the write_inode() function of a typical fs will - * perform no I/O, but will mark buffers in the blockdev mapping as dirty. - * What we want to do is to perform all that dirtying first, and then write - * back all those inode blocks via the blockdev mapping in one sweep. So the - * additional (somewhat redundant) sync_blockdev() calls here are to make - * sure that really happens. Because if we call sync_inodes_sb(wait=1) with - * outstanding dirty inodes, the writeback goes block-at-a-time within the - * filesystem's write_inode(). This is extremely slow. - */ -static void __sync_inodes(int wait) -{ - struct super_block *sb; - - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) { - sync_inodes_sb(sb, wait); - sync_blockdev(sb->s_bdev); - } - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); -} - -void sync_inodes(int wait) -{ - __sync_inodes(0); - - if (wait) - __sync_inodes(1); -} - -/** * write_inode_now - write an inode to disk * @inode: inode to write to disk * @sync: whether the write should be synchronous or not diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 72437065f6ad..e95eeb445e58 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_FUSE_FS) += fuse.o +obj-$(CONFIG_CUSE) += cuse.o fuse-objs := dev.o dir.o file.o inode.o control.o diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c new file mode 100644 index 000000000000..de792dcf3274 --- /dev/null +++ b/fs/fuse/cuse.c @@ -0,0 +1,610 @@ +/* + * CUSE: Character device in Userspace + * + * Copyright (C) 2008-2009 SUSE Linux Products GmbH + * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + * + * CUSE enables character devices to be implemented from userland much + * like FUSE allows filesystems. On initialization /dev/cuse is + * created. By opening the file and replying to the CUSE_INIT request + * userland CUSE server can create a character device. After that the + * operation is very similar to FUSE. + * + * A CUSE instance involves the following objects. + * + * cuse_conn : contains fuse_conn and serves as bonding structure + * channel : file handle connected to the userland CUSE server + * cdev : the implemented character device + * dev : generic device for cdev + * + * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with + * devices, it's called 'channel' to reduce confusion. + * + * channel determines when the character device dies. When channel is + * closed, everything begins to destruct. The cuse_conn is taken off + * the lookup table preventing further access from cdev, cdev and + * generic device are removed and the base reference of cuse_conn is + * put. + * + * On each open, the matching cuse_conn is looked up and if found an + * additional reference is taken which is released when the file is + * closed. + */ + +#include <linux/fuse.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/kdev_t.h> +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/magic.h> +#include <linux/miscdevice.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/stat.h> + +#include "fuse_i.h" + +#define CUSE_CONNTBL_LEN 64 + +struct cuse_conn { + struct list_head list; /* linked on cuse_conntbl */ + struct fuse_conn fc; /* fuse connection */ + struct cdev *cdev; /* associated character device */ + struct device *dev; /* device representing @cdev */ + + /* init parameters, set once during initialization */ + bool unrestricted_ioctl; +}; + +static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ +static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; +static struct class *cuse_class; + +static struct cuse_conn *fc_to_cc(struct fuse_conn *fc) +{ + return container_of(fc, struct cuse_conn, fc); +} + +static struct list_head *cuse_conntbl_head(dev_t devt) +{ + return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN]; +} + + +/************************************************************************** + * CUSE frontend operations + * + * These are file operations for the character device. + * + * On open, CUSE opens a file from the FUSE mnt and stores it to + * private_data of the open file. All other ops call FUSE ops on the + * FUSE file. + */ + +static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + loff_t pos = 0; + + return fuse_direct_io(file, buf, count, &pos, 0); +} + +static ssize_t cuse_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos = 0; + /* + * No locking or generic_write_checks(), the server is + * responsible for locking and sanity checks. + */ + return fuse_direct_io(file, buf, count, &pos, 1); +} + +static int cuse_open(struct inode *inode, struct file *file) +{ + dev_t devt = inode->i_cdev->dev; + struct cuse_conn *cc = NULL, *pos; + int rc; + + /* look up and get the connection */ + spin_lock(&cuse_lock); + list_for_each_entry(pos, cuse_conntbl_head(devt), list) + if (pos->dev->devt == devt) { + fuse_conn_get(&pos->fc); + cc = pos; + break; + } + spin_unlock(&cuse_lock); + + /* dead? */ + if (!cc) + return -ENODEV; + + /* + * Generic permission check is already done against the chrdev + * file, proceed to open. + */ + rc = fuse_do_open(&cc->fc, 0, file, 0); + if (rc) + fuse_conn_put(&cc->fc); + return rc; +} + +static int cuse_release(struct inode *inode, struct file *file) +{ + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + + fuse_sync_release(ff, file->f_flags); + fuse_conn_put(fc); + + return 0; +} + +static long cuse_file_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = 0; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fuse_file *ff = file->private_data; + struct cuse_conn *cc = fc_to_cc(ff->fc); + unsigned int flags = FUSE_IOCTL_COMPAT; + + if (cc->unrestricted_ioctl) + flags |= FUSE_IOCTL_UNRESTRICTED; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +static const struct file_operations cuse_frontend_fops = { + .owner = THIS_MODULE, + .read = cuse_read, + .write = cuse_write, + .open = cuse_open, + .release = cuse_release, + .unlocked_ioctl = cuse_file_ioctl, + .compat_ioctl = cuse_file_compat_ioctl, + .poll = fuse_file_poll, +}; + + +/************************************************************************** + * CUSE channel initialization and destruction + */ + +struct cuse_devinfo { + const char *name; +}; + +/** + * cuse_parse_one - parse one key=value pair + * @pp: i/o parameter for the current position + * @end: points to one past the end of the packed string + * @keyp: out parameter for key + * @valp: out parameter for value + * + * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends + * at @end - 1. This function parses one pair and set *@keyp to the + * start of the key and *@valp to the start of the value. Note that + * the original string is modified such that the key string is + * terminated with '\0'. *@pp is updated to point to the next string. + * + * RETURNS: + * 1 on successful parse, 0 on EOF, -errno on failure. + */ +static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp) +{ + char *p = *pp; + char *key, *val; + + while (p < end && *p == '\0') + p++; + if (p == end) + return 0; + + if (end[-1] != '\0') { + printk(KERN_ERR "CUSE: info not properly terminated\n"); + return -EINVAL; + } + + key = val = p; + p += strlen(p); + + if (valp) { + strsep(&val, "="); + if (!val) + val = key + strlen(key); + key = strstrip(key); + val = strstrip(val); + } else + key = strstrip(key); + + if (!strlen(key)) { + printk(KERN_ERR "CUSE: zero length info key specified\n"); + return -EINVAL; + } + + *pp = p; + *keyp = key; + if (valp) + *valp = val; + + return 1; +} + +/** + * cuse_parse_dev_info - parse device info + * @p: device info string + * @len: length of device info string + * @devinfo: out parameter for parsed device info + * + * Parse @p to extract device info and store it into @devinfo. String + * pointed to by @p is modified by parsing and @devinfo points into + * them, so @p shouldn't be freed while @devinfo is in use. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) +{ + char *end = p + len; + char *key, *val; + int rc; + + while (true) { + rc = cuse_parse_one(&p, end, &key, &val); + if (rc < 0) + return rc; + if (!rc) + break; + if (strcmp(key, "DEVNAME") == 0) + devinfo->name = val; + else + printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n", + key); + } + + if (!devinfo->name || !strlen(devinfo->name)) { + printk(KERN_ERR "CUSE: DEVNAME unspecified\n"); + return -EINVAL; + } + + return 0; +} + +static void cuse_gendev_release(struct device *dev) +{ + kfree(dev); +} + +/** + * cuse_process_init_reply - finish initializing CUSE channel + * + * This function creates the character device and sets up all the + * required data structures for it. Please read the comment at the + * top of this file for high level overview. + */ +static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + struct cuse_conn *cc = fc_to_cc(fc); + struct cuse_init_out *arg = &req->misc.cuse_init_out; + struct page *page = req->pages[0]; + struct cuse_devinfo devinfo = { }; + struct device *dev; + struct cdev *cdev; + dev_t devt; + int rc; + + if (req->out.h.error || + arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { + goto err; + } + + fc->minor = arg->minor; + fc->max_read = max_t(unsigned, arg->max_read, 4096); + fc->max_write = max_t(unsigned, arg->max_write, 4096); + + /* parse init reply */ + cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL; + + rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size, + &devinfo); + if (rc) + goto err; + + /* determine and reserve devt */ + devt = MKDEV(arg->dev_major, arg->dev_minor); + if (!MAJOR(devt)) + rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name); + else + rc = register_chrdev_region(devt, 1, devinfo.name); + if (rc) { + printk(KERN_ERR "CUSE: failed to register chrdev region\n"); + goto err; + } + + /* devt determined, create device */ + rc = -ENOMEM; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + goto err_region; + + device_initialize(dev); + dev_set_uevent_suppress(dev, 1); + dev->class = cuse_class; + dev->devt = devt; + dev->release = cuse_gendev_release; + dev_set_drvdata(dev, cc); + dev_set_name(dev, "%s", devinfo.name); + + rc = device_add(dev); + if (rc) + goto err_device; + + /* register cdev */ + rc = -ENOMEM; + cdev = cdev_alloc(); + if (!cdev) + goto err_device; + + cdev->owner = THIS_MODULE; + cdev->ops = &cuse_frontend_fops; + + rc = cdev_add(cdev, devt, 1); + if (rc) + goto err_cdev; + + cc->dev = dev; + cc->cdev = cdev; + + /* make the device available */ + spin_lock(&cuse_lock); + list_add(&cc->list, cuse_conntbl_head(devt)); + spin_unlock(&cuse_lock); + + /* announce device availability */ + dev_set_uevent_suppress(dev, 0); + kobject_uevent(&dev->kobj, KOBJ_ADD); +out: + __free_page(page); + return; + +err_cdev: + cdev_del(cdev); +err_device: + put_device(dev); +err_region: + unregister_chrdev_region(devt, 1); +err: + fc->conn_error = 1; + goto out; +} + +static int cuse_send_init(struct cuse_conn *cc) +{ + int rc; + struct fuse_req *req; + struct page *page; + struct fuse_conn *fc = &cc->fc; + struct cuse_init_in *arg; + + BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + rc = PTR_ERR(req); + goto err; + } + + rc = -ENOMEM; + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto err_put_req; + + arg = &req->misc.cuse_init_in; + arg->major = FUSE_KERNEL_VERSION; + arg->minor = FUSE_KERNEL_MINOR_VERSION; + arg->flags |= CUSE_UNRESTRICTED_IOCTL; + req->in.h.opcode = CUSE_INIT; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct cuse_init_in); + req->in.args[0].value = arg; + req->out.numargs = 2; + req->out.args[0].size = sizeof(struct cuse_init_out); + req->out.args[0].value = &req->misc.cuse_init_out; + req->out.args[1].size = CUSE_INIT_INFO_MAX; + req->out.argvar = 1; + req->out.argpages = 1; + req->pages[0] = page; + req->num_pages = 1; + req->end = cuse_process_init_reply; + fuse_request_send_background(fc, req); + + return 0; + +err_put_req: + fuse_put_request(fc, req); +err: + return rc; +} + +static void cuse_fc_release(struct fuse_conn *fc) +{ + struct cuse_conn *cc = fc_to_cc(fc); + kfree(cc); +} + +/** + * cuse_channel_open - open method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being opened + * + * Userland CUSE server can create a CUSE device by opening /dev/cuse + * and replying to the initilaization request kernel sends. This + * function is responsible for handling CUSE device initialization. + * Because the fd opened by this function is used during + * initialization, this function only creates cuse_conn and sends + * init. The rest is delegated to a kthread. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_open(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc; + int rc; + + /* set up cuse_conn */ + cc = kzalloc(sizeof(*cc), GFP_KERNEL); + if (!cc) + return -ENOMEM; + + fuse_conn_init(&cc->fc); + + INIT_LIST_HEAD(&cc->list); + cc->fc.release = cuse_fc_release; + + cc->fc.connected = 1; + cc->fc.blocked = 0; + rc = cuse_send_init(cc); + if (rc) { + fuse_conn_put(&cc->fc); + return rc; + } + file->private_data = &cc->fc; /* channel owns base reference to cc */ + + return 0; +} + +/** + * cuse_channel_release - release method for /dev/cuse + * @inode: inode for /dev/cuse + * @file: file struct being closed + * + * Disconnect the channel, deregister CUSE device and initiate + * destruction by putting the default reference. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int cuse_channel_release(struct inode *inode, struct file *file) +{ + struct cuse_conn *cc = fc_to_cc(file->private_data); + int rc; + + /* remove from the conntbl, no more access from this point on */ + spin_lock(&cuse_lock); + list_del_init(&cc->list); + spin_unlock(&cuse_lock); + + /* remove device */ + if (cc->dev) + device_unregister(cc->dev); + if (cc->cdev) { + unregister_chrdev_region(cc->cdev->dev, 1); + cdev_del(cc->cdev); + } + + /* kill connection and shutdown channel */ + fuse_conn_kill(&cc->fc); + rc = fuse_dev_release(inode, file); /* puts the base reference */ + + return rc; +} + +static struct file_operations cuse_channel_fops; /* initialized during init */ + + +/************************************************************************** + * Misc stuff and module initializatiion + * + * CUSE exports the same set of attributes to sysfs as fusectl. + */ + +static ssize_t cuse_class_waiting_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); +} + +static ssize_t cuse_class_abort_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cuse_conn *cc = dev_get_drvdata(dev); + + fuse_abort_conn(&cc->fc); + return count; +} + +static struct device_attribute cuse_class_dev_attrs[] = { + __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL), + __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store), + { } +}; + +static struct miscdevice cuse_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cuse", + .fops = &cuse_channel_fops, +}; + +static int __init cuse_init(void) +{ + int i, rc; + + /* init conntbl */ + for (i = 0; i < CUSE_CONNTBL_LEN; i++) + INIT_LIST_HEAD(&cuse_conntbl[i]); + + /* inherit and extend fuse_dev_operations */ + cuse_channel_fops = fuse_dev_operations; + cuse_channel_fops.owner = THIS_MODULE; + cuse_channel_fops.open = cuse_channel_open; + cuse_channel_fops.release = cuse_channel_release; + + cuse_class = class_create(THIS_MODULE, "cuse"); + if (IS_ERR(cuse_class)) + return PTR_ERR(cuse_class); + + cuse_class->dev_attrs = cuse_class_dev_attrs; + + rc = misc_register(&cuse_miscdev); + if (rc) { + class_destroy(cuse_class); + return rc; + } + + return 0; +} + +static void __exit cuse_exit(void) +{ + misc_deregister(&cuse_miscdev); + class_destroy(cuse_class); +} + +module_init(cuse_init); +module_exit(cuse_exit); + +MODULE_AUTHOR("Tejun Heo <tj@kernel.org>"); +MODULE_DESCRIPTION("Character device in Userspace"); +MODULE_LICENSE("GPL"); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ba76b68c52ff..8fed2ed12f38 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void) fuse_request_init(req); return req; } +EXPORT_SYMBOL_GPL(fuse_request_alloc); struct fuse_req *fuse_request_alloc_nofs(void) { @@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) atomic_dec(&fc->num_waiting); return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(fuse_get_req); /* * Return request in fuse_file->reserved_req. However that may @@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) fuse_request_free(req); } } +EXPORT_SYMBOL_GPL(fuse_put_request); static unsigned len_args(unsigned numargs, struct fuse_arg *args) { @@ -282,7 +285,7 @@ __releases(&fc->lock) wake_up_all(&fc->blocked_waitq); } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && - fc->connected) { + fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); } @@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) @@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->num_background++; if (fc->num_background == FUSE_MAX_BACKGROUND) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { + if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); } @@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) req->isreply = 1; fuse_request_send_nowait(fc, req); } +EXPORT_SYMBOL_GPL(fuse_request_send_background); /* * Called under fc->lock @@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_abort_conn); -static int fuse_dev_release(struct inode *inode, struct file *file) +int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = fuse_get_conn(file); if (fc) { @@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { @@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = { .release = fuse_dev_release, .fasync = fuse_dev_fasync, }; +EXPORT_SYMBOL_GPL(fuse_dev_operations); static struct miscdevice fuse_miscdevice = { .minor = FUSE_MINOR, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8b8eebc5614b..b3089a083d30 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } /* - * Synchronous release for the case when something goes wrong in CREATE_OPEN - */ -static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, int flags) -{ - fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - fuse_request_send(fc, ff->reserved_req); - fuse_put_request(fc, ff->reserved_req); - kfree(ff); -} - -/* * Atomic create+open operation * * If the filesystem doesn't support this, then fall back to separate @@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, goto out_free_ff; fuse_put_request(fc, req); + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(ff, flags); fuse_send_forget(fc, forget_req, outentry.nodeid, 1); return -ENOMEM; } @@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(ff, flags); return PTR_ERR(file); } - fuse_finish_open(inode, file, ff, &outopen); + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); return 0; out_free_ff: @@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; @@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) static int fuse_dir_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 1); + return fuse_open_common(inode, file, true); } static int fuse_dir_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 1); + fuse_release_common(file, FUSE_RELEASEDIR); + + return 0; } static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06f30e965676..fce6ce694fde 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -12,13 +12,13 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/module.h> static const struct file_operations fuse_direct_io_file_operations; -static int fuse_send_open(struct inode *inode, struct file *file, int isdir, - struct fuse_open_out *outargp) +static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + int opcode, struct fuse_open_out *outargp) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_open_in inarg; struct fuse_req *req; int err; @@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; - req->in.h.nodeid = get_node_id(inode); + req->in.h.opcode = opcode; + req->in.h.nodeid = nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; + ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); - if (ff) { - ff->reserved_req = fuse_request_alloc(); - if (!ff->reserved_req) { - kfree(ff); - return NULL; - } else { - INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); - spin_lock(&fc->lock); - ff->kh = ++fc->khctr; - spin_unlock(&fc->lock); - } - RB_CLEAR_NODE(&ff->polled_node); - init_waitqueue_head(&ff->poll_wait); + if (unlikely(!ff)) + return NULL; + + ff->fc = fc; + ff->reserved_req = fuse_request_alloc(); + if (unlikely(!ff->reserved_req)) { + kfree(ff); + return NULL; } + + INIT_LIST_HEAD(&ff->write_entry); + atomic_set(&ff->count, 0); + RB_CLEAR_NODE(&ff->polled_node); + init_waitqueue_head(&ff->poll_wait); + + spin_lock(&fc->lock); + ff->kh = ++fc->khctr; + spin_unlock(&fc->lock); + return ff; } @@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff) kfree(ff); } -static struct fuse_file *fuse_file_get(struct fuse_file *ff) +struct fuse_file *fuse_file_get(struct fuse_file *ff) { atomic_inc(&ff->count); return ff; @@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - dput(req->misc.release.dentry); - mntput(req->misc.release.vfsmount); + path_put(&req->misc.release.path); } static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - struct inode *inode = req->misc.release.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + req->end = fuse_release_end; - fuse_request_send_background(fc, req); + fuse_request_send_background(ff->fc, req); kfree(ff); } } -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg) +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir) { - if (outarg->open_flags & FOPEN_DIRECT_IO) + struct fuse_open_out outarg; + struct fuse_file *ff; + int err; + int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; + + ff = fuse_file_alloc(fc); + if (!ff) + return -ENOMEM; + + err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + if (err) { + fuse_file_free(ff); + return err; + } + + if (isdir) + outarg.open_flags &= ~FOPEN_DIRECT_IO; + + ff->fh = outarg.fh; + ff->nodeid = nodeid; + ff->open_flags = outarg.open_flags; + file->private_data = fuse_file_get(ff); + + return 0; +} +EXPORT_SYMBOL_GPL(fuse_do_open); + +void fuse_finish_open(struct inode *inode, struct file *file) +{ + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_DIRECT_IO) file->f_op = &fuse_direct_io_file_operations; - if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) + if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (outarg->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); - ff->fh = outarg->fh; - file->private_data = fuse_file_get(ff); } -int fuse_open_common(struct inode *inode, struct file *file, int isdir) +int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_open_out outarg; - struct fuse_file *ff; int err; /* VFS checks this, but only _after_ ->open() */ @@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) if (err) return err; - ff = fuse_file_alloc(fc); - if (!ff) - return -ENOMEM; - - err = fuse_send_open(inode, file, isdir, &outarg); + err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (err) - fuse_file_free(ff); - else { - if (isdir) - outarg.open_flags &= ~FOPEN_DIRECT_IO; - fuse_finish_open(inode, file, ff, &outarg); - } + return err; - return err; + fuse_finish_open(inode, file); + + return 0; } -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) +static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) { + struct fuse_conn *fc = ff->fc; struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release.in; + spin_lock(&fc->lock); + list_del(&ff->write_entry); + if (!RB_EMPTY_NODE(&ff->polled_node)) + rb_erase(&ff->polled_node, &fc->polled_files); + spin_unlock(&fc->lock); + + wake_up_interruptible_sync(&ff->poll_wait); + inarg->fh = ff->fh; inarg->flags = flags; req->in.h.opcode = opcode; - req->in.h.nodeid = nodeid; + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; } -int fuse_release_common(struct inode *inode, struct file *file, int isdir) +void fuse_release_common(struct file *file, int opcode) { - struct fuse_file *ff = file->private_data; - if (ff) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = ff->reserved_req; - - fuse_release_fill(ff, get_node_id(inode), file->f_flags, - isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + struct fuse_file *ff; + struct fuse_req *req; - /* Hold vfsmount and dentry until release is finished */ - req->misc.release.vfsmount = mntget(file->f_path.mnt); - req->misc.release.dentry = dget(file->f_path.dentry); + ff = file->private_data; + if (unlikely(!ff)) + return; - spin_lock(&fc->lock); - list_del(&ff->write_entry); - if (!RB_EMPTY_NODE(&ff->polled_node)) - rb_erase(&ff->polled_node, &fc->polled_files); - spin_unlock(&fc->lock); + req = ff->reserved_req; + fuse_prepare_release(ff, file->f_flags, opcode); - wake_up_interruptible_sync(&ff->poll_wait); - /* - * Normally this will send the RELEASE request, - * however if some asynchronous READ or WRITE requests - * are outstanding, the sending will be delayed - */ - fuse_file_put(ff); - } + /* Hold vfsmount and dentry until release is finished */ + path_get(&file->f_path); + req->misc.release.path = file->f_path; - /* Return value is ignored by VFS */ - return 0; + /* + * Normally this will send the RELEASE request, however if + * some asynchronous READ or WRITE requests are outstanding, + * the sending will be delayed. + */ + fuse_file_put(ff); } static int fuse_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 0); + return fuse_open_common(inode, file, false); } static int fuse_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 0); + fuse_release_common(file, FUSE_RELEASE); + + /* return value is ignored by VFS */ + return 0; +} + +void fuse_sync_release(struct fuse_file *ff, int flags) +{ + WARN_ON(atomic_read(&ff->count) > 1); + fuse_prepare_release(ff, flags, FUSE_RELEASE); + ff->reserved_req->force = 1; + fuse_request_send(ff->fc, ff->reserved_req); + fuse_put_request(ff->fc, ff->reserved_req); + kfree(ff); } +EXPORT_SYMBOL_GPL(fuse_sync_release); /* * Scramble the ID space with XTEA, so that the value of the files_struct @@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) return fuse_fsync_common(file, de, datasync, 0); } -void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode) +void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, + size_t count, int opcode) { struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; @@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, inarg->size = count; inarg->flags = file->f_flags; req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); req->in.args[0].value = inarg; @@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, } static size_t fuse_send_read(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); if (owner != NULL) { struct fuse_read_in *inarg = &req->misc.read.in; @@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - num_read = fuse_send_read(req, file, inode, pos, count, NULL); + num_read = fuse_send_read(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) fuse_file_put(req->ff); } -static void fuse_send_readpages(struct fuse_req *req, struct file *file, - struct inode *inode) +static void fuse_send_readpages(struct fuse_req *req, struct file *file) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; loff_t pos = page_offset(req->pages[0]); size_t count = req->num_pages << PAGE_CACHE_SHIFT; req->out.argpages = 1; req->out.page_zeroing = 1; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { - struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); req->end = fuse_readpages_end; fuse_request_send_background(fc, req); @@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { - fuse_send_readpages(req, data->file, inode); + fuse_send_readpages(req, data->file); data->req = req = fuse_get_req(fc); if (IS_ERR(req)) { unlock_page(page); @@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { if (data.req->num_pages) - fuse_send_readpages(data.req, file, inode); + fuse_send_readpages(data.req, file); else fuse_put_request(fc, data.req); } @@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_read(iocb, iov, nr_segs, pos); } -static void fuse_write_fill(struct fuse_req *req, struct file *file, - struct fuse_file *ff, struct inode *inode, - loff_t pos, size_t count, int writepage) +static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, + loff_t pos, size_t count) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; - memset(inarg, 0, sizeof(struct fuse_write_in)); inarg->fh = ff->fh; inarg->offset = pos; inarg->size = count; - inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; - inarg->flags = file ? file->f_flags : 0; req->in.h.opcode = FUSE_WRITE; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 2; - if (fc->minor < 9) + if (ff->fc->minor < 9) req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; else req->in.args[0].size = sizeof(struct fuse_write_in); @@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, } static size_t fuse_send_write(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); - fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_write_in *inarg = &req->misc.write.in; + + fuse_write_fill(req, ff, pos, count); + inarg->flags = file->f_flags; if (owner != NULL) { - struct fuse_write_in *inarg = &req->misc.write.in; inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } @@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, req->num_pages = 1; req->pages[0] = page; req->page_offset = offset; - nres = fuse_send_write(req, file, inode, pos, count, NULL); + nres = fuse_send_write(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); if (!err && !nres) @@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); - res = fuse_send_write(req, file, inode, pos, count, NULL); + res = fuse_send_write(req, file, pos, count, NULL); offset = req->page_offset; count = res; @@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return 0; } -static ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) { - struct inode *inode = file->f_path.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; struct fuse_req *req; - if (is_bad_inode(inode)) - return -EIO; - req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); while (count) { size_t nres; + fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, &nbytes, write); if (err) { @@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } if (write) - nres = fuse_send_write(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_write(req, file, pos, nbytes, owner); else - nres = fuse_send_read(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_read(req, file, pos, nbytes, owner); + fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) @@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } } fuse_put_request(fc, req); - if (res > 0) { - if (write) - fuse_write_update_size(inode, pos); + if (res > 0) *ppos = pos; - } - fuse_invalidate_attr(inode); return res; } +EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - return fuse_direct_io(file, buf, count, ppos, 0); + ssize_t res; + struct inode *inode = file->f_path.dentry->d_inode; + + if (is_bad_inode(inode)) + return -EIO; + + res = fuse_direct_io(file, buf, count, ppos, 0); + + fuse_invalidate_attr(inode); + + return res; } static ssize_t fuse_direct_write(struct file *file, const char __user *buf, @@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, { struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; + + if (is_bad_inode(inode)) + return -EIO; + /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); res = generic_write_checks(file, ppos, &count, 0); - if (!res) + if (!res) { res = fuse_direct_io(file, buf, count, ppos, 1); + if (res > 0) + fuse_write_update_size(inode, *ppos); + } mutex_unlock(&inode->i_mutex); + + fuse_invalidate_attr(inode); + return res; } @@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page) req->ff = fuse_file_get(ff); spin_unlock(&fc->lock); - fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); + fuse_write_fill(req, ff, page_offset(page), 0); copy_highpage(tmp_page, page); + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; @@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ -static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_ioctl_in inarg = { .fh = ff->fh, .cmd = cmd, @@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* assume all the iovs returned by client always fits in a page */ BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); - if (!fuse_allow_task(fc, current)) - return -EACCES; - - err = -EIO; - if (is_bad_inode(inode)) - goto out; - err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); iov_page = alloc_page(GFP_KERNEL); @@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, return err ? err : outarg.result; } +EXPORT_SYMBOL_GPL(fuse_do_ioctl); + +static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + + if (is_bad_inode(inode)) + return -EIO; + + return fuse_do_ioctl(file, cmd, arg, flags); +} static long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, 0); + return fuse_file_ioctl_common(file, cmd, arg, 0); } static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); + return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } /* @@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc, spin_unlock(&fc->lock); } -static unsigned fuse_file_poll(struct file *file, poll_table *wait) +unsigned fuse_file_poll(struct file *file, poll_table *wait) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_out outarg; struct fuse_req *req; @@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) return PTR_ERR(req); req->in.h.opcode = FUSE_POLL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) } return POLLERR; } +EXPORT_SYMBOL_GPL(fuse_file_poll); /* * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6fc5aedaa0d5..aaf2f9ff970e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -97,8 +97,13 @@ struct fuse_inode { struct list_head writepages; }; +struct fuse_conn; + /** FUSE specific file data */ struct fuse_file { + /** Fuse connection for this file */ + struct fuse_conn *fc; + /** Request reserved for flush and release */ struct fuse_req *reserved_req; @@ -108,9 +113,15 @@ struct fuse_file { /** File handle used by userspace */ u64 fh; + /** Node id of this file */ + u64 nodeid; + /** Refcount */ atomic_t count; + /** FOPEN_* flags returned by open */ + u32 open_flags; + /** Entry on inode's write_files list */ struct list_head write_entry; @@ -185,8 +196,6 @@ enum fuse_req_state { FUSE_REQ_FINISHED }; -struct fuse_conn; - /** * A request to the client */ @@ -248,11 +257,12 @@ struct fuse_req { struct fuse_forget_in forget_in; struct { struct fuse_release_in in; - struct vfsmount *vfsmount; - struct dentry *dentry; + struct path path; } release; struct fuse_init_in init_in; struct fuse_init_out init_out; + struct cuse_init_in cuse_init_in; + struct cuse_init_out cuse_init_out; struct { struct fuse_read_in in; u64 attr_ver; @@ -386,6 +396,9 @@ struct fuse_conn { /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; + /** Set if bdi is valid */ + unsigned bdi_initialized:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, * Initialize READ or READDIR request */ void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode); + loff_t pos, size_t count, int opcode); /** * Send OPEN or OPENDIR request */ -int fuse_open_common(struct inode *inode, struct file *file, int isdir); +int fuse_open_common(struct inode *inode, struct file *file, bool isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); +struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg); +void fuse_finish_open(struct inode *inode, struct file *file); -/** Fill in ff->reserved_req with a RELEASE request */ -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode); +void fuse_sync_release(struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request */ -int fuse_release_common(struct inode *inode, struct file *file, int isdir); +void fuse_release_common(struct file *file, int opcode); /** * Send FSYNC or FSYNCDIR request @@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry); */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); +void fuse_conn_kill(struct fuse_conn *fc); + /** * Initialize fuse_conn */ -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); +void fuse_conn_init(struct fuse_conn *fc); /** * Release reference to fuse_conn @@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir); +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write); +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags); +unsigned fuse_file_poll(struct file *file, poll_table *wait); +int fuse_dev_release(struct inode *inode, struct file *file); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 91f7c85f1ffd..f0df55a52929 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc) } } -static void fuse_put_super(struct super_block *sb) +static void fuse_bdi_destroy(struct fuse_conn *fc) { - struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->bdi_initialized) + bdi_destroy(&fc->bdi); +} - fuse_send_destroy(fc); +void fuse_conn_kill(struct fuse_conn *fc) +{ spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; @@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb) list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); +} +EXPORT_SYMBOL_GPL(fuse_conn_kill); + +static void fuse_put_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + fuse_send_destroy(fc); + fuse_conn_kill(fc); fuse_conn_put(fc); } @@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) +void fuse_conn_init(struct fuse_conn *fc) { - int err; - memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); @@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; - fc->bdi.unplug_io_fn = default_unplug_io_fn; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->dev = sb->s_dev; - err = bdi_init(&fc->bdi); - if (err) - goto error_mutex_destroy; - if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); - } - if (err) - goto error_bdi_destroy; - /* - * For a single fuse filesystem use max 1% of dirty + - * writeback threshold. - * - * This gives about 1M of write buffer for memory maps on a - * machine with 1G and 10% dirty_ratio, which should be more - * than enough. - * - * Privileged users can raise it by writing to - * - * /sys/class/bdi/<bdi>/max_ratio - */ - bdi_set_max_ratio(&fc->bdi, 1); fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); - - return 0; - - error_bdi_destroy: - bdi_destroy(&fc->bdi); - error_mutex_destroy: - mutex_destroy(&fc->inst_mutex); - return err; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc) fc->release(fc); } } +EXPORT_SYMBOL_GPL(fuse_conn_put); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) { atomic_inc(&fc->count); return fc; } +EXPORT_SYMBOL_GPL(fuse_conn_get); static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) { @@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc) kfree(fc); } +static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) +{ + int err; + + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + + err = bdi_init(&fc->bdi); + if (err) + return err; + + fc->bdi_initialized = 1; + + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } + + if (err) + return err; + + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi/<bdi>/max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); + + return 0; +} + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; @@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!fc) goto err_fput; - err = fuse_conn_init(fc, sb); - if (err) { - kfree(fc); - goto err_fput; - } + fuse_conn_init(fc); + + fc->dev = sb->s_dev; + err = fuse_bdi_init(fc, sb); + if (err) + goto err_put_conn; fc->release = fuse_free_conn; fc->flags = d.flags; @@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); fuse_conn_put(fc); err_fput: fput(file); diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index d53a9bea1c2f..3da2f1f4f738 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,3 +1,4 @@ +EXTRA_CFLAGS := -I$(src) obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 329763530dc0..6d47379e794b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -25,6 +25,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); clear_buffer_boundary(bh_map); + trace_gfs2_bmap(ip, bh_map, lblock, create, 1); if (gfs2_is_dir(ip)) { bsize = sdp->sd_jbsize; arr = sdp->sd_jheightsize; @@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, ret = 0; out: release_metapath(&mp); + trace_gfs2_bmap(ip, bh_map, lblock, create, ret); bmap_unlock(ip, create); return ret; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2bf62bcc5181..297421c0427a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -39,6 +39,8 @@ #include "super.h" #include "util.h" #include "bmap.h" +#define CREATE_TRACE_POINTS +#include "trace_gfs2.h" struct gfs2_gl_hash_bucket { struct hlist_head hb_list; @@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl) if (aspace) gfs2_aspace_put(aspace); - + trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); } @@ -317,14 +319,17 @@ restart: return 2; gh->gh_error = ret; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 1); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 0); gfs2_holder_wake(gh); continue; } @@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) else continue; list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); } } @@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) int rv; spin_lock(&gl->gl_spin); + trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, gl->gl_demote_state != state) { gl->gl_demote_state = LM_ST_UNLOCKED; } + trace_gfs2_demote_rq(gl); } /** @@ -936,6 +944,7 @@ fail: goto do_cancel; return; } + trace_gfs2_glock_queue(gh, 1); list_add_tail(&gh->gh_list, insert_pt); do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); @@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) return; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index aa62cf5976e8..13c6237c5f67 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -28,6 +28,7 @@ #include "meta_io.h" #include "util.h" #include "dir.h" +#include "trace_gfs2.h" #define PULL 1 @@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); } atomic_sub(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, -blks); gfs2_log_unlock(sdp); mutex_unlock(&sdp->sd_log_reserve_mutex); @@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) gfs2_log_lock(sdp); atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) gfs2_log_lock(sdp); atomic_add(dist, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, dist); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); gfs2_log_unlock(sdp); @@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) up_write(&sdp->sd_log_flush_lock); return; } + trace_gfs2_log_flush(sdp, 1); ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ai->ai_ail1_list); @@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); gfs2_log_unlock(sdp); log_write_header(sdp, 0, PULL); } @@ -763,8 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) ai = NULL; } gfs2_log_unlock(sdp); - - sdp->sd_vfs->s_dirt = 0; + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); kfree(ai); @@ -788,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; atomic_add(unused, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, unused); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; @@ -823,7 +829,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) log_refund(sdp, tr); buf_lo_incore_commit(sdp, tr); - sdp->sd_vfs->s_dirt = 1; up_read(&sdp->sd_log_flush_lock); gfs2_log_lock(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 00315f50fa46..9969ff062c5b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -27,6 +27,7 @@ #include "rgrp.h" #include "trans.h" #include "util.h" +#include "trace_gfs2.h" /** * gfs2_pin - Pin a buffer in memory @@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (bd->bd_ail) list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); get_bh(bh); + trace_gfs2_pin(bd, 1); } /** @@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + trace_gfs2_pin(bd, 0); gfs2_log_unlock(sdp); unlock_buffer(bh); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cc34f271b3e7..7bc3c45cd676 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -33,6 +33,7 @@ #include "log.h" #include "quota.h" #include "dir.h" +#include "trace_gfs2.h" #define DO 0 #define UNDO 1 @@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) /* Map the extents for this journal's blocks */ map_journal_extents(sdp); } + trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); if (sdp->sd_lockstruct.ls_first) { unsigned int x; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index de3239731db8..daa4ae341a29 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,6 +29,7 @@ #include "util.h" #include "log.h" #include "inode.h" +#include "trace_gfs2.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) @@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); *bn = block; return 0; @@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone--; spin_unlock(&sdp->sd_rindex_spin); - + trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); return block; } @@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - + trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode) rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); if (!rgd) return; + trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_trans_add_rg(rgd); @@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_no_addr); + trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_no_addr, 1); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 40bcc37e5a70..0a6801336470 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -788,17 +788,6 @@ restart: } /** - * gfs2_write_super - * @sb: the superblock - * - */ - -static void gfs2_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - -/** * gfs2_sync_fs - sync the filesystem * @sb: the superblock * @@ -807,7 +796,6 @@ static void gfs2_write_super(struct super_block *sb) static int gfs2_sync_fs(struct super_block *sb, int wait) { - sb->s_dirt = 0; if (wait && sb->s_fs_info) gfs2_log_flush(sb->s_fs_info, NULL); return 0; @@ -1324,7 +1312,6 @@ const struct super_operations gfs2_super_ops = { .write_inode = gfs2_write_inode, .delete_inode = gfs2_delete_inode, .put_super = gfs2_put_super, - .write_super = gfs2_write_super, .sync_fs = gfs2_sync_fs, .freeze_fs = gfs2_freeze, .unfreeze_fs = gfs2_unfreeze, diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h new file mode 100644 index 000000000000..98d6ef1c1dc0 --- /dev/null +++ b/fs/gfs2/trace_gfs2.h @@ -0,0 +1,407 @@ +#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GFS2_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gfs2 +#define TRACE_INCLUDE_FILE trace_gfs2 + +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/dlmconstants.h> +#include <linux/gfs2_ondisk.h> +#include "incore.h" +#include "glock.h" + +#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } +#define glock_trace_name(x) __print_symbolic(x, \ + dlm_state_name(IV), \ + dlm_state_name(NL), \ + dlm_state_name(CR), \ + dlm_state_name(CW), \ + dlm_state_name(PR), \ + dlm_state_name(PW), \ + dlm_state_name(EX)) + +#define block_state_name(x) __print_symbolic(x, \ + { GFS2_BLKST_FREE, "free" }, \ + { GFS2_BLKST_USED, "used" }, \ + { GFS2_BLKST_DINODE, "dinode" }, \ + { GFS2_BLKST_UNLINKED, "unlinked" }) + +#define show_glock_flags(flags) __print_flags(flags, "", \ + {(1UL << GLF_LOCK), "l" }, \ + {(1UL << GLF_DEMOTE), "D" }, \ + {(1UL << GLF_PENDING_DEMOTE), "d" }, \ + {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \ + {(1UL << GLF_DIRTY), "y" }, \ + {(1UL << GLF_LFLUSH), "f" }, \ + {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ + {(1UL << GLF_REPLY_PENDING), "r" }, \ + {(1UL << GLF_INITIAL), "I" }, \ + {(1UL << GLF_FROZEN), "F" }) + +#ifndef NUMPTY +#define NUMPTY +static inline u8 glock_trace_state(unsigned int state) +{ + switch(state) { + case LM_ST_SHARED: + return DLM_LOCK_PR; + case LM_ST_DEFERRED: + return DLM_LOCK_CW; + case LM_ST_EXCLUSIVE: + return DLM_LOCK_EX; + } + return DLM_LOCK_NL; +} +#endif + +/* Section 1 - Locking + * + * Objectives: + * Latency: Remote demote request to state change + * Latency: Local lock request to state change + * Latency: State change to lock grant + * Correctness: Ordering of local lock state vs. I/O requests + * Correctness: Responses to remote demote requests + */ + +/* General glock state change (DLM lock request completes) */ +TRACE_EVENT(gfs2_glock_state_change, + + TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state), + + TP_ARGS(gl, new_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, new_state ) + __field( u8, dmt_state ) + __field( u8, tgt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gl->gl_name.ln_number; + __entry->gltype = gl->gl_name.ln_type; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->new_state = glock_trace_state(new_state); + __entry->tgt_state = glock_trace_state(gl->gl_target); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->new_state), + glock_trace_name(__entry->tgt_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) +); + +/* State change -> unlocked, glock is being deallocated */ +TRACE_EVENT(gfs2_glock_put, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->gltype, (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(DLM_LOCK_IV), + show_glock_flags(__entry->flags)) + +); + +/* Callback (local or remote) requesting lock demotion */ +TRACE_EVENT(gfs2_demote_rq, + + TP_PROTO(const struct gfs2_glock *gl), + + TP_ARGS(gl), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( u8, cur_state ) + __field( u8, dmt_state ) + __field( unsigned long, flags ) + ), + + TP_fast_assign( + __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->gltype = gl->gl_name.ln_type; + __entry->glnum = gl->gl_name.ln_number; + __entry->cur_state = glock_trace_state(gl->gl_state); + __entry->dmt_state = glock_trace_state(gl->gl_demote_state); + __entry->flags = gl->gl_flags; + ), + + TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + glock_trace_name(__entry->cur_state), + glock_trace_name(__entry->dmt_state), + show_glock_flags(__entry->flags)) + +); + +/* Promotion/grant of a glock */ +TRACE_EVENT(gfs2_promote, + + TP_PROTO(const struct gfs2_holder *gh, int first), + + TP_ARGS(gh, first), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, first ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->first = first; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu promote %s %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->first ? "first": "other", + glock_trace_name(__entry->state)) +); + +/* Queue/dequeue a lock request */ +TRACE_EVENT(gfs2_glock_queue, + + TP_PROTO(const struct gfs2_holder *gh, int queue), + + TP_ARGS(gh, queue), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, glnum ) + __field( u32, gltype ) + __field( int, queue ) + __field( u8, state ) + ), + + TP_fast_assign( + __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->glnum = gh->gh_gl->gl_name.ln_number; + __entry->gltype = gh->gh_gl->gl_name.ln_type; + __entry->queue = queue; + __entry->state = glock_trace_state(gh->gh_state); + ), + + TP_printk("%u,%u glock %u:%llu %squeue %s", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, + (unsigned long long)__entry->glnum, + __entry->queue ? "" : "de", + glock_trace_name(__entry->state)) +); + +/* Section 2 - Log/journal + * + * Objectives: + * Latency: Log flush time + * Correctness: pin/unpin vs. disk I/O ordering + * Performance: Log usage stats + */ + +/* Pin/unpin a block in the log */ +TRACE_EVENT(gfs2_pin, + + TP_PROTO(const struct gfs2_bufdata *bd, int pin), + + TP_ARGS(bd, pin), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, pin ) + __field( u32, len ) + __field( sector_t, block ) + __field( u64, ino ) + ), + + TP_fast_assign( + __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; + __entry->pin = pin; + __entry->len = bd->bd_bh->b_size; + __entry->block = bd->bd_bh->b_blocknr; + __entry->ino = bd->bd_gl->gl_name.ln_number; + ), + + TP_printk("%u,%u log %s %llu/%lu inode %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->pin ? "pin" : "unpin", + (unsigned long long)__entry->block, + (unsigned long)__entry->len, + (unsigned long long)__entry->ino) +); + +/* Flushing the log */ +TRACE_EVENT(gfs2_log_flush, + + TP_PROTO(const struct gfs2_sbd *sdp, int start), + + TP_ARGS(sdp, start), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, start ) + __field( u64, log_seq ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->start = start; + __entry->log_seq = sdp->sd_log_sequence; + ), + + TP_printk("%u,%u log flush %s %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->start ? "start" : "end", + (unsigned long long)__entry->log_seq) +); + +/* Reserving/releasing blocks in the log */ +TRACE_EVENT(gfs2_log_blocks, + + TP_PROTO(const struct gfs2_sbd *sdp, int blocks), + + TP_ARGS(sdp, blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, blocks ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->blocks = blocks; + ), + + TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->blocks) +); + +/* Section 3 - bmap + * + * Objectives: + * Latency: Bmap request time + * Performance: Block allocator tracing + * Correctness: Test of disard generation vs. blocks allocated + */ + +/* Map an extent of blocks, possibly a new allocation */ +TRACE_EVENT(gfs2_bmap, + + TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh, + sector_t lblock, int create, int errno), + + TP_ARGS(ip, bh, lblock, create, errno), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, lblock ) + __field( sector_t, pblock ) + __field( u64, inum ) + __field( unsigned long, state ) + __field( u32, len ) + __field( int, create ) + __field( int, errno ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->lblock = lblock; + __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; + __entry->inum = ip->i_no_addr; + __entry->state = bh->b_state; + __entry->len = bh->b_size; + __entry->create = create; + __entry->errno = errno; + ), + + TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->lblock, + (unsigned long)__entry->len, + (unsigned long long)__entry->pblock, + __entry->state, __entry->create ? "create " : "nocreate", + __entry->errno) +); + +/* Keep track of blocks as they are allocated/freed */ +TRACE_EVENT(gfs2_block_alloc, + + TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, + u8 block_state), + + TP_ARGS(ip, block, len, block_state), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, start ) + __field( u64, inum ) + __field( u32, len ) + __field( u8, block_state ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->start = block; + __entry->inum = ip->i_no_addr; + __entry->len = len; + __entry->block_state = block_state; + ), + + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->start, + (unsigned long)__entry->len, + block_state_name(__entry->block_state)) +); + +#endif /* _TRACE_GFS2_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> + diff --git a/fs/hfs/super.c b/fs/hfs/super.c index a36bb749926d..6f833dc8e910 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -49,11 +49,23 @@ MODULE_LICENSE("GPL"); */ static void hfs_write_super(struct super_block *sb) { + lock_super(sb); sb->s_dirt = 0; - if (sb->s_flags & MS_RDONLY) - return; + /* sync everything to the buffers */ + if (!(sb->s_flags & MS_RDONLY)) + hfs_mdb_commit(sb); + unlock_super(sb); +} + +static int hfs_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); hfs_mdb_commit(sb); + sb->s_dirt = 0; + unlock_super(sb); + + return 0; } /* @@ -65,9 +77,15 @@ static void hfs_write_super(struct super_block *sb) */ static void hfs_put_super(struct super_block *sb) { + lock_kernel(); + + if (sb->s_dirt) + hfs_write_super(sb); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); + + unlock_kernel(); } /* @@ -164,6 +182,7 @@ static const struct super_operations hfs_super_operations = { .clear_inode = hfs_clear_inode, .put_super = hfs_put_super, .write_super = hfs_write_super, + .sync_fs = hfs_sync_fs, .statfs = hfs_statfs, .remount_fs = hfs_remount, .show_options = hfs_show_options, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index f2a64020f42e..9fc3af0c0dab 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -152,15 +152,14 @@ static void hfsplus_clear_inode(struct inode *inode) } } -static void hfsplus_write_super(struct super_block *sb) +static int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; dprint(DBG_SUPER, "hfsplus_write_super\n"); + + lock_super(sb); sb->s_dirt = 0; - if (sb->s_flags & MS_RDONLY) - /* warn? */ - return; vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); @@ -192,6 +191,16 @@ static void hfsplus_write_super(struct super_block *sb) } HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; } + unlock_super(sb); + return 0; +} + +static void hfsplus_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + hfsplus_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static void hfsplus_put_super(struct super_block *sb) @@ -199,6 +208,11 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); if (!sb->s_fs_info) return; + + lock_kernel(); + + if (sb->s_dirt) + hfsplus_write_super(sb); if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; @@ -218,6 +232,8 @@ static void hfsplus_put_super(struct super_block *sb) unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -279,6 +295,7 @@ static const struct super_operations hfsplus_sops = { .clear_inode = hfsplus_clear_inode, .put_super = hfsplus_put_super, .write_super = hfsplus_write_super, + .sync_fs = hfsplus_sync_fs, .statfs = hfsplus_statfs, .remount_fs = hfsplus_remount, .show_options = hfsplus_show_options, diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fc77965be841..f2feaa06bf26 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -13,6 +13,7 @@ #include <linux/statfs.h> #include <linux/magic.h> #include <linux/sched.h> +#include <linux/smp_lock.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ @@ -99,11 +100,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, static void hpfs_put_super(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); + + lock_kernel(); + kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); unmark_dirty(s); s->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -393,6 +399,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) *flags |= MS_NOATIME; + lock_kernel(); + lock_super(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; @@ -425,9 +433,13 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) replace_mount_options(s, new_opts); + unlock_super(s); + unlock_kernel(); return 0; out_err: + unlock_super(s); + unlock_kernel(); kfree(new_opts); return -EINVAL; } diff --git a/fs/inode.c b/fs/inode.c index ca337014ae29..f643be565df8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime); * for writeback. Note that this function is meant exclusively for * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the - * S_NOCTIME inode flag, e.g. for network filesystem where these + * S_NOCMTIME inode flag, e.g. for network filesystem where these * timestamps are handled by the server. */ @@ -1422,7 +1422,7 @@ void file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return; - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return; diff --git a/fs/internal.h b/fs/internal.h index b4dac4fb6b61..d55ef562f0bb 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) return sb == blockdev_superblock; } +extern int __sync_blockdev(struct block_device *bdev, int wait); + #else static inline void bdev_cache_init(void) { @@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) { return 0; } + +static inline int __sync_blockdev(struct block_device *bdev, int wait) +{ + return 0; +} #endif /* @@ -66,3 +73,13 @@ extern void __init mnt_init(void); * fs_struct.c */ extern void chroot_fs_refs(struct path *, struct path *); + +/* + * file_table.c + */ +extern void mark_files_ro(struct super_block *); + +/* + * super.c + */ +extern int do_remount_sb(struct super_block *, int, void *, int); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b4cbe9603c7d..068b34b5a107 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst static void isofs_put_super(struct super_block *sb) { struct isofs_sb_info *sbi = ISOFS_SB(sb); + #ifdef CONFIG_JOLIET + lock_kernel(); + if (sbi->s_nls_iocharset) { unload_nls(sbi->s_nls_iocharset); sbi->s_nls_iocharset = NULL; } + + unlock_kernel(); #endif kfree(sbi); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 249305d65d5b..3451a81b2142 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -20,6 +20,7 @@ #include <linux/vmalloc.h> #include <linux/vfs.h> #include <linux/crc32.h> +#include <linux/smp_lock.h> #include "nodelist.h" static int jffs2_flash_setup(struct jffs2_sb_info *c); @@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) This also catches the case where it was stopped and this is just a remount to restart it. Flush the writebuffer, if neccecary, else we loose it */ + lock_kernel(); if (!(sb->s_flags & MS_RDONLY)) { jffs2_stop_garbage_collect_thread(c); mutex_lock(&c->alloc_sem); @@ -399,24 +401,10 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) *flags |= MS_NOATIME; + unlock_kernel(); return 0; } -void jffs2_write_super (struct super_block *sb) -{ - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - sb->s_dirt = 0; - - if (sb->s_flags & MS_RDONLY) - return; - - D1(printk(KERN_DEBUG "jffs2_write_super()\n")); - jffs2_garbage_collect_trigger(c); - jffs2_erase_pending_blocks(c, 0); - jffs2_flush_wbuf_gc(c, 0); -} - - /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. */ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 5e194a5c8e29..2228380c47b9 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); -void jffs2_write_super (struct super_block *); int jffs2_remount_fs (struct super_block *, int *, char *); int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); void jffs2_gc_release_inode(struct jffs2_sb_info *c, diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 4c4e18c54a51..07a22caf2687 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -53,10 +53,29 @@ static void jffs2_i_init_once(void *foo) inode_init_once(&f->vfs_inode); } +static void jffs2_write_super(struct super_block *sb) +{ + struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + + lock_super(sb); + sb->s_dirt = 0; + + if (!(sb->s_flags & MS_RDONLY)) { + D1(printk(KERN_DEBUG "jffs2_write_super()\n")); + jffs2_garbage_collect_trigger(c); + jffs2_erase_pending_blocks(c, 0); + jffs2_flush_wbuf_gc(c, 0); + } + + unlock_super(sb); +} + static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); @@ -174,6 +193,11 @@ static void jffs2_put_super (struct super_block *sb) D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); + lock_kernel(); + + if (sb->s_dirt) + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); @@ -192,6 +216,8 @@ static void jffs2_put_super (struct super_block *sb) if (c->mtd->sync) c->mtd->sync(c->mtd); + unlock_kernel(); + D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index d9b0e92b3602..09b1b6ee2186 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -32,6 +32,7 @@ #include <linux/crc32.h> #include <asm/uaccess.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include "jfs_incore.h" #include "jfs_filsys.h" @@ -183,6 +184,9 @@ static void jfs_put_super(struct super_block *sb) int rc; jfs_info("In jfs_put_super"); + + lock_kernel(); + rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); @@ -195,6 +199,8 @@ static void jfs_put_super(struct super_block *sb) sbi->direct_inode = NULL; kfree(sbi); + + unlock_kernel(); } enum { @@ -370,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) s64 newLVSize = 0; int rc = 0; int flag = JFS_SBI(sb)->flag; + int ret; if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + lock_kernel(); if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR "JFS: resize requires volume to be mounted read-write\n"); + unlock_kernel(); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); - if (rc) + if (rc) { + unlock_kernel(); return rc; + } } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { @@ -393,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + unlock_kernel(); + return ret; } if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; + unlock_kernel(); return rc; } if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) if (!(sb->s_flags & MS_RDONLY)) { rc = jfs_umount_rw(sb); - if (rc) + if (rc) { + unlock_kernel(); return rc; + } JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + unlock_kernel(); + return ret; } JFS_SBI(sb)->flag = flag; + unlock_kernel(); return 0; } diff --git a/fs/libfs.c b/fs/libfs.c index 80046ddf5063..ddfa89948c3f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -9,6 +9,8 @@ #include <linux/vfs.h> #include <linux/mutex.h> #include <linux/exportfs.h> +#include <linux/writeback.h> +#include <linux/buffer_head.h> #include <asm/uaccess.h> @@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, } EXPORT_SYMBOL_GPL(generic_fh_to_parent); +int simple_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* metadata-only; caller takes care of data */ + }; + struct inode *inode = dentry->d_inode; + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = sync_inode(inode, &wbc); + if (ret == 0) + ret = err; + return ret; +} +EXPORT_SYMBOL(simple_fsync); + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d4946c4c90e2..e5f206467e40 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t); const struct file_operations minix_dir_operations = { .read = generic_read_dir, .readdir = minix_readdir, - .fsync = minix_sync_file, + .fsync = simple_fsync, }; static inline void dir_put_page(struct page *page) diff --git a/fs/minix/file.c b/fs/minix/file.c index 17765f697e50..3eec3e607a87 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -6,15 +6,12 @@ * minix regular file handling primitives */ -#include <linux/buffer_head.h> /* for fsync_inode_buffers() */ #include "minix.h" /* * We have mostly NULLs here: the current defaults are OK for * the minix filesystem. */ -int minix_sync_file(struct file *, struct dentry *, int); - const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = minix_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; @@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = { .truncate = minix_truncate, .getattr = minix_getattr, }; - -int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= minix_sync_inode(inode); - return err ? -EIO : 0; -} diff --git a/fs/minix/inode.c b/fs/minix/inode.c index daad3c2740db..f91a23693597 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); + lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; @@ -554,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static struct buffer_head *minix_update_inode(struct inode *inode) -{ - if (INODE_VERSION(inode) == MINIX_V1) - return V1_minix_update_inode(inode); - else - return V2_minix_update_inode(inode); -} - -static int minix_write_inode(struct inode * inode, int wait) -{ - brelse(minix_update_inode(inode)); - return 0; -} - -int minix_sync_inode(struct inode * inode) +static int minix_write_inode(struct inode *inode, int wait) { int err = 0; struct buffer_head *bh; - bh = minix_update_inode(inode); - if (bh && buffer_dirty(bh)) - { + if (INODE_VERSION(inode) == MINIX_V1) + bh = V1_minix_update_inode(inode); + else + bh = V2_minix_update_inode(inode); + if (!bh) + return -EIO; + if (wait && buffer_dirty(bh)) { sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { + if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", inode->i_sb->s_id, inode->i_ino); - err = -1; + err = -EIO; } } - else if (!bh) - err = -1; brelse (bh); return err; } diff --git a/fs/minix/minix.h b/fs/minix/minix.h index e6a0b193bea4..cb7fdd11f9a5 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping, extern void V1_minix_truncate(struct inode *); extern void V2_minix_truncate(struct inode *); extern void minix_truncate(struct inode *); -extern int minix_sync_inode(struct inode *); extern void minix_set_inode(struct inode *, dev_t); extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int); extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int); @@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*); extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*); extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**); extern ino_t minix_inode_by_name(struct dentry*); -extern int minix_sync_file(struct file *, struct dentry *, int); extern const struct inode_operations minix_file_inode_operations; extern const struct inode_operations minix_dir_inode_operations; diff --git a/fs/namei.c b/fs/namei.c index c82805d088e1..527119afb6a5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd return result; } +static __always_inline void set_root(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + read_lock(&fs->lock); + nd->root = fs->root; + path_get(&nd->root); + read_unlock(&fs->lock); + } +} + static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { int res = 0; @@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { - struct fs_struct *fs = current->fs; - + set_root(nd); path_put(&nd->path); - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + nd->path = nd->root; + path_get(&nd->root); } res = link_path_walk(link, nd); @@ -668,23 +675,23 @@ loop: return err; } -int follow_up(struct vfsmount **mnt, struct dentry **dentry) +int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; spin_lock(&vfsmount_lock); - parent=(*mnt)->mnt_parent; - if (parent == *mnt) { + parent = path->mnt->mnt_parent; + if (parent == path->mnt) { spin_unlock(&vfsmount_lock); return 0; } mntget(parent); - mountpoint=dget((*mnt)->mnt_mountpoint); + mountpoint = dget(path->mnt->mnt_mountpoint); spin_unlock(&vfsmount_lock); - dput(*dentry); - *dentry = mountpoint; - mntput(*mnt); - *mnt = parent; + dput(path->dentry); + path->dentry = mountpoint; + mntput(path->mnt); + path->mnt = parent; return 1; } @@ -695,7 +702,7 @@ static int __follow_mount(struct path *path) { int res = 0; while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; dput(path->dentry); @@ -708,32 +715,32 @@ static int __follow_mount(struct path *path) return res; } -static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static void follow_mount(struct path *path) { - while (d_mountpoint(*dentry)) { - struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; - dput(*dentry); - mntput(*mnt); - *mnt = mounted; - *dentry = dget(mounted->mnt_root); + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); } } /* no need for dcache_lock, as serialization is taken care in * namespace.c */ -int follow_down(struct vfsmount **mnt, struct dentry **dentry) +int follow_down(struct path *path) { struct vfsmount *mounted; - mounted = lookup_mnt(*mnt, *dentry); + mounted = lookup_mnt(path); if (mounted) { - dput(*dentry); - mntput(*mnt); - *mnt = mounted; - *dentry = dget(mounted->mnt_root); + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); return 1; } return 0; @@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) static __always_inline void follow_dotdot(struct nameidata *nd) { - struct fs_struct *fs = current->fs; + set_root(nd); while(1) { struct vfsmount *parent; struct dentry *old = nd->path.dentry; - read_lock(&fs->lock); - if (nd->path.dentry == fs->root.dentry && - nd->path.mnt == fs->root.mnt) { - read_unlock(&fs->lock); + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { break; } - read_unlock(&fs->lock); spin_lock(&dcache_lock); if (nd->path.dentry != nd->path.mnt->mnt_root) { nd->path.dentry = dget(nd->path.dentry->d_parent); @@ -775,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) mntput(nd->path.mnt); nd->path.mnt = parent; } - follow_mount(&nd->path.mnt, &nd->path.dentry); + follow_mount(&nd->path); } /* @@ -1017,25 +1021,23 @@ static int path_walk(const char *name, struct nameidata *nd) return link_path_walk(name, nd); } -/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int do_path_lookup(int dfd, const char *name, - unsigned int flags, struct nameidata *nd) +static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; int fput_needed; struct file *file; - struct fs_struct *fs = current->fs; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; + nd->root.mnt = NULL; if (*name=='/') { - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + set_root(nd); + nd->path = nd->root; + path_get(&nd->root); } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; read_lock(&fs->lock); nd->path = fs->pwd; path_get(&fs->pwd); @@ -1063,17 +1065,29 @@ static int do_path_lookup(int dfd, const char *name, fput_light(file, fput_needed); } + return 0; - retval = path_walk(name, nd); +fput_fail: + fput_light(file, fput_needed); +out_fail: + return retval; +} + +/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ +static int do_path_lookup(int dfd, const char *name, + unsigned int flags, struct nameidata *nd) +{ + int retval = path_init(dfd, name, flags, nd); + if (!retval) + retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); -out_fail: + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } return retval; - -fput_fail: - fput_light(file, fput_needed); - goto out_fail; } int path_lookup(const char *name, unsigned int flags, @@ -1113,14 +1127,18 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->path.dentry = dentry; nd->path.mnt = mnt; path_get(&nd->path); + nd->root = nd->path; + path_get(&nd->root); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); - return retval; + path_put(&nd->root); + nd->root.mnt = NULL; + return retval; } /** @@ -1676,9 +1694,14 @@ struct file *do_filp_open(int dfd, const char *pathname, /* * Create - we need to know the parent. */ - error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); + error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); + error = path_walk(pathname, &nd); + if (error) + return ERR_PTR(error); + if (unlikely(!audit_dummy_context())) + audit_inode(pathname, nd.path.dentry); /* * We have the parent and last component. First of all, check @@ -1806,6 +1829,8 @@ exit: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); exit_parent: + if (nd.root.mnt) + path_put(&nd.root); path_put(&nd.path); return ERR_PTR(error); diff --git a/fs/namespace.c b/fs/namespace.c index 134d494158d9..2dd333b0fe7f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); - atomic_set(&mnt->__mnt_writers, 0); +#ifdef CONFIG_SMP + mnt->mnt_writers = alloc_percpu(int); + if (!mnt->mnt_writers) + goto out_free_devname; +#else + mnt->mnt_writers = 0; +#endif } return mnt; +#ifdef CONFIG_SMP +out_free_devname: + kfree(mnt->mnt_devname); +#endif out_free_id: mnt_free_id(mnt); out_free_cache: @@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt) } EXPORT_SYMBOL_GPL(__mnt_is_readonly); -struct mnt_writer { - /* - * If holding multiple instances of this lock, they - * must be ordered by cpu number. - */ - spinlock_t lock; - struct lock_class_key lock_class; /* compiles out with !lockdep */ - unsigned long count; - struct vfsmount *mnt; -} ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); +static inline void inc_mnt_writers(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; +#else + mnt->mnt_writers++; +#endif +} -static int __init init_mnt_writers(void) +static inline void dec_mnt_writers(struct vfsmount *mnt) { - int cpu; - for_each_possible_cpu(cpu) { - struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); - spin_lock_init(&writer->lock); - lockdep_set_class(&writer->lock, &writer->lock_class); - writer->count = 0; - } - return 0; +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; +#else + mnt->mnt_writers--; +#endif } -fs_initcall(init_mnt_writers); -static void unlock_mnt_writers(void) +static unsigned int count_mnt_writers(struct vfsmount *mnt) { +#ifdef CONFIG_SMP + unsigned int count = 0; int cpu; - struct mnt_writer *cpu_writer; for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_unlock(&cpu_writer->lock); + count += *per_cpu_ptr(mnt->mnt_writers, cpu); } -} -static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) -{ - if (!cpu_writer->mnt) - return; - /* - * This is in case anyone ever leaves an invalid, - * old ->mnt and a count of 0. - */ - if (!cpu_writer->count) - return; - atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); - cpu_writer->count = 0; -} - /* - * must hold cpu_writer->lock - */ -static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, - struct vfsmount *mnt) -{ - if (cpu_writer->mnt == mnt) - return; - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = mnt; + return count; +#else + return mnt->mnt_writers; +#endif } /* @@ -253,74 +236,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, int mnt_want_write(struct vfsmount *mnt) { int ret = 0; - struct mnt_writer *cpu_writer; - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); + preempt_disable(); + inc_mnt_writers(mnt); + /* + * The store to inc_mnt_writers must be visible before we pass + * MNT_WRITE_HOLD loop below, so that the slowpath can see our + * incremented count after it has set MNT_WRITE_HOLD. + */ + smp_mb(); + while (mnt->mnt_flags & MNT_WRITE_HOLD) + cpu_relax(); + /* + * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will + * be set to match its requirements. So we must not load that until + * MNT_WRITE_HOLD is cleared. + */ + smp_rmb(); if (__mnt_is_readonly(mnt)) { + dec_mnt_writers(mnt); ret = -EROFS; goto out; } - use_cpu_writer_for_mount(cpu_writer, mnt); - cpu_writer->count++; out: - spin_unlock(&cpu_writer->lock); - put_cpu_var(mnt_writers); + preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); -static void lock_mnt_writers(void) -{ - int cpu; - struct mnt_writer *cpu_writer; - - for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = NULL; - } +/** + * mnt_clone_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This is effectively like mnt_want_write, except + * it must only be used to take an extra write reference + * on a mountpoint that we already know has a write reference + * on it. This allows some optimisation. + * + * After finished, mnt_drop_write must be called as usual to + * drop the reference. + */ +int mnt_clone_write(struct vfsmount *mnt) +{ + /* superblock may be r/o */ + if (__mnt_is_readonly(mnt)) + return -EROFS; + preempt_disable(); + inc_mnt_writers(mnt); + preempt_enable(); + return 0; } +EXPORT_SYMBOL_GPL(mnt_clone_write); -/* - * These per-cpu write counts are not guaranteed to have - * matched increments and decrements on any given cpu. - * A file open()ed for write on one cpu and close()d on - * another cpu will imbalance this count. Make sure it - * does not get too far out of whack. +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already */ -static void handle_write_count_underflow(struct vfsmount *mnt) +int mnt_want_write_file(struct file *file) { - if (atomic_read(&mnt->__mnt_writers) >= - MNT_WRITER_UNDERFLOW_LIMIT) - return; - /* - * It isn't necessary to hold all of the locks - * at the same time, but doing it this way makes - * us share a lot more code. - */ - lock_mnt_writers(); - /* - * vfsmount_lock is for mnt_flags. - */ - spin_lock(&vfsmount_lock); - /* - * If coalescing the per-cpu writer counts did not - * get us back to a positive writer count, we have - * a bug. - */ - if ((atomic_read(&mnt->__mnt_writers) < 0) && - !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { - WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " - "count: %d\n", - mnt, atomic_read(&mnt->__mnt_writers)); - /* use the flag to keep the dmesg spam down */ - mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; - } - spin_unlock(&vfsmount_lock); - unlock_mnt_writers(); + if (!(file->f_mode & FMODE_WRITE)) + return mnt_want_write(file->f_path.mnt); + else + return mnt_clone_write(file->f_path.mnt); } +EXPORT_SYMBOL_GPL(mnt_want_write_file); /** * mnt_drop_write - give up write access to a mount @@ -332,37 +314,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) */ void mnt_drop_write(struct vfsmount *mnt) { - int must_check_underflow = 0; - struct mnt_writer *cpu_writer; - - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); - - use_cpu_writer_for_mount(cpu_writer, mnt); - if (cpu_writer->count > 0) { - cpu_writer->count--; - } else { - must_check_underflow = 1; - atomic_dec(&mnt->__mnt_writers); - } - - spin_unlock(&cpu_writer->lock); - /* - * Logically, we could call this each time, - * but the __mnt_writers cacheline tends to - * be cold, and makes this expensive. - */ - if (must_check_underflow) - handle_write_count_underflow(mnt); - /* - * This could be done right after the spinlock - * is taken because the spinlock keeps us on - * the cpu, and disables preemption. However, - * putting it here bounds the amount that - * __mnt_writers can underflow. Without it, - * we could theoretically wrap __mnt_writers. - */ - put_cpu_var(mnt_writers); + preempt_disable(); + dec_mnt_writers(mnt); + preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_drop_write); @@ -370,24 +324,41 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - lock_mnt_writers(); + spin_lock(&vfsmount_lock); + mnt->mnt_flags |= MNT_WRITE_HOLD; /* - * With all the locks held, this value is stable + * After storing MNT_WRITE_HOLD, we'll read the counters. This store + * should be visible before we do. */ - if (atomic_read(&mnt->__mnt_writers) > 0) { - ret = -EBUSY; - goto out; - } + smp_mb(); + /* - * nobody can do a successful mnt_want_write() with all - * of the counts in MNT_DENIED_WRITE and the locks held. + * With writers on hold, if this value is zero, then there are + * definitely no active writers (although held writers may subsequently + * increment the count, they'll have to wait, and decrement it after + * seeing MNT_READONLY). + * + * It is OK to have counter incremented on one CPU and decremented on + * another: the sum will add up correctly. The danger would be when we + * sum up each counter, if we read a counter before it is incremented, + * but then read another CPU's count which it has been subsequently + * decremented from -- we would see more decrements than we should. + * MNT_WRITE_HOLD protects against this scenario, because + * mnt_want_write first increments count, then smp_mb, then spins on + * MNT_WRITE_HOLD, so it can't be decremented by another CPU while + * we're counting up here. */ - spin_lock(&vfsmount_lock); - if (!ret) + if (count_mnt_writers(mnt) > 0) + ret = -EBUSY; + else mnt->mnt_flags |= MNT_READONLY; + /* + * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers + * that become unheld will see MNT_READONLY. + */ + smp_wmb(); + mnt->mnt_flags &= ~MNT_WRITE_HOLD; spin_unlock(&vfsmount_lock); -out: - unlock_mnt_writers(); return ret; } @@ -410,6 +381,9 @@ void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); mnt_free_id(mnt); +#ifdef CONFIG_SMP + free_percpu(mnt->mnt_writers); +#endif kmem_cache_free(mnt_cache, mnt); } @@ -442,11 +416,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, * lookup_mnt increments the ref count before returning * the vfsmount struct. */ -struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; spin_lock(&vfsmount_lock); - if ((child_mnt = __lookup_mnt(mnt, dentry, 1))) + if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); spin_unlock(&vfsmount_lock); return child_mnt; @@ -604,38 +578,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, static inline void __mntput(struct vfsmount *mnt) { - int cpu; struct super_block *sb = mnt->mnt_sb; /* - * We don't have to hold all of the locks at the - * same time here because we know that we're the - * last reference to mnt and that no new writers - * can come in. - */ - for_each_possible_cpu(cpu) { - struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - if (cpu_writer->mnt != mnt) { - spin_unlock(&cpu_writer->lock); - continue; - } - atomic_add(cpu_writer->count, &mnt->__mnt_writers); - cpu_writer->count = 0; - /* - * Might as well do this so that no one - * ever sees the pointer and expects - * it to be valid. - */ - cpu_writer->mnt = NULL; - spin_unlock(&cpu_writer->lock); - } - /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this * happens, the filesystem was probably unable * to make r/w->r/o transitions. */ - WARN_ON(atomic_read(&mnt->__mnt_writers)); + /* + * atomic_dec_and_lock() used to deal with ->mnt_count decrements + * provides barriers, so count_mnt_writers() below is safe. AV + */ + WARN_ON(count_mnt_writers(mnt)); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); @@ -1106,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags) * we just try to remount it readonly. */ down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) { - lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); - unlock_kernel(); - } up_write(&sb->s_umount); return retval; } @@ -1253,11 +1204,11 @@ Enomem: return NULL; } -struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *collect_mounts(struct path *path) { struct vfsmount *tree; down_write(&namespace_sem); - tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); + tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); return tree; } @@ -1430,7 +1381,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) goto out_unlock; err = -ENOENT; - if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) + if (!d_unlinked(path->dentry)) err = attach_recursive_mnt(mnt, path, NULL); out_unlock: mutex_unlock(&path->dentry->d_inode->i_mutex); @@ -1601,7 +1552,7 @@ static int do_move_mount(struct path *path, char *old_name) down_write(&namespace_sem); while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) @@ -1612,7 +1563,7 @@ static int do_move_mount(struct path *path, char *old_name) if (IS_DEADDIR(path->dentry->d_inode)) goto out1; - if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) + if (d_unlinked(path->dentry)) goto out1; err = -EINVAL; @@ -1676,7 +1627,9 @@ static int do_new_mount(struct path *path, char *type, int flags, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + lock_kernel(); mnt = do_kern_mount(type, flags, name, data); + unlock_kernel(); if (IS_ERR(mnt)) return PTR_ERR(mnt); @@ -1695,10 +1648,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, down_write(&namespace_sem); /* Something was mounted here while we slept */ while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; - if (!check_mnt(path->mnt)) + if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ @@ -2092,10 +2045,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, if (retval < 0) goto out3; - lock_kernel(); retval = do_mount((char *)dev_page, dir_page, (char *)type_page, flags, (void *)data_page); - unlock_kernel(); free_page(data_page); out3: @@ -2175,9 +2126,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = -ENOENT; if (IS_DEADDIR(new.dentry->d_inode)) goto out2; - if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) + if (d_unlinked(new.dentry)) goto out2; - if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) + if (d_unlinked(old.dentry)) goto out2; error = -EBUSY; if (new.mnt == root.mnt || diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d642f0e5b365..b99ce205b1bd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); + lock_kernel(); + ncp_lock_server(server); ncp_disconnect(server); ncp_unlock_server(server); @@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb) vfree(server->packet); sb->s_fs_info = NULL; kfree(server); + + unlock_kernel(); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 64a288ee046d..f01caec84463 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -154,7 +154,7 @@ out_err: goto out; out_follow: while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; goto out; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d2d67781c579..26127b69a275 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) if (data == NULL) return -ENOMEM; + lock_kernel(); /* fill out struct with values from existing mount */ data->flags = nfss->flags; data->rsize = nfss->rsize; @@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) error = nfs_compare_remount_data(nfss, data); out: kfree(data); + unlock_kernel(); return error; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5839b229cd0e..8b1f8efb4690 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid) return exp_find_key(clp, FSID_NUM, fsidv, NULL); } -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, + struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, return ERR_PTR(-ENOENT); key.ex_client = clp; - key.ex_path.mnt = mnt; - key.ex_path.dentry = dentry; + key.ex_path = *path; exp = svc_export_lookup(&key); if (exp == NULL) @@ -873,24 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export *exp_parent(svc_client *clp, struct path *path) { - svc_export *exp; - - dget(dentry); - exp = exp_get_by_name(clp, mnt, dentry, reqp); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = exp_get_by_name(clp, mnt, dentry, reqp); + struct dentry *saved = dget(path->dentry); + svc_export *exp = exp_get_by_name(clp, path, NULL); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = exp_get_by_name(clp, path, NULL); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } @@ -1018,7 +1011,7 @@ exp_export(struct nfsctl_export *nxp) goto out_put_clp; err = -EINVAL; - exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(clp, &path, NULL); memset(&new, 0, sizeof(new)); @@ -1135,7 +1128,7 @@ exp_unexport(struct nfsctl_export *nxp) goto out_domain; err = -EINVAL; - exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(dom, &path, NULL); path_put(&path); if (IS_ERR(exp)) goto out_domain; @@ -1177,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, path.mnt, path.dentry, NULL); + exp = exp_parent(clp, &path); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -1207,7 +1200,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, if (IS_ERR(ek)) return ERR_CAST(ek); - exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); + exp = exp_get_by_name(clp, &ek->ek_path, reqp); cache_put(&ek->h, &svc_expkey_cache); if (IS_ERR(exp)) @@ -1247,8 +1240,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) * use exp_get_by_name() or exp_find(). */ struct svc_export * -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); @@ -1256,8 +1248,7 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, - &rqstp->rq_chandle); + exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1269,8 +1260,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, - &rqstp->rq_chandle); + gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1309,23 +1299,19 @@ gss: } struct svc_export * -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) { - struct svc_export *exp; - - dget(dentry); - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bd584bcf1d9f..99f835753596 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -101,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - struct dentry *mounts = dget(dentry); + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dentry)}; int err = 0; - while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + while (d_mountpoint(path.dentry) && follow_down(&path)) + ; - exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); + exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { if (PTR_ERR(exp2) != -ENOENT) err = PTR_ERR(exp2); - dput(mounts); - mntput(mnt); + path_put(&path); goto out; } if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ /* - * This is subtle: dentry is *not* under mnt at this point. - * The only reason we are safe is that original mnt is pinned - * down by exp, so we should dput before putting exp. + * This is subtle: path.dentry is *not* on path.mnt + * at this point. The only reason we are safe is that + * original mnt is pinned down by exp, so we should + * put path *before* putting exp */ - dput(dentry); - *dpp = mounts; - exp_put(exp); + *dpp = path.dentry; + path.dentry = dentry; *expp = exp2; - } else { - exp_put(exp2); - dput(mounts); + exp2 = exp; } - mntput(mnt); + path_put(&path); + exp_put(exp2); out: return err; } @@ -169,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, /* checking mountpoint crossing is very different when stepping up */ struct svc_export *exp2 = NULL; struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - dentry = dget(dparent); - while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dparent)}; + + while (path.dentry == path.mnt->mnt_root && + follow_up(&path)) ; - dp = dget_parent(dentry); - dput(dentry); - dentry = dp; + dp = dget_parent(path.dentry); + dput(path.dentry); + path.dentry = dp; - exp2 = rqst_exp_parent(rqstp, mnt, dentry); + exp2 = rqst_exp_parent(rqstp, &path); if (PTR_ERR(exp2) == -ENOENT) { - dput(dentry); dentry = dget(dparent); } else if (IS_ERR(exp2)) { host_err = PTR_ERR(exp2); - dput(dentry); - mntput(mnt); + path_put(&path); goto out_nfserr; } else { + dentry = dget(path.dentry); exp_put(exp); exp = exp2; } - mntput(mnt); + path_put(&path); } } else { fh_lock(fhp); diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 064279e33bbb..36df60b6d8a4 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -31,21 +31,26 @@ #include "dat.h" #include "alloc.h" +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) +{ + return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); +} + int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, __u64 *ptrp) { - __u64 ptr; + sector_t blocknr; int ret; down_read(&bmap->b_sem); ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); if (ret < 0) goto out; - if (bmap->b_pops->bpop_translate != NULL) { - ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); - if (ret < 0) - goto out; - *ptrp = ptr; + if (NILFS_BMAP_USE_VBN(bmap)) { + ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp, + &blocknr); + if (!ret) + *ptrp = blocknr; } out: @@ -53,6 +58,16 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, return ret; } +int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + int ret; + + down_read(&bmap->b_sem); + ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks); + up_read(&bmap->b_sem); + return ret; +} /** * nilfs_bmap_lookup - find a record @@ -101,8 +116,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (n < 0) return n; ret = nilfs_btree_convert_and_insert( - bmap, key, ptr, keys, ptrs, n, - NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH); + bmap, key, ptr, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags |= NILFS_BMAP_LARGE; @@ -158,8 +172,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) if (n < 0) return n; ret = nilfs_direct_delete_and_convert( - bmap, key, keys, ptrs, n, - NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH); + bmap, key, keys, ptrs, n); if (ret == 0) bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; @@ -417,38 +430,6 @@ void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) mark_inode_dirty(bmap->b_inode); } -int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 0); -} - -void nilfs_bmap_put_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - brelse(bh); -} - -int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr, - struct buffer_head **bhp) -{ - int ret; - - ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, - ptr, 0, bhp, 1); - if (ret < 0) - return ret; - set_buffer_nilfs_volatile(*bhp); - return 0; -} - -void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap, - struct buffer_head *bh) -{ - nilfs_btnode_delete(bh); -} - __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { @@ -476,11 +457,6 @@ __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) return NILFS_BMAP_INVALID_PTR; } -static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) -{ - return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); -} - #define NILFS_BMAP_GROUP_DIV 8 __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) { @@ -493,64 +469,51 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) (entries_per_group / NILFS_BMAP_GROUP_DIV); } -static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req, + sector_t blocknr) { - return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); -} - -static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req, - sector_t blocknr) -{ - nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req, - blocknr); -} + struct inode *dat = nilfs_bmap_get_dat(bmap); + int ret; -static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); + ret = nilfs_dat_prepare_start(dat, &req->bpr_req); + if (likely(!ret)) + nilfs_dat_commit_start(dat, &req->bpr_req, blocknr); + return ret; } -static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); } -static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0); -} - -static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { - nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); + nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); } -static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) +void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) { nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); } @@ -566,128 +529,44 @@ int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); } -int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) +int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { + struct inode *dat = nilfs_bmap_get_dat(bmap); int ret; - ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); + ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req); if (ret < 0) return ret; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); + ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req); if (ret < 0) - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); + nilfs_dat_abort_end(dat, &oldreq->bpr_req); return ret; } -void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) +void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { - bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq); -} + struct inode *dat = nilfs_bmap_get_dat(bmap); -void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *oldreq, - union nilfs_bmap_ptr_req *newreq) -{ - bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); - bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq); + nilfs_dat_commit_end(dat, &oldreq->bpr_req, + bmap->b_ptr_type == NILFS_BMAP_PTR_VS); + nilfs_dat_commit_alloc(dat, &newreq->bpr_req); } -static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, - __u64 *ptrp) +void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) { - sector_t blocknr; - int ret; - - ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); - if (ret < 0) - return ret; - if (ptrp != NULL) - *ptrp = blocknr; - return 0; -} + struct inode *dat = nilfs_bmap_get_dat(bmap); -static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* ignore target ptr */ - req->bpr_ptr = bmap->b_last_allocated_ptr++; - return 0; + nilfs_dat_abort_end(dat, &oldreq->bpr_req); + nilfs_dat_abort_alloc(dat, &newreq->bpr_req); } -static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - /* do nothing */ -} - -static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap, - union nilfs_bmap_ptr_req *req) -{ - bmap->b_last_allocated_ptr--; -} - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_v, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, - .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, - .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, - .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, - .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, - .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt, - .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, - - .bpop_translate = nilfs_bmap_translate_v, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = { - .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p, - .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p, - .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - -static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { - .bpop_prepare_alloc_ptr = NULL, - .bpop_commit_alloc_ptr = NULL, - .bpop_abort_alloc_ptr = NULL, - .bpop_prepare_start_ptr = NULL, - .bpop_commit_start_ptr = NULL, - .bpop_abort_start_ptr = NULL, - .bpop_prepare_end_ptr = NULL, - .bpop_commit_end_ptr = NULL, - .bpop_abort_end_ptr = NULL, - - .bpop_translate = NULL, -}; - static struct lock_class_key nilfs_bmap_dat_lock_key; /** @@ -714,31 +593,26 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; switch (bmap->b_inode->i_ino) { case NILFS_DAT_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_p; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_P; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: - bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VS; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; default: - bmap->b_pops = &nilfs_bmap_ptr_ops_v; - bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_ptr_type = NILFS_BMAP_PTR_VM; + bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; break; } return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? - nilfs_btree_init(bmap, - NILFS_BMAP_LARGE_LOW, - NILFS_BMAP_LARGE_HIGH) : - nilfs_direct_init(bmap, - NILFS_BMAP_SMALL_LOW, - NILFS_BMAP_SMALL_HIGH); + nilfs_btree_init(bmap) : nilfs_direct_init(bmap); } /** @@ -764,7 +638,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); init_rwsem(&bmap->b_sem); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; - bmap->b_pops = &nilfs_bmap_ptr_ops_gc; + bmap->b_ptr_type = NILFS_BMAP_PTR_U; bmap->b_last_allocated_key = 0; bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; bmap->b_state = 0; diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 4f2708abb1ba..b2890cdcef12 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -64,6 +64,8 @@ struct nilfs_bmap_stats { */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); + int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *, + unsigned); int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); int (*bop_delete)(struct nilfs_bmap *, __u64); void (*bop_clear)(struct nilfs_bmap *); @@ -86,34 +88,6 @@ struct nilfs_bmap_operations { }; -/** - * struct nilfs_bmap_ptr_operations - bmap ptr operation table - */ -struct nilfs_bmap_ptr_operations { - int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - sector_t); - void (*bpop_abort_start_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_commit_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - void (*bpop_abort_end_ptr)(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *); - - int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); -}; - - #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) #define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) #define NILFS_BMAP_NEW_PTR_INIT \ @@ -131,11 +105,9 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) * @b_sem: semaphore * @b_inode: owner of bmap * @b_ops: bmap operation table - * @b_pops: bmap ptr operation table - * @b_low: low watermark of conversion - * @b_high: high watermark of conversion * @b_last_allocated_key: last allocated key for data block * @b_last_allocated_ptr: last allocated ptr for data block + * @b_ptr_type: pointer type * @b_state: state */ struct nilfs_bmap { @@ -146,14 +118,22 @@ struct nilfs_bmap { struct rw_semaphore b_sem; struct inode *b_inode; const struct nilfs_bmap_operations *b_ops; - const struct nilfs_bmap_ptr_operations *b_pops; - __u64 b_low; - __u64 b_high; __u64 b_last_allocated_key; __u64 b_last_allocated_ptr; + int b_ptr_type; int b_state; }; +/* pointer type */ +#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */ +#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single + version) */ +#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple + versions) */ +#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */ + +#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0) + /* state */ #define NILFS_BMAP_DIRTY 0x00000001 @@ -162,6 +142,7 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); +int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); @@ -182,7 +163,67 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); /* * Internal use only */ +struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); +int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); +static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + return nilfs_bmap_prepare_alloc_v(bmap, req); + /* ignore target ptr */ + req->bpr_ptr = bmap->b_last_allocated_ptr++; + return 0; +} + +static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_commit_alloc_v(bmap, req); +} + +static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_abort_alloc_v(bmap, req); + else + bmap->b_last_allocated_ptr--; +} + +int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *); + +static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_bmap_prepare_end_v(bmap, req) : 0; +} + +static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_commit_end_v(bmap, req); +} + +static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_bmap_abort_end_v(bmap, req); +} + +int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *, + sector_t); int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); @@ -193,28 +234,20 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); -int nilfs_bmap_prepare_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_commit_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); -void nilfs_bmap_abort_update(struct nilfs_bmap *, - union nilfs_bmap_ptr_req *, - union nilfs_bmap_ptr_req *); +int nilfs_bmap_prepare_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_update_v(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); -int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); -int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, - struct buffer_head **); -void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); - - /* Assume that bmap semaphore is locked. */ static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4cc07b2c30e0..7e0b61be212e 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -46,15 +46,18 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); } -static struct address_space_operations def_btnode_aops; +static struct address_space_operations def_btnode_aops = { + .sync_page = block_sync_page, +}; -void nilfs_btnode_cache_init(struct address_space *btnc) +void nilfs_btnode_cache_init(struct address_space *btnc, + struct backing_dev_info *bdi) { btnc->host = NULL; /* can safely set to host inode ? */ btnc->flags = 0; mapping_set_gfp_mask(btnc, GFP_NOFS); btnc->assoc_mapping = NULL; - btnc->backing_dev_info = &default_backing_dev_info; + btnc->backing_dev_info = bdi; btnc->a_ops = &def_btnode_aops; } diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 35faa86444a7..3e2275172ed6 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -38,7 +38,7 @@ struct nilfs_btnode_chkey_ctxt { }; void nilfs_btnode_cache_init_once(struct address_space *); -void nilfs_btnode_cache_init(struct address_space *); +void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, struct buffer_head **, int); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 6b37a2767293..aa412724b64e 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -29,6 +29,7 @@ #include "btnode.h" #include "btree.h" #include "alloc.h" +#include "dat.h" /** * struct nilfs_btree_path - A path on which B-tree operations are executed @@ -109,8 +110,7 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree, level < NILFS_BTREE_LEVEL_MAX; level++) { if (path[level].bp_bh != NULL) { - nilfs_bmap_put_block(&btree->bt_bmap, - path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = NULL; } /* sib_bh is released or deleted by prepare or commit @@ -123,10 +123,29 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree, } } - /* * B-tree node operations */ +static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, + struct buffer_head **bhp) +{ + struct address_space *btnc = + &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; + return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); +} + +static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, + __u64 ptr, struct buffer_head **bhp) +{ + struct address_space *btnc = + &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; + int ret; + + ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); + if (!ret) + set_buffer_nilfs_volatile(*bhp); + return ret; +} static inline int nilfs_btree_node_get_flags(const struct nilfs_btree *btree, @@ -488,8 +507,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, path[level].bp_index = index; for (level--; level >= minlevel; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(btree, path, level); @@ -535,8 +553,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, path[level].bp_index = index; for (level--; level > 0; level--) { - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, - &path[level].bp_bh); + ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(btree, path, level); @@ -579,6 +596,87 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, return ret; } +static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, + __u64 key, __u64 *ptrp, unsigned maxblocks) +{ + struct nilfs_btree *btree = (struct nilfs_btree *)bmap; + struct nilfs_btree_path *path; + struct nilfs_btree_node *node; + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int level = NILFS_BTREE_LEVEL_NODE_MIN; + int ret, cnt, index, maxlevel; + + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); + if (ret < 0) + goto out; + + if (NILFS_BMAP_USE_VBN(bmap)) { + dat = nilfs_bmap_get_dat(bmap); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + goto out; + ptr = blocknr; + } + cnt = 1; + if (cnt == maxblocks) + goto end; + + maxlevel = nilfs_btree_height(btree) - 1; + node = nilfs_btree_get_node(btree, path, level); + index = path[level].bp_index + 1; + for (;;) { + while (index < nilfs_btree_node_get_nchildren(btree, node)) { + if (nilfs_btree_node_get_key(btree, node, index) != + key + cnt) + goto end; + ptr2 = nilfs_btree_node_get_ptr(btree, node, index); + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + goto out; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt || ++cnt == maxblocks) + goto end; + index++; + continue; + } + if (level == maxlevel) + break; + + /* look-up right sibling node */ + node = nilfs_btree_get_node(btree, path, level + 1); + index = path[level + 1].bp_index + 1; + if (index >= nilfs_btree_node_get_nchildren(btree, node) || + nilfs_btree_node_get_key(btree, node, index) != key + cnt) + break; + ptr2 = nilfs_btree_node_get_ptr(btree, node, index); + path[level + 1].bp_index = index; + + brelse(path[level].bp_bh); + path[level].bp_bh = NULL; + ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); + if (ret < 0) + goto out; + node = nilfs_btree_get_nonroot_node(btree, path, level); + index = 0; + path[level].bp_index = index; + } + end: + *ptrp = ptr; + ret = cnt; + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + return ret; +} + static void nilfs_btree_promote_key(struct nilfs_btree *btree, struct nilfs_btree_path *path, int level, __u64 key) @@ -669,13 +767,13 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, nilfs_btree_node_get_key(btree, node, 0)); if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } @@ -722,14 +820,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, path[level + 1].bp_index--; if (move) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index -= nilfs_btree_node_get_nchildren(btree, node); path[level + 1].bp_index++; } else { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -781,7 +879,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, *keyp = nilfs_btree_node_get_key(btree, right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { @@ -790,7 +888,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, *keyp = nilfs_btree_node_get_key(btree, right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -897,12 +995,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ - if (btree->bt_ops->btop_find_target != NULL) + if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) path[level].bp_newreq.bpr_ptr = - btree->bt_ops->btop_find_target(btree, path, key); + nilfs_btree_find_target_v(btree, path, key); - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_data; @@ -924,8 +1022,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, if (pindex > 0) { sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -936,7 +1033,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; goto out; } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); } /* right sibling */ @@ -944,8 +1041,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, nilfs_btree_node_get_nchildren(btree, parent) - 1) { sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -956,19 +1052,19 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; goto out; } else - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, - &bh); + ret = nilfs_btree_get_new_block(btree, + path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; @@ -994,12 +1090,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; - ret = nilfs_bmap_get_new_block(&btree->bt_bmap, - path[level].bp_newreq.bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, + &bh); if (ret < 0) goto err_out_curr_node; @@ -1023,18 +1119,16 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* error */ err_out_curr_node: - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr( - &btree->bt_bmap, &path[level].bp_newreq); + nilfs_btnode_delete(path[level].bp_sib_bh); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); } - btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq); + nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); err_out_data: *levelp = level; stats->bs_nblocks = 0; @@ -1049,14 +1143,12 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, ptr); + if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) + nilfs_btree_set_target_v(btree, key, ptr); for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { - btree->bt_bmap.b_pops->bpop_commit_alloc_ptr( - &btree->bt_bmap, &path[level - 1].bp_newreq); - } + nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, + &path[level - 1].bp_newreq); path[level].bp_op(btree, path, level, &key, &ptr); } @@ -1153,7 +1245,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(btree, node, 0)); - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index += n; } @@ -1192,7 +1284,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, nilfs_btree_node_get_key(btree, right, 0)); path[level + 1].bp_index--; - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } @@ -1221,7 +1313,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, unlock_buffer(path[level].bp_bh); unlock_buffer(path[level].bp_sib_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); @@ -1252,7 +1344,7 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, unlock_buffer(path[level].bp_bh); unlock_buffer(path[level].bp_sib_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); + nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; } @@ -1276,7 +1368,7 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_node_move_left(btree, root, child, n); unlock_buffer(path[level].bp_bh); - nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } @@ -1300,12 +1392,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } + ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; if (nilfs_btree_node_get_nchildren(btree, node) > nilfs_btree_node_nchildren_min(btree, node)) { @@ -1321,8 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* left sibling */ sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex - 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -1343,8 +1432,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* right sibling */ sibptr = nilfs_btree_node_get_ptr(btree, parent, pindex + 1); - ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, - &bh); + ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; @@ -1381,12 +1469,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); - if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); - if (ret < 0) - goto err_out_child_node; - } + + ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; + /* child of the root node is deleted */ path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; @@ -1398,15 +1486,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* error */ err_out_curr_node: - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { - nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); - if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_abort_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + brelse(path[level].bp_sib_bh); + nilfs_bmap_abort_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); } *levelp = level; stats->bs_nblocks = 0; @@ -1420,9 +1505,8 @@ static void nilfs_btree_commit_delete(struct nilfs_btree *btree, int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) - btree->bt_bmap.b_pops->bpop_commit_end_ptr( - &btree->bt_bmap, &path[level].bp_oldreq); + nilfs_bmap_commit_end_ptr(&btree->bt_bmap, + &path[level].bp_oldreq); path[level].bp_op(btree, path, level, NULL, NULL); } @@ -1501,7 +1585,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; @@ -1515,9 +1599,9 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); - return (maxkey == key) && (nextmaxkey < bmap->b_low); + return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, @@ -1542,7 +1626,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, nchildren = nilfs_btree_node_get_nchildren(btree, root); WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); - ret = nilfs_bmap_get_block(bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; @@ -1563,7 +1647,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, } if (bh != NULL) - nilfs_bmap_put_block(bmap, bh); + brelse(bh); return nitems; } @@ -1584,10 +1668,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* for data */ /* cannot find near ptr */ - if (btree->bt_ops->btop_find_target != NULL) - dreq->bpr_ptr - = btree->bt_ops->btop_find_target(btree, NULL, key); - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq); + if (NILFS_BMAP_USE_VBN(bmap)) + dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); + + ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq); if (ret < 0) return ret; @@ -1595,11 +1679,11 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; - ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq); + ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq); if (ret < 0) goto err_out_dreq; - ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh); + ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); if (ret < 0) goto err_out_nreq; @@ -1612,9 +1696,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* error */ err_out_nreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq); + nilfs_bmap_abort_alloc_ptr(bmap, nreq); err_out_dreq: - bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq); + nilfs_bmap_abort_alloc_ptr(bmap, dreq); stats->bs_nblocks = 0; return ret; @@ -1624,7 +1708,7 @@ static void nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high, + int n, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) @@ -1642,12 +1726,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, /* convert and insert */ btree = (struct nilfs_btree *)bmap; - nilfs_btree_init(bmap, low, high); + nilfs_btree_init(bmap); if (nreq != NULL) { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); - bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq); - } + nilfs_bmap_commit_alloc_ptr(bmap, dreq); + nilfs_bmap_commit_alloc_ptr(bmap, nreq); /* create child node at level 1 */ lock_buffer(bh); @@ -1661,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_set_dirty(bmap); unlock_buffer(bh); - nilfs_bmap_put_block(bmap, bh); + brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); @@ -1669,8 +1751,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 2, 1, &keys[0], &tmpptr); } else { - if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) - bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); + nilfs_bmap_commit_alloc_ptr(bmap, dreq); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); @@ -1682,8 +1763,8 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_set_dirty(bmap); } - if (btree->bt_ops->btop_set_target != NULL) - btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr); + if (NILFS_BMAP_USE_VBN(bmap)) + nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr); } /** @@ -1694,13 +1775,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, * @keys: * @ptrs: * @n: - * @low: - * @high: */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr, - const __u64 *keys, const __u64 *ptrs, - int n, __u64 low, __u64 high) + const __u64 *keys, const __u64 *ptrs, int n) { struct buffer_head *bh; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; @@ -1725,7 +1803,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, if (ret < 0) return ret; nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, - low, high, di, ni, bh); + di, ni, bh); nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); return 0; } @@ -1754,9 +1832,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; - ret = nilfs_bmap_prepare_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (ret < 0) return ret; @@ -1768,9 +1846,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_abort_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); return ret; } } @@ -1784,9 +1862,9 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, { struct nilfs_btree_node *parent; - nilfs_bmap_commit_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_commit_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( @@ -1805,9 +1883,9 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, struct nilfs_btree_path *path, int level) { - nilfs_bmap_abort_update(&btree->bt_bmap, - &path[level].bp_oldreq, - &path[level].bp_newreq); + nilfs_bmap_abort_update_v(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, @@ -1930,7 +2008,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, goto out; } - ret = btree->bt_ops->btop_propagate(btree, path, level, bh); + ret = NILFS_BMAP_USE_VBN(bmap) ? + nilfs_btree_propagate_v(btree, path, level, bh) : + nilfs_btree_propagate_p(btree, path, level, bh); out: nilfs_btree_clear_path(btree, path); @@ -2066,12 +2146,9 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, ptr = nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index); req.bpr_ptr = ptr; - ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap, - &req); - if (ret < 0) + ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr); + if (unlikely(ret < 0)) return ret; - btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap, - &req, blocknr); key = nilfs_btree_node_get_key(btree, parent, path[level + 1].bp_index); @@ -2114,8 +2191,9 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, goto out; } - ret = btree->bt_ops->btop_assign(btree, path, level, bh, - blocknr, binfo); + ret = NILFS_BMAP_USE_VBN(bmap) ? + nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : + nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); out: nilfs_btree_clear_path(btree, path); @@ -2171,7 +2249,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) WARN_ON(ret == -ENOENT); goto out; } - ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); + ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; @@ -2179,7 +2257,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) if (!buffer_dirty(bh)) nilfs_btnode_mark_dirty(bh); - nilfs_bmap_put_block(&btree->bt_bmap, bh); + brelse(bh); if (!nilfs_bmap_dirty(&btree->bt_bmap)) nilfs_bmap_set_dirty(&btree->bt_bmap); @@ -2191,6 +2269,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, + .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, @@ -2210,6 +2289,7 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = { static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, + .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, @@ -2227,43 +2307,13 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -static const struct nilfs_btree_operations nilfs_btree_ops_v = { - .btop_find_target = nilfs_btree_find_target_v, - .btop_set_target = nilfs_btree_set_target_v, - .btop_propagate = nilfs_btree_propagate_v, - .btop_assign = nilfs_btree_assign_v, -}; - -static const struct nilfs_btree_operations nilfs_btree_ops_p = { - .btop_find_target = NULL, - .btop_set_target = NULL, - .btop_propagate = nilfs_btree_propagate_p, - .btop_assign = nilfs_btree_assign_p, -}; - -int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_btree_init(struct nilfs_bmap *bmap) { - struct nilfs_btree *btree; - - btree = (struct nilfs_btree *)bmap; bmap->b_ops = &nilfs_btree_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - btree->bt_ops = &nilfs_btree_ops_p; - break; - default: - btree->bt_ops = &nilfs_btree_ops_v; - break; - } - return 0; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { - bmap->b_low = NILFS_BMAP_LARGE_LOW; - bmap->b_high = NILFS_BMAP_LARGE_HIGH; bmap->b_ops = &nilfs_btree_ops_gc; } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 4766deb52fb1..0e72bbbc6b64 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -34,28 +34,6 @@ struct nilfs_btree; struct nilfs_btree_path; /** - * struct nilfs_btree_operations - B-tree operation table - */ -struct nilfs_btree_operations { - __u64 (*btop_find_target)(const struct nilfs_btree *, - const struct nilfs_btree_path *, __u64); - void (*btop_set_target)(struct nilfs_btree *, __u64, __u64); - - struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *); - - int (*btop_propagate)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head *); - int (*btop_assign)(struct nilfs_btree *, - struct nilfs_btree_path *, - int, - struct buffer_head **, - sector_t, - union nilfs_binfo *); -}; - -/** * struct nilfs_btree_node - B-tree node * @bn_flags: flags * @bn_level: level @@ -80,13 +58,9 @@ struct nilfs_btree_node { /** * struct nilfs_btree - B-tree structure * @bt_bmap: bmap base structure - * @bt_ops: B-tree operation table */ struct nilfs_btree { struct nilfs_bmap bt_bmap; - - /* B-tree-specific members */ - const struct nilfs_btree_operations *bt_ops; }; @@ -108,10 +82,9 @@ struct nilfs_btree { int nilfs_btree_path_cache_init(void); void nilfs_btree_path_cache_destroy(void); -int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_btree_init(struct nilfs_bmap *); int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, - const __u64 *, const __u64 *, - int, __u64, __u64); + const __u64 *, const __u64 *, int); void nilfs_btree_init_gc(struct nilfs_bmap *); #endif /* _NILFS_BTREE_H */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 300f1cdfa862..7d49813f66d6 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -295,10 +295,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, return -EINVAL; } - /* cannot delete the latest checkpoint */ - if (start == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; - down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); @@ -384,9 +380,10 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, } static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; @@ -410,17 +407,22 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, kaddr = kmap_atomic(bh->b_page, KM_USER0); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { - if (!nilfs_checkpoint_invalid(cp)) - nilfs_cpfile_checkpoint_to_cpinfo( - cpfile, cp, &ci[n++]); + if (!nilfs_checkpoint_invalid(cp)) { + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, + ci); + ci = (void *)ci + cisz; + n++; + } } kunmap_atomic(kaddr, KM_USER0); brelse(bh); } ret = n; - if (n > 0) - *cnop = ci[n - 1].ci_cno + 1; + if (n > 0) { + ci = (void *)ci - cisz; + *cnop = ci->ci_cno + 1; + } out: up_read(&NILFS_MDT(cpfile)->mi_sem); @@ -428,11 +430,12 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { struct buffer_head *bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; + struct nilfs_cpinfo *ci = buf; __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; void *kaddr; @@ -472,7 +475,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, if (unlikely(nilfs_checkpoint_invalid(cp) || !nilfs_checkpoint_snapshot(cp))) break; - nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]); + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci); + ci = (void *)ci + cisz; + n++; next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); if (next == 0) break; /* reach end of the snapshot list */ @@ -511,13 +516,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, - struct nilfs_cpinfo *ci, size_t nci) + void *buf, unsigned cisz, size_t nci) { switch (mode) { case NILFS_CHECKPOINT: - return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz, nci); case NILFS_SNAPSHOT: - return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); + return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz, nci); default: return -EINVAL; } @@ -533,20 +538,14 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) struct nilfs_cpinfo ci; __u64 tcno = cno; ssize_t nci; - int ret; - nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); + nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, sizeof(ci), 1); if (nci < 0) return nci; else if (nci == 0 || ci.ci_cno != cno) return -ENOENT; - - /* cannot delete the latest checkpoint nor snapshots */ - ret = nilfs_cpinfo_snapshot(&ci); - if (ret < 0) - return ret; - else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1) - return -EPERM; + else if (nilfs_cpinfo_snapshot(&ci)) + return -EBUSY; return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); } @@ -864,11 +863,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) case NILFS_CHECKPOINT: /* * Check for protecting existing snapshot mounts: - * bd_mount_sem is used to make this operation atomic and + * ns_mount_mutex is used to make this operation atomic and * exclusive with a new mount job. Though it doesn't cover * umount, it's enough for the purpose. */ - down(&nilfs->ns_bdev->bd_mount_sem); + mutex_lock(&nilfs->ns_mount_mutex); if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { /* Current implementation does not have to protect plain read-only mounts since they are exclusive @@ -877,7 +876,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) ret = -EBUSY; } else ret = nilfs_cpfile_clear_snapshot(cpfile, cno); - up(&nilfs->ns_bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); return ret; case NILFS_SNAPSHOT: return nilfs_cpfile_set_snapshot(cpfile, cno); diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 1a8a1008c342..788a45950197 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -39,7 +39,7 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); int nilfs_cpfile_is_snapshot(struct inode *, __u64); int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); -ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, - struct nilfs_cpinfo *, size_t); +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, + size_t); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index bb8a5818e7f1..0b2710e2d565 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -92,21 +92,6 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) nilfs_palloc_abort_alloc_entry(dat, req); } -int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req) -{ - int ret; - - ret = nilfs_palloc_prepare_free_entry(dat, req); - if (ret < 0) - return ret; - ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - nilfs_palloc_abort_free_entry(dat, req); - return ret; - } - return 0; -} - void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; @@ -391,36 +376,37 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) return ret; } -ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo, +ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, size_t nvi) { struct buffer_head *entry_bh; struct nilfs_dat_entry *entry; + struct nilfs_vinfo *vinfo = buf; __u64 first, last; void *kaddr; unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; int i, j, n, ret; for (i = 0; i < nvi; i += n) { - ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr, + ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr, 0, &entry_bh); if (ret < 0) return ret; kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); /* last virtual block number in this block */ - first = vinfo[i].vi_vblocknr; + first = vinfo->vi_vblocknr; do_div(first, entries_per_block); first *= entries_per_block; last = first + entries_per_block - 1; for (j = i, n = 0; - j < nvi && vinfo[j].vi_vblocknr >= first && - vinfo[j].vi_vblocknr <= last; - j++, n++) { + j < nvi && vinfo->vi_vblocknr >= first && + vinfo->vi_vblocknr <= last; + j++, n++, vinfo = (void *)vinfo + visz) { entry = nilfs_palloc_block_get_entry( - dat, vinfo[j].vi_vblocknr, entry_bh, kaddr); - vinfo[j].vi_start = le64_to_cpu(entry->de_start); - vinfo[j].vi_end = le64_to_cpu(entry->de_end); - vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr); + dat, vinfo->vi_vblocknr, entry_bh, kaddr); + vinfo->vi_start = le64_to_cpu(entry->de_start); + vinfo->vi_end = le64_to_cpu(entry->de_end); + vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } kunmap_atomic(kaddr, KM_USER0); brelse(entry_bh); diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d9560654a4b7..d328b81eead4 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -47,6 +47,6 @@ void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); int nilfs_dat_mark_dirty(struct inode *, __u64); int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); -ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); +ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index c6379e482781..342d9765df8d 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -25,6 +25,7 @@ #include "page.h" #include "direct.h" #include "alloc.h" +#include "dat.h" static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) { @@ -62,6 +63,47 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, return 0; } +static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, + __u64 key, __u64 *ptrp, + unsigned maxblocks) +{ + struct nilfs_direct *direct = (struct nilfs_direct *)bmap; + struct inode *dat = NULL; + __u64 ptr, ptr2; + sector_t blocknr; + int ret, cnt; + + if (key > NILFS_DIRECT_KEY_MAX || + (ptr = nilfs_direct_get_ptr(direct, key)) == + NILFS_BMAP_INVALID_PTR) + return -ENOENT; + + if (NILFS_BMAP_USE_VBN(bmap)) { + dat = nilfs_bmap_get_dat(bmap); + ret = nilfs_dat_translate(dat, ptr, &blocknr); + if (ret < 0) + return ret; + ptr = blocknr; + } + + maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1); + for (cnt = 1; cnt < maxblocks && + (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) != + NILFS_BMAP_INVALID_PTR; + cnt++) { + if (dat) { + ret = nilfs_dat_translate(dat, ptr2, &blocknr); + if (ret < 0) + return ret; + ptr2 = blocknr; + } + if (ptr2 != ptr + cnt) + break; + } + *ptrp = ptr; + return cnt; +} + static __u64 nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) { @@ -90,10 +132,9 @@ static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, { int ret; - if (direct->d_ops->dop_find_target != NULL) - req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, - req); + if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) + req->bpr_ptr = nilfs_direct_find_target_v(direct, key); + ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req); if (ret < 0) return ret; @@ -111,16 +152,14 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct, bh = (struct buffer_head *)((unsigned long)ptr); set_buffer_nilfs_volatile(bh); - if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_alloc_ptr( - &direct->d_bmap, req); + nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, req->bpr_ptr); if (!nilfs_bmap_dirty(&direct->d_bmap)) nilfs_bmap_set_dirty(&direct->d_bmap); - if (direct->d_ops->dop_set_target != NULL) - direct->d_ops->dop_set_target(direct, key, req->bpr_ptr); + if (NILFS_BMAP_USE_VBN(&direct->d_bmap)) + nilfs_direct_set_target_v(direct, key, req->bpr_ptr); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -152,25 +191,18 @@ static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, { int ret; - if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - req->bpr_ptr = nilfs_direct_get_ptr(direct, key); - ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr( - &direct->d_bmap, req); - if (ret < 0) - return ret; - } - - stats->bs_nblocks = 1; - return 0; + req->bpr_ptr = nilfs_direct_get_ptr(direct, key); + ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req); + if (!ret) + stats->bs_nblocks = 1; + return ret; } static void nilfs_direct_commit_delete(struct nilfs_direct *direct, union nilfs_bmap_ptr_req *req, __u64 key) { - if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) - direct->d_bmap.b_pops->bpop_commit_end_ptr( - &direct->d_bmap, req); + nilfs_bmap_commit_end_ptr(&direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); } @@ -244,8 +276,7 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, } int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, - __u64 key, __u64 *keys, __u64 *ptrs, - int n, __u64 low, __u64 high) + __u64 key, __u64 *keys, __u64 *ptrs, int n) { struct nilfs_direct *direct; __le64 *dptrs; @@ -275,8 +306,7 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, dptrs[i] = NILFS_BMAP_INVALID_PTR; } - nilfs_direct_init(bmap, low, high); - + nilfs_direct_init(bmap); return 0; } @@ -293,11 +323,11 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct, if (!buffer_nilfs_volatile(bh)) { oldreq.bpr_ptr = ptr; newreq.bpr_ptr = ptr; - ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq, - &newreq); + ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq, + &newreq); if (ret < 0) return ret; - nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq); + nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq); set_buffer_nilfs_volatile(bh); nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); } else @@ -309,12 +339,10 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct, static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, struct buffer_head *bh) { - struct nilfs_direct *direct; + struct nilfs_direct *direct = (struct nilfs_direct *)bmap; - direct = (struct nilfs_direct *)bmap; - return (direct->d_ops->dop_propagate != NULL) ? - direct->d_ops->dop_propagate(direct, bh) : - 0; + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_direct_propagate_v(direct, bh) : 0; } static int nilfs_direct_assign_v(struct nilfs_direct *direct, @@ -327,12 +355,9 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct, int ret; req.bpr_ptr = ptr; - ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr( - &direct->d_bmap, &req); - if (ret < 0) + ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr); + if (unlikely(ret < 0)) return ret; - direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap, - &req, blocknr); binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); @@ -377,12 +402,14 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, return -EINVAL; } - return direct->d_ops->dop_assign(direct, key, ptr, bh, - blocknr, binfo); + return NILFS_BMAP_USE_VBN(bmap) ? + nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) : + nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { .bop_lookup = nilfs_direct_lookup, + .bop_lookup_contig = nilfs_direct_lookup_contig, .bop_insert = nilfs_direct_insert, .bop_delete = nilfs_direct_delete, .bop_clear = NULL, @@ -401,36 +428,8 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = { }; -static const struct nilfs_direct_operations nilfs_direct_ops_v = { - .dop_find_target = nilfs_direct_find_target_v, - .dop_set_target = nilfs_direct_set_target_v, - .dop_propagate = nilfs_direct_propagate_v, - .dop_assign = nilfs_direct_assign_v, -}; - -static const struct nilfs_direct_operations nilfs_direct_ops_p = { - .dop_find_target = NULL, - .dop_set_target = NULL, - .dop_propagate = NULL, - .dop_assign = nilfs_direct_assign_p, -}; - -int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +int nilfs_direct_init(struct nilfs_bmap *bmap) { - struct nilfs_direct *direct; - - direct = (struct nilfs_direct *)bmap; bmap->b_ops = &nilfs_direct_ops; - bmap->b_low = low; - bmap->b_high = high; - switch (bmap->b_inode->i_ino) { - case NILFS_DAT_INO: - direct->d_ops = &nilfs_direct_ops_p; - break; - default: - direct->d_ops = &nilfs_direct_ops_v; - break; - } - return 0; } diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index 45d2c5cda812..a5ffd66e25d0 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -31,18 +31,6 @@ struct nilfs_direct; /** - * struct nilfs_direct_operations - direct mapping operation table - */ -struct nilfs_direct_operations { - __u64 (*dop_find_target)(const struct nilfs_direct *, __u64); - void (*dop_set_target)(struct nilfs_direct *, __u64, __u64); - int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *); - int (*dop_assign)(struct nilfs_direct *, __u64, __u64, - struct buffer_head **, sector_t, - union nilfs_binfo *); -}; - -/** * struct nilfs_direct_node - direct node * @dn_flags: flags * @dn_pad: padding @@ -55,13 +43,9 @@ struct nilfs_direct_node { /** * struct nilfs_direct - direct mapping * @d_bmap: bmap structure - * @d_ops: direct mapping operation table */ struct nilfs_direct { struct nilfs_bmap d_bmap; - - /* direct-mapping-specific members */ - const struct nilfs_direct_operations *d_ops; }; @@ -70,9 +54,9 @@ struct nilfs_direct { #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) -int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_direct_init(struct nilfs_bmap *); int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, - __u64 *, int, __u64, __u64); + __u64 *, int); #endif /* _NILFS_DIRECT_H */ diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 19d2102b6a69..1b3c2bb20da9 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -52,8 +52,9 @@ #include "dat.h" #include "ifile.h" -static struct address_space_operations def_gcinode_aops = {}; -/* XXX need def_gcinode_iops/fops? */ +static struct address_space_operations def_gcinode_aops = { + .sync_page = block_sync_page, +}; /* * nilfs_gccache_submit_read_data() - add data buffer and submit read request diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 49ab4a49bb4f..2696d6b513b7 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -43,22 +43,23 @@ * * This function does not issue actual read request of the specified data * block. It is done by VFS. - * Bulk read for direct-io is not supported yet. (should be supported) */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); - unsigned long blknum = 0; + __u64 blknum = 0; int err = 0, ret; struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); + unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; - /* This exclusion control is a workaround; should be revised */ - down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum); - up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (ret == 0) { /* found */ + down_read(&NILFS_MDT(dat)->mi_sem); + ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); + up_read(&NILFS_MDT(dat)->mi_sem); + if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); + if (ret > 0) + bh_result->b_size = (ret << inode->i_blkbits); goto out; } /* data block was not found */ @@ -240,7 +241,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct address_space_operations nilfs_aops = { .writepage = nilfs_writepage, .readpage = nilfs_readpage, - /* .sync_page = nilfs_sync_page, */ + .sync_page = block_sync_page, .writepages = nilfs_writepages, .set_page_dirty = nilfs_set_page_dirty, .readpages = nilfs_readpages, @@ -249,6 +250,7 @@ struct address_space_operations nilfs_aops = { /* .releasepage = nilfs_releasepage, */ .invalidatepage = block_invalidatepage, .direct_IO = nilfs_direct_IO, + .is_partially_uptodate = block_is_partially_uptodate, }; struct inode *nilfs_new_inode(struct inode *dir, int mode) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index d6759b92006f..6ea5f872e2de 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -152,7 +152,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); + size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -182,7 +182,8 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, size, + nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -212,7 +213,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, int ret; down_read(&nilfs->ns_segctor_sem); - ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs); up_read(&nilfs->ns_segctor_sem); return ret; } @@ -435,24 +436,6 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, return nmembs; } -static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, void *buf) -{ - size_t nmembs = argv->v_nmembs; - struct nilfs_sb_info *sbi = nilfs->ns_writer; - int ret; - - if (unlikely(!sbi)) { - /* never happens because called for a writable mount */ - WARN_ON(1); - return -EROFS; - } - ret = nilfs_segctor_add_segments_to_be_freed( - NILFS_SC(sbi), buf, nmembs); - - return (ret < 0) ? ret : nmembs; -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, struct nilfs_argv *argv, void **kbufs) { @@ -491,14 +474,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); - if (ret < 0) { - /* - * can safely abort because this operation is atomic. - */ - msg = "cannot set segments to be freed"; - goto failed; - } return 0; failed: @@ -615,7 +590,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - if (argv.v_size != membsz) + if (argv.v_size < membsz) return -EINVAL; ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index bb78745a0e30..3d3ddb3f5177 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -430,6 +430,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) static struct address_space_operations def_mdt_aops = { .writepage = nilfs_mdt_write_page, + .sync_page = block_sync_page, }; static struct inode_operations def_mdt_iops; @@ -449,7 +450,7 @@ struct inode * nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, ino_t ino, gfp_t gfp_mask) { - struct inode *inode = nilfs_alloc_inode(sb); + struct inode *inode = nilfs_alloc_inode_common(nilfs); if (!inode) return NULL; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index da6fc0bba2e5..edf6a59d9f2a 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -263,6 +263,7 @@ extern void nilfs_dirty_inode(struct inode *); extern struct dentry *nilfs_get_parent(struct dentry *); /* super.c */ +extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *); extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); extern void nilfs_error(struct super_block *, const char *, const char *, ...) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 57afa9d24061..d80cc71be749 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -28,7 +28,6 @@ #include "segment.h" #include "sufile.h" #include "page.h" -#include "seglist.h" #include "segbuf.h" /* @@ -395,6 +394,24 @@ static void dispose_recovery_list(struct list_head *head) } } +struct nilfs_segment_entry { + struct list_head list; + __u64 segnum; +}; + +static int nilfs_segment_list_add(struct list_head *head, __u64 segnum) +{ + struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); + + if (unlikely(!ent)) + return -ENOMEM; + + ent->segnum = segnum; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, head); + return 0; +} + void nilfs_dispose_segment_list(struct list_head *head) { while (!list_empty(head)) { @@ -402,7 +419,7 @@ void nilfs_dispose_segment_list(struct list_head *head) = list_entry(head->next, struct nilfs_segment_entry, list); list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } } @@ -431,12 +448,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, if (unlikely(err)) goto failed; - err = -ENOMEM; for (i = 1; i < 4; i++) { - ent = nilfs_alloc_segment_entry(segnum[i]); - if (unlikely(!ent)) + err = nilfs_segment_list_add(head, segnum[i]); + if (unlikely(err)) goto failed; - list_add_tail(&ent->list, head); } /* @@ -450,7 +465,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, goto failed; } list_del(&ent->list); - nilfs_free_segment_entry(ent); + kfree(ent); } /* Allocate new segments for recovery */ @@ -791,7 +806,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; - struct nilfs_segment_entry *ent; LIST_HEAD(segments); int empty_seg = 0, scan_newer = 0; int ret; @@ -892,12 +906,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (empty_seg++) goto super_root_found; /* found a valid super root */ - ent = nilfs_alloc_segment_entry(segnum); - if (unlikely(!ent)) { - ret = -ENOMEM; + ret = nilfs_segment_list_add(&segments, segnum); + if (unlikely(ret)) goto failed; - } - list_add_tail(&ent->list, &segments); seg_seq++; segnum = nextnum; diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index adccd4fc654e..0776ccc2504a 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h @@ -60,6 +60,7 @@ struct nilfs_sb_info { struct super_block *s_super; /* reverse pointer to super_block */ struct the_nilfs *s_nilfs; struct list_head s_list; /* list head for nilfs->ns_supers */ + atomic_t s_count; /* reference count */ /* Segment constructor */ struct list_head s_dirty_files; /* dirty files list */ diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1e68821b4a9b..9e3fe17bb96b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -26,7 +26,6 @@ #include <linux/crc32.h> #include "page.h" #include "segbuf.h" -#include "seglist.h" static struct kmem_cache *nilfs_segbuf_cachep; @@ -394,7 +393,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= (1 << BIO_RW_SYNCIO); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); res = nilfs_submit_seg_bio(wi, rw); if (unlikely(res)) goto failed_bio; diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h deleted file mode 100644 index d39df9144e99..000000000000 --- a/fs/nilfs2/seglist.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * seglist.h - expediential structure and routines to handle list of segments - * (would be removed in a future release) - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Ryusuke Konishi <ryusuke@osrg.net> - * - */ -#ifndef _NILFS_SEGLIST_H -#define _NILFS_SEGLIST_H - -#include <linux/fs.h> -#include <linux/buffer_head.h> -#include <linux/nilfs2_fs.h> -#include "sufile.h" - -struct nilfs_segment_entry { - __u64 segnum; - -#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally. - It must be cancelled if - construction aborted */ - - unsigned flags; - struct list_head list; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; -}; - - -void nilfs_dispose_segment_list(struct list_head *); - -static inline struct nilfs_segment_entry * -nilfs_alloc_segment_entry(__u64 segnum) -{ - struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); - - if (likely(ent)) { - ent->segnum = segnum; - ent->flags = 0; - ent->bh_su = NULL; - ent->raw_su = NULL; - INIT_LIST_HEAD(&ent->list); - } - return ent; -} - -static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - return nilfs_sufile_get_segment_usage(sufile, ent->segnum, - &ent->raw_su, &ent->bh_su); -} - -static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, - struct inode *sufile) -{ - if (!ent->bh_su) - return; - nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); - ent->bh_su = NULL; - ent->raw_su = NULL; -} - -static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent) -{ - kfree(ent); -} - -#endif /* _NILFS_SEGLIST_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 22c7f65c2403..aa977549919e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -39,7 +39,6 @@ #include "sufile.h" #include "cpfile.h" #include "ifile.h" -#include "seglist.h" #include "segbuf.h" @@ -79,7 +78,8 @@ enum { /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ -#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) +#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */ +#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED) /* Operations depending on the construction mode and file type */ struct nilfs_sc_operations { @@ -810,7 +810,7 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci) { return list_empty(&sci->sc_dirty_files) && !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && - list_empty(&sci->sc_cleaning_segments) && + sci->sc_nfreesegs == 0 && (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); } @@ -1005,44 +1005,6 @@ static void nilfs_drop_collected_inodes(struct list_head *head) } } -static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) - -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - if (!(ent->flags & NILFS_SLH_FREED)) - break; - err = nilfs_sufile_cancel_free(sufile, ent->segnum); - WARN_ON(err); /* do not happen */ - ent->flags &= ~NILFS_SLH_FREED; - } -} - -static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, - struct inode *sufile) -{ - struct list_head *head = &sci->sc_cleaning_segments; - struct nilfs_segment_entry *ent; - int err; - - list_for_each_entry(ent, head, list) { - err = nilfs_sufile_free(sufile, ent->segnum); - if (unlikely(err)) - return err; - ent->flags |= NILFS_SLH_FREED; - } - return 0; -} - -static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, struct inode *inode, struct list_head *listp, @@ -1161,6 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sbi->s_nilfs; struct list_head *head; struct nilfs_inode_info *ii; + size_t ndone; int err = 0; switch (sci->sc_stage.scnt) { @@ -1250,10 +1213,16 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) break; sci->sc_stage.scnt++; /* Fall through */ case NILFS_ST_SUFILE: - err = nilfs_segctor_prepare_free_segments(sci, - nilfs->ns_sufile); - if (unlikely(err)) + err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, + sci->sc_nfreesegs, &ndone); + if (unlikely(err)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, ndone, + NULL); break; + } + sci->sc_stage.flags |= NILFS_CF_SUFREED; + err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, &nilfs_sc_file_ops); if (unlikely(err)) @@ -1486,7 +1455,15 @@ static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, { if (unlikely(err)) { nilfs_segctor_free_incomplete_segments(sci, nilfs); - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + int ret; + + ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(ret); /* do not happen */ + } } nilfs_segctor_clear_segment_buffers(sci); } @@ -1585,7 +1562,13 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) break; - nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(err); /* do not happen */ + } nilfs_segctor_clear_segment_buffers(sci); err = nilfs_segctor_extend_segments(sci, nilfs, nadd); @@ -2224,10 +2207,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_complete_write(sci); /* Commit segments */ - if (has_sr) { - nilfs_segctor_commit_free_segments(sci); + if (has_sr) nilfs_segctor_clear_metadata_dirty(sci); - } nilfs_segctor_end_construction(sci, nilfs, 0); @@ -2301,48 +2282,6 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino) /* assign bit 0 to data files */ } -int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, - __u64 *segnum, size_t nsegs) -{ - struct nilfs_segment_entry *ent; - struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; - struct inode *sufile = nilfs->ns_sufile; - LIST_HEAD(list); - __u64 *pnum; - size_t i; - int err; - - for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { - ent = nilfs_alloc_segment_entry(*pnum); - if (unlikely(!ent)) { - err = -ENOMEM; - goto failed; - } - list_add_tail(&ent->list, &list); - - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - - if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su))) - printk(KERN_WARNING "NILFS: unused segment is " - "requested to be cleaned (segnum=%llu)\n", - (unsigned long long)ent->segnum); - nilfs_close_segment_entry(ent, sufile); - } - list_splice(&list, sci->sc_cleaning_segments.prev); - return 0; - - failed: - nilfs_dispose_segment_list(&list); - return err; -} - -void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci) -{ - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); -} - struct nilfs_segctor_wait_request { wait_queue_t wq; __u32 seq; @@ -2607,10 +2546,13 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; + sci->sc_freesegs = kbufs[4]; + sci->sc_nfreesegs = argv[4].v_nmembs; list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); for (;;) { @@ -2629,6 +2571,8 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, } out_unlock: + sci->sc_freesegs = NULL; + sci->sc_nfreesegs = 0; nilfs_clear_gcdat_inode(nilfs); nilfs_transaction_unlock(sbi); return err; @@ -2835,7 +2779,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_cleaning_segments); INIT_LIST_HEAD(&sci->sc_copied_buffers); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2901,9 +2844,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); } - if (!list_empty(&sci->sc_cleaning_segments)) - nilfs_dispose_segment_list(&sci->sc_cleaning_segments); - WARN_ON(!list_empty(&sci->sc_segbufs)); down_write(&sbi->s_nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 476bdd5df5be..0d2a475a741b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -90,8 +90,9 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_freesegs: array of segment numbers to be freed + * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation * @sc_dsync_start: start byte offset of data pages * @sc_dsync_end: end byte offset of data pages (inclusive) @@ -131,9 +132,11 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; + __u64 *sc_freesegs; + size_t sc_nfreesegs; + struct nilfs_inode_info *sc_dsync_inode; loff_t sc_dsync_start; loff_t sc_dsync_end; @@ -225,10 +228,6 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, - __u64 *, size_t); -extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); - extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); @@ -240,5 +239,6 @@ extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, extern int nilfs_recover_logical_segments(struct the_nilfs *, struct nilfs_sb_info *, struct nilfs_recovery_info *); +extern void nilfs_dispose_segment_list(struct list_head *); #endif /* _NILFS_SEGMENT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 98e68677f045..37994d4a59cc 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Written by Koji Sato <koji@osrg.net>. + * Rivised by Ryusuke Konishi <ryusuke@osrg.net>. */ #include <linux/kernel.h> @@ -108,6 +109,102 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, nilfs_mdt_mark_buffer_dirty(header_bh); } +/** + * nilfs_sufile_updatev - modify multiple segment usages at a time + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @create: creation flag + * @ndone: place to store number of modified segments on @segnumv + * @dofunc: primitive operation for the update + * + * Description: nilfs_sufile_updatev() repeatedly calls @dofunc + * against the given array of segments. The @dofunc is called with + * buffers of a header block and the sufile block in which the target + * segment usage entry is contained. If @ndone is given, the number + * of successfully modified segments from the head is stored in the + * place @ndone points to. + * + * Return Value: On success, zero is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Given segment usage is in hole block (may be returned if + * @create is zero) + * + * %-EINVAL - Invalid segment usage number + */ +int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, + int create, size_t *ndone, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + unsigned long blkoff, prev_blkoff; + __u64 *seg; + size_t nerr = 0, n = 0; + int ret = 0; + + if (unlikely(nsegs == 0)) + goto out; + + down_write(&NILFS_MDT(sufile)->mi_sem); + for (seg = segnumv; seg < segnumv + nsegs; seg++) { + if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING + "%s: invalid segment number: %llu\n", __func__, + (unsigned long long)*seg); + nerr++; + } + } + if (nerr > 0) { + ret = -EINVAL; + goto out_sem; + } + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + seg = segnumv; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (ret < 0) + goto out_header; + + for (;;) { + dofunc(sufile, *seg, header_bh, bh); + + if (++seg >= segnumv + nsegs) + break; + prev_blkoff = blkoff; + blkoff = nilfs_sufile_get_blkoff(sufile, *seg); + if (blkoff == prev_blkoff) + continue; + + /* get different block */ + brelse(bh); + ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); + if (unlikely(ret < 0)) + goto out_header; + } + brelse(bh); + + out_header: + n = seg - segnumv; + brelse(header_bh); + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + out: + if (ndone) + *ndone = n; + return ret; +} + int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, void (*dofunc)(struct inode *, __u64, struct buffer_head *, @@ -490,7 +587,8 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * nilfs_sufile_get_suinfo - * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @si: array of suinfo + * @buf: array of suinfo + * @sisz: byte size of suinfo * @nsi: size of suinfo array * * Description: @@ -502,11 +600,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, * * %-ENOMEM - Insufficient amount of memory available. */ -ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, - struct nilfs_suinfo *si, size_t nsi) +ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, + unsigned sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; + struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; @@ -531,20 +630,22 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, if (ret != -ENOENT) goto out; /* hole */ - memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); + memset(si, 0, sisz * n); + si = (void *)si + sisz * n; continue; } kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); - for (j = 0; j < n; j++, su = (void *)su + susz) { - si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); - si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); - si[i + j].sui_flags = le32_to_cpu(su->su_flags) & + for (j = 0; j < n; + j++, su = (void *)su + susz, si = (void *)si + sisz) { + si->sui_lastmod = le64_to_cpu(su->su_lastmod); + si->sui_nblocks = le32_to_cpu(su->su_nblocks); + si->sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) - si[i + j].sui_flags |= + si->sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index a2e2efd4ade1..a2c4d76c3366 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -43,43 +43,27 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, +ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); +int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); int nilfs_sufile_update(struct inode *, __u64, int, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)); -void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, - struct buffer_head *); void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, struct buffer_head *); void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); /** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_update(sufile, segnum, 0, - nilfs_sufile_do_cancel_free); -} - -/** * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file * @segnum: segment number to be freed @@ -100,6 +84,38 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) } /** + * nilfs_sufile_freev - free segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of freed segments + */ +static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv, + size_t nsegs, size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_cancel_freev - reallocate freeing segments + * @sufile: inode of segment usage file + * @segnumv: array of segment numbers + * @nsegs: size of @segnumv array + * @ndone: place to store the number of cancelled segments + * + * Return Value: On success, 0 is returned. On error, a negative error codes + * is returned. + */ +static inline int nilfs_sufile_cancel_freev(struct inode *sufile, + __u64 *segnumv, size_t nsegs, + size_t *ndone) +{ + return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone, + nilfs_sufile_do_cancel_free); +} + +/** * nilfs_sufile_set_error - mark a segment as erroneous * @sufile: inode of segment usage file * @segnum: segment number diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6989b03e97ab..ab785f85aa50 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -65,9 +65,8 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); +static void nilfs_write_super(struct super_block *sb); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags); /** * nilfs_error() - report failure condition on a filesystem @@ -134,7 +133,7 @@ void nilfs_warning(struct super_block *sb, const char *function, static struct kmem_cache *nilfs_inode_cachep; -struct inode *nilfs_alloc_inode(struct super_block *sb) +struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) { struct nilfs_inode_info *ii; @@ -144,10 +143,15 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache); + nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi); return &ii->vfs_inode; } +struct inode *nilfs_alloc_inode(struct super_block *sb) +{ + return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs); +} + void nilfs_destroy_inode(struct inode *inode) { kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); @@ -315,6 +319,11 @@ static void nilfs_put_super(struct super_block *sb) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; + lock_kernel(); + + if (sb->s_dirt) + nilfs_write_super(sb); + nilfs_detach_segment_constructor(sbi); if (!(sb->s_flags & MS_RDONLY)) { @@ -323,12 +332,18 @@ static void nilfs_put_super(struct super_block *sb) nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } + down_write(&nilfs->ns_super_sem); + if (nilfs->ns_current == sbi) + nilfs->ns_current = NULL; + up_write(&nilfs->ns_super_sem); nilfs_detach_checkpoint(sbi); put_nilfs(sbi->s_nilfs); sbi->s_super = NULL; sb->s_fs_info = NULL; - kfree(sbi); + nilfs_put_sbinfo(sbi); + + unlock_kernel(); } /** @@ -383,6 +398,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) { int err = 0; + nilfs_write_super(sb); + /* This function is called when super block should be written back */ if (wait) err = nilfs_construct_segment(sb); @@ -396,9 +413,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) struct buffer_head *bh_cp; int err; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_add(&sbi->s_list, &nilfs->ns_supers); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); sbi->s_ifile = nilfs_mdt_new( nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); @@ -436,9 +453,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); return err; } @@ -450,9 +467,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) nilfs_mdt_clear(sbi->s_ifile); nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); } static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) @@ -752,7 +769,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, * @silent: silent mode flag * @nilfs: the_nilfs struct * - * This function is called exclusively by bd_mount_mutex. + * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int @@ -773,6 +790,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, get_nilfs(nilfs); sbi->s_nilfs = nilfs; sbi->s_super = sb; + atomic_set(&sbi->s_count, 1); err = init_nilfs(nilfs, sbi, (char *)data); if (err) @@ -870,6 +888,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, goto failed_root; } + down_write(&nilfs->ns_super_sem); + if (!nilfs_test_opt(sbi, SNAPSHOT)) + nilfs->ns_current = sbi; + up_write(&nilfs->ns_super_sem); + return 0; failed_root: @@ -885,7 +908,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, failed_sbi: put_nilfs(nilfs); sb->s_fs_info = NULL; - kfree(sbi); + nilfs_put_sbinfo(sbi); return err; } @@ -898,6 +921,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) struct nilfs_mount_options old_opts; int err; + lock_kernel(); + + down_write(&nilfs->ns_super_sem); old_sb_flags = sb->s_flags; old_opts.mount_opt = sbi->s_mount_opt; old_opts.snapshot_cno = sbi->s_snapshot_cno; @@ -945,14 +971,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) */ - down(&sb->s_bdev->bd_mount_sem); - /* Check existing RW-mount */ - if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { + if (nilfs->ns_current && nilfs->ns_current != sbi) { printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount because a RW-mount exists.\n", + "remount because an RW-mount exists.\n", sb->s_id); err = -EBUSY; - goto rw_remount_failed; + goto restore_opts; } if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { printk(KERN_WARNING "NILFS (device %s): couldn't " @@ -960,7 +984,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) "the latest one.\n", sb->s_id); err = -EINVAL; - goto rw_remount_failed; + goto restore_opts; } sb->s_flags &= ~MS_RDONLY; nilfs_clear_opt(sbi, SNAPSHOT); @@ -968,28 +992,31 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = nilfs_attach_segment_constructor(sbi); if (err) - goto rw_remount_failed; + goto restore_opts; down_write(&nilfs->ns_sem); nilfs_setup_super(sbi); up_write(&nilfs->ns_sem); - up(&sb->s_bdev->bd_mount_sem); + nilfs->ns_current = sbi; } out: + up_write(&nilfs->ns_super_sem); + unlock_kernel(); return 0; - rw_remount_failed: - up(&sb->s_bdev->bd_mount_sem); restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.mount_opt; sbi->s_snapshot_cno = old_opts.snapshot_cno; + up_write(&nilfs->ns_super_sem); + unlock_kernel(); return err; } struct nilfs_super_data { struct block_device *bdev; + struct nilfs_sb_info *sbi; __u64 cno; int flags; }; @@ -1048,33 +1075,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data) { struct nilfs_super_data *sd = data; - return s->s_bdev == sd->bdev; -} - -static int nilfs_test_bdev_super2(struct super_block *s, void *data) -{ - struct nilfs_super_data *sd = data; - int ret; - - if (s->s_bdev != sd->bdev) - return 0; - - if (!((s->s_flags | sd->flags) & MS_RDONLY)) - return 1; /* Reuse an old R/W-mode super_block */ - - if (s->s_flags & sd->flags & MS_RDONLY) { - if (down_read_trylock(&s->s_umount)) { - ret = s->s_root && - (sd->cno == NILFS_SB(s)->s_snapshot_cno); - up_read(&s->s_umount); - /* - * This path is locked with sb_lock by sget(). - * So, drop_super() causes deadlock. - */ - return ret; - } - } - return 0; + return sd->sbi && s->s_fs_info == (void *)sd->sbi; } static int @@ -1082,8 +1083,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { struct nilfs_super_data sd; - struct super_block *s, *s2; - struct the_nilfs *nilfs = NULL; + struct super_block *s; + struct the_nilfs *nilfs; int err, need_to_close = 1; sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); @@ -1095,7 +1096,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, * much more information than normal filesystems to identify mount * instance. For snapshot mounts, not only a mount type (ro-mount * or rw-mount) but also a checkpoint number is required. - * The results are passed in sget() using nilfs_super_data. */ sd.cno = 0; sd.flags = flags; @@ -1104,64 +1104,59 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, goto failed; } - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - down(&sd.bdev->bd_mount_sem); - if (!sd.cno && - (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { - err = (err < 0) ? : -EBUSY; - goto failed_unlock; + nilfs = find_or_create_nilfs(sd.bdev); + if (!nilfs) { + err = -ENOMEM; + goto failed; } - /* - * Phase-1: search any existent instance and get the_nilfs - */ - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); - if (IS_ERR(s)) - goto error_s; - - if (!s->s_root) { - err = -ENOMEM; - nilfs = alloc_nilfs(sd.bdev); - if (!nilfs) - goto cancel_new; - } else { - struct nilfs_sb_info *sbi = NILFS_SB(s); + mutex_lock(&nilfs->ns_mount_mutex); + if (!sd.cno) { /* - * s_umount protects super_block from unmount process; - * It covers pointers of nilfs_sb_info and the_nilfs. + * Check if an exclusive mount exists or not. + * Snapshot mounts coexist with a current mount + * (i.e. rw-mount or ro-mount), whereas rw-mount and + * ro-mount are mutually exclusive. */ - nilfs = sbi->s_nilfs; - get_nilfs(nilfs); - up_write(&s->s_umount); + down_read(&nilfs->ns_super_sem); + if (nilfs->ns_current && + ((nilfs->ns_current->s_super->s_flags ^ flags) + & MS_RDONLY)) { + up_read(&nilfs->ns_super_sem); + err = -EBUSY; + goto failed_unlock; + } + up_read(&nilfs->ns_super_sem); + } - /* - * Phase-2: search specified snapshot or R/W mode super_block - */ - if (!sd.cno) - /* trying to get the latest checkpoint. */ - sd.cno = nilfs_last_cno(nilfs); + /* + * Find existing nilfs_sb_info struct + */ + sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); - s2 = sget(fs_type, nilfs_test_bdev_super2, - nilfs_set_bdev_super, &sd); - deactivate_super(s); - /* - * Although deactivate_super() invokes close_bdev_exclusive() at - * kill_block_super(). Here, s is an existent mount; we need - * one more close_bdev_exclusive() call. - */ - s = s2; - if (IS_ERR(s)) - goto error_s; + if (!sd.cno) + /* trying to get the latest checkpoint. */ + sd.cno = nilfs_last_cno(nilfs); + + /* + * Get super block instance holding the nilfs_sb_info struct. + * A new instance is allocated if no existing mount is present or + * existing instance has been unmounted. + */ + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); + if (sd.sbi) + nilfs_put_sbinfo(sd.sbi); + + if (IS_ERR(s)) { + err = PTR_ERR(s); + goto failed_unlock; } if (!s->s_root) { char b[BDEVNAME_SIZE]; + /* New superblock instance created */ s->s_flags = flags; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); @@ -1172,26 +1167,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; need_to_close = 0; - } else if (!(s->s_flags & MS_RDONLY)) { - err = -EBUSY; } - up(&sd.bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); put_nilfs(nilfs); if (need_to_close) close_bdev_exclusive(sd.bdev, flags); simple_set_mnt(mnt, s); return 0; - error_s: - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); - close_bdev_exclusive(sd.bdev, flags); - return PTR_ERR(s); - failed_unlock: - up(&sd.bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); + put_nilfs(nilfs); failed: close_bdev_exclusive(sd.bdev, flags); @@ -1199,70 +1186,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, cancel_new: /* Abandoning the newly allocated superblock */ - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); + mutex_unlock(&nilfs->ns_mount_mutex); + put_nilfs(nilfs); up_write(&s->s_umount); deactivate_super(s); /* * deactivate_super() invokes close_bdev_exclusive(). * We must finish all post-cleaning before this call; - * put_nilfs() and unlocking bd_mount_sem need the block device. + * put_nilfs() needs the block device. */ return err; } -static int nilfs_test_bdev_super3(struct super_block *s, void *data) -{ - struct nilfs_super_data *sd = data; - int ret; - - if (s->s_bdev != sd->bdev) - return 0; - if (down_read_trylock(&s->s_umount)) { - ret = (s->s_flags & MS_RDONLY) && s->s_root && - nilfs_test_opt(NILFS_SB(s), SNAPSHOT); - up_read(&s->s_umount); - if (ret) - return 0; /* ignore snapshot mounts */ - } - return !((sd->flags ^ s->s_flags) & MS_RDONLY); -} - -static int __false_bdev_super(struct super_block *s, void *data) -{ -#if 0 /* XXX: workaround for lock debug. This is not good idea */ - up_write(&s->s_umount); -#endif - return -EFAULT; -} - -/** - * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. - * fs_type: filesystem type - * bdev: block device - * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) - * res: pointer to an integer to store result - * - * This function must be called within a section protected by bd_mount_mutex. - */ -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags) -{ - struct super_block *s; - struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; - - s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); - if (IS_ERR(s)) { - if (PTR_ERR(s) != -EFAULT) - return PTR_ERR(s); - return 0; /* Not found */ - } - up_write(&s->s_umount); - deactivate_super(s); - return 1; /* Found */ -} - struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a91f15b8673c..8b8889825716 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -32,9 +32,12 @@ #include "cpfile.h" #include "sufile.h" #include "dat.h" -#include "seglist.h" #include "segbuf.h" + +static LIST_HEAD(nilfs_objects); +static DEFINE_SPINLOCK(nilfs_lock); + void nilfs_set_last_segment(struct the_nilfs *nilfs, sector_t start_blocknr, u64 seq, __u64 cno) { @@ -55,7 +58,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, * Return Value: On success, pointer to the_nilfs is returned. * On error, NULL is returned. */ -struct the_nilfs *alloc_nilfs(struct block_device *bdev) +static struct the_nilfs *alloc_nilfs(struct block_device *bdev) { struct the_nilfs *nilfs; @@ -68,7 +71,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) atomic_set(&nilfs->ns_writer_refcount, -1); atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + init_rwsem(&nilfs->ns_super_sem); + mutex_init(&nilfs->ns_mount_mutex); mutex_init(&nilfs->ns_writer_mutex); + INIT_LIST_HEAD(&nilfs->ns_list); INIT_LIST_HEAD(&nilfs->ns_supers); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_gc_inodes_h = NULL; @@ -78,6 +84,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) } /** + * find_or_create_nilfs - find or create nilfs object + * @bdev: block device to which the_nilfs is related + * + * find_nilfs() looks up an existent nilfs object created on the + * device and gets the reference count of the object. If no nilfs object + * is found on the device, a new nilfs object is allocated. + * + * Return Value: On success, pointer to the nilfs object is returned. + * On error, NULL is returned. + */ +struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) +{ + struct the_nilfs *nilfs, *new = NULL; + + retry: + spin_lock(&nilfs_lock); + list_for_each_entry(nilfs, &nilfs_objects, ns_list) { + if (nilfs->ns_bdev == bdev) { + get_nilfs(nilfs); + spin_unlock(&nilfs_lock); + if (new) + put_nilfs(new); + return nilfs; /* existing object */ + } + } + if (new) { + list_add_tail(&new->ns_list, &nilfs_objects); + spin_unlock(&nilfs_lock); + return new; /* new object */ + } + spin_unlock(&nilfs_lock); + + new = alloc_nilfs(bdev); + if (new) + goto retry; + return NULL; /* insufficient memory */ +} + +/** * put_nilfs - release a reference to the_nilfs * @nilfs: the_nilfs structure to be released * @@ -86,13 +131,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) */ void put_nilfs(struct the_nilfs *nilfs) { - if (!atomic_dec_and_test(&nilfs->ns_count)) + spin_lock(&nilfs_lock); + if (!atomic_dec_and_test(&nilfs->ns_count)) { + spin_unlock(&nilfs_lock); return; + } + list_del_init(&nilfs->ns_list); + spin_unlock(&nilfs_lock); + /* - * Increment of ns_count never occur below because the caller + * Increment of ns_count never occurs below because the caller * of get_nilfs() holds at least one reference to the_nilfs. * Thus its exclusion control is not required here. */ + might_sleep(); if (nilfs_loaded(nilfs)) { nilfs_mdt_clear(nilfs->ns_sufile); @@ -613,13 +665,63 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) return ret; } +/** + * nilfs_find_sbinfo - find existing nilfs_sb_info structure + * @nilfs: nilfs object + * @rw_mount: mount type (non-zero value for read/write mount) + * @cno: checkpoint number (zero for read-only mount) + * + * nilfs_find_sbinfo() returns the nilfs_sb_info structure which + * @rw_mount and @cno (in case of snapshots) matched. If no instance + * was found, NULL is returned. Although the super block instance can + * be unmounted after this function returns, the nilfs_sb_info struct + * is kept on memory until nilfs_put_sbinfo() is called. + */ +struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, + int rw_mount, __u64 cno) +{ + struct nilfs_sb_info *sbi; + + down_read(&nilfs->ns_super_sem); + /* + * The SNAPSHOT flag and sb->s_flags are supposed to be + * protected with nilfs->ns_super_sem. + */ + sbi = nilfs->ns_current; + if (rw_mount) { + if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) + goto found; /* read/write mount */ + else + goto out; + } else if (cno == 0) { + if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) + goto found; /* read-only mount */ + else + goto out; + } + + list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { + if (nilfs_test_opt(sbi, SNAPSHOT) && + sbi->s_snapshot_cno == cno) + goto found; /* snapshot mount */ + } + out: + up_read(&nilfs->ns_super_sem); + return NULL; + + found: + atomic_inc(&sbi->s_count); + up_read(&nilfs->ns_super_sem); + return sbi; +} + int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, int snapshot_mount) { struct nilfs_sb_info *sbi; int ret = 0; - down_read(&nilfs->ns_sem); + down_read(&nilfs->ns_super_sem); if (cno == 0 || cno > nilfs->ns_cno) goto out_unlock; @@ -636,6 +738,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, ret++; out_unlock: - up_read(&nilfs->ns_sem); + up_read(&nilfs->ns_super_sem); return ret; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 30fe58778d05..e8adbffc626f 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -43,12 +43,16 @@ enum { * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags * @ns_count: reference count + * @ns_list: list head for nilfs_list * @ns_bdev: block device * @ns_bdi: backing dev info * @ns_writer: back pointer to writable nilfs_sb_info * @ns_sem: semaphore for shared states + * @ns_super_sem: semaphore for global operations across super block instances + * @ns_mount_mutex: mutex protecting mount process of nilfs * @ns_writer_mutex: mutex protecting ns_writer attach/detach * @ns_writer_refcount: number of referrers on ns_writer + * @ns_current: back pointer to current mount * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super blocks @@ -88,15 +92,24 @@ enum { struct the_nilfs { unsigned long ns_flags; atomic_t ns_count; + struct list_head ns_list; struct block_device *ns_bdev; struct backing_dev_info *ns_bdi; struct nilfs_sb_info *ns_writer; struct rw_semaphore ns_sem; + struct rw_semaphore ns_super_sem; + struct mutex ns_mount_mutex; struct mutex ns_writer_mutex; atomic_t ns_writer_refcount; /* + * components protected by ns_super_sem + */ + struct nilfs_sb_info *ns_current; + struct list_head ns_supers; + + /* * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. @@ -108,7 +121,6 @@ struct the_nilfs { time_t ns_sbwtime[2]; unsigned ns_sbsize; unsigned ns_mount_state; - struct list_head ns_supers; /* * Following fields are dedicated to a writable FS-instance. @@ -191,11 +203,12 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) #define NILFS_ALTSB_FREQ 60 /* spare superblock */ void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *alloc_nilfs(struct block_device *); +struct the_nilfs *find_or_create_nilfs(struct block_device *); void put_nilfs(struct the_nilfs *); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); +struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); @@ -238,6 +251,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) mutex_unlock(&nilfs->ns_writer_mutex); } +static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) +{ + if (!atomic_dec_and_test(&sbi->s_count)) + kfree(sbi); +} + static inline void nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, sector_t *seg_start, sector_t *seg_end) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6aa7c4713536..abaaa1cbf8de 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering with remount options string: %s", opt); + + lock_kernel(); #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. */ *flags |= MS_RDONLY; @@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) if (NVolErrors(vol)) { ntfs_error(sb, "Volume has errors and is read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_IS_DIRTY) { ntfs_error(sb, "Volume is dirty and read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { ntfs_error(sb, "Volume has been modified by chkdsk " "and is read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { @@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) "(0x%x) and is read-only%s", (unsigned)le16_to_cpu(vol->vol_flags), es); + unlock_kernel(); return -EROFS; } if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { ntfs_error(sb, "Failed to set dirty bit in volume " "information flags%s", es); + unlock_kernel(); return -EROFS; } #if 0 @@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_error(sb, "Failed to empty journal $LogFile%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } if (!ntfs_mark_quotas_out_of_date(vol)) { ntfs_error(sb, "Failed to mark quotas out of date%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } if (!ntfs_stamp_usnjrnl(vol)) { ntfs_error(sb, "Failed to stamp transation log " "($UsnJrnl)%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { @@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) // TODO: Deal with *flags. - if (!parse_options(vol, opt)) + if (!parse_options(vol, opt)) { + unlock_kernel(); return -EINVAL; + } + unlock_kernel(); ntfs_debug("Done."); return 0; } @@ -2246,6 +2259,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering."); + + lock_kernel(); + #ifdef NTFS_RW /* * Commit all inodes while they are still open in case some of them @@ -2373,39 +2389,12 @@ static void ntfs_put_super(struct super_block *sb) vol->mftmirr_ino = NULL; } /* - * If any dirty inodes are left, throw away all mft data page cache - * pages to allow a clean umount. This should never happen any more - * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as - * the underlying mft records are written out and cleaned. If it does, - * happen anyway, we want to know... + * We should have no dirty inodes left, due to + * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as + * the underlying mft records are written out and cleaned. */ ntfs_commit_inode(vol->mft_ino); write_inode_now(vol->mft_ino, 1); - if (sb_has_dirty_inodes(sb)) { - const char *s1, *s2; - - mutex_lock(&vol->mft_ino->i_mutex); - truncate_inode_pages(vol->mft_ino->i_mapping, 0); - mutex_unlock(&vol->mft_ino->i_mutex); - write_inode_now(vol->mft_ino, 1); - if (sb_has_dirty_inodes(sb)) { - static const char *_s1 = "inodes"; - static const char *_s2 = ""; - s1 = _s1; - s2 = _s2; - } else { - static const char *_s1 = "mft pages"; - static const char *_s2 = "They have been thrown " - "away. "; - s1 = _s1; - s2 = _s2; - } - ntfs_error(sb, "Dirty %s found at umount time. %sYou should " - "run chkdsk. Please email " - "linux-ntfs-dev@lists.sourceforge.net and say " - "that you saw this message. Thank you.", s1, - s2); - } #endif /* NTFS_RW */ iput(vol->mft_ino); @@ -2444,7 +2433,8 @@ static void ntfs_put_super(struct super_block *sb) } sb->s_fs_info = NULL; kfree(vol); - return; + + unlock_kernel(); } /** diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 678a067d9251..9edcde4974aa 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -475,6 +475,12 @@ struct ocfs2_path { #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) #define path_num_items(_path) ((_path)->p_tree_depth + 1) +static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, + u32 cpos); +static void ocfs2_adjust_rightmost_records(struct inode *inode, + handle_t *handle, + struct ocfs2_path *path, + struct ocfs2_extent_rec *insert_rec); /* * Reset the actual path elements so that we can re-use the structure * to build another path. Generally, this involves freeing the buffer @@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) } /* + * Change range of the branches in the right most path according to the leaf + * extent block's rightmost record. + */ +static int ocfs2_adjust_rightmost_branch(handle_t *handle, + struct inode *inode, + struct ocfs2_extent_tree *et) +{ + int status; + struct ocfs2_path *path = NULL; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + + path = ocfs2_new_path_from_et(et); + if (!path) { + status = -ENOMEM; + return status; + } + + status = ocfs2_find_path(inode, path, UINT_MAX); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_extend_trans(handle, path_num_items(path) + + handle->h_buffer_credits); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access_path(inode, handle, path); + if (status < 0) { + mlog_errno(status); + goto out; + } + + el = path_leaf_el(path); + rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; + + ocfs2_adjust_rightmost_records(inode, handle, path, rec); + +out: + ocfs2_free_path(path); + return status; +} + +/* * Add an entire tree branch to our inode. eb_bh is the extent block * to start at, if we don't want to start the branch at the dinode * structure. @@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, struct ocfs2_extent_block *eb; struct ocfs2_extent_list *eb_el; struct ocfs2_extent_list *el; - u32 new_cpos; + u32 new_cpos, root_end; mlog_entry_void(); @@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, new_blocks = le16_to_cpu(el->l_tree_depth); + eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; + new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); + root_end = ocfs2_sum_rightmost_rec(et->et_root_el); + + /* + * If there is a gap before the root end and the real end + * of the righmost leaf block, we need to remove the gap + * between new_cpos and root_end first so that the tree + * is consistent after we add a new branch(it will start + * from new_cpos). + */ + if (root_end > new_cpos) { + mlog(0, "adjust the cluster end from %u to %u\n", + root_end, new_cpos); + status = ocfs2_adjust_rightmost_branch(handle, inode, et); + if (status) { + mlog_errno(status); + goto bail; + } + } + /* allocate the number of new eb blocks we need */ new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), GFP_KERNEL); @@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, goto bail; } - eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; - new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); - /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be * linked with the rest of the tree. * conversly, new_eb_bhs[0] is the new bottommost leaf. diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 2a947c44e594..a1163b8b417c 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -22,6 +22,9 @@ #include <linux/crc32.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/fs.h> #include <asm/byteorder.h> #include <cluster/masklog.h> @@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize, ocfs2_hamming_fix(data, blocksize * 8, 0, fix); } + +/* + * Debugfs handling. + */ + +#ifdef CONFIG_DEBUG_FS + +static int blockcheck_u64_get(void *data, u64 *val) +{ + *val = *(u64 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n"); + +static struct dentry *blockcheck_debugfs_create(const char *name, + struct dentry *parent, + u64 *value) +{ + return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value, + &blockcheck_fops); +} + +static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ + if (stats) { + debugfs_remove(stats->b_debug_check); + stats->b_debug_check = NULL; + debugfs_remove(stats->b_debug_failure); + stats->b_debug_failure = NULL; + debugfs_remove(stats->b_debug_recover); + stats->b_debug_recover = NULL; + debugfs_remove(stats->b_debug_dir); + stats->b_debug_dir = NULL; + } +} + +static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + int rc = -EINVAL; + + if (!stats) + goto out; + + stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); + if (!stats->b_debug_dir) + goto out; + + stats->b_debug_check = + blockcheck_debugfs_create("blocks_checked", + stats->b_debug_dir, + &stats->b_check_count); + + stats->b_debug_failure = + blockcheck_debugfs_create("checksums_failed", + stats->b_debug_dir, + &stats->b_failure_count); + + stats->b_debug_recover = + blockcheck_debugfs_create("ecc_recoveries", + stats->b_debug_dir, + &stats->b_recover_count); + if (stats->b_debug_check && stats->b_debug_failure && + stats->b_debug_recover) + rc = 0; + +out: + if (rc) + ocfs2_blockcheck_debug_remove(stats); + return rc; +} +#else +static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + return 0; +} + +static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +/* Always-called wrappers for starting and stopping the debugfs files */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) +{ + return ocfs2_blockcheck_debug_install(stats, parent); +} + +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) +{ + ocfs2_blockcheck_debug_remove(stats); +} + +static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_check_count++; + new_count = stats->b_check_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "Block check count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_failure_count++; + new_count = stats->b_failure_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "Checksum failure count has wrapped\n"); +} + +static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats) +{ + u64 new_count; + + if (!stats) + return; + + spin_lock(&stats->b_lock); + stats->b_recover_count++; + new_count = stats->b_recover_count; + spin_unlock(&stats->b_lock); + + if (!new_count) + mlog(ML_NOTICE, "ECC recovery count has wrapped\n"); +} + + + +/* + * These are the low-level APIs for using the ocfs2_block_check structure. + */ + /* * This function generates check information for a block. * data is the block to be checked. bc is a pointer to the @@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize, * Again, the data passed in should be the on-disk endian. */ int ocfs2_block_check_validate(void *data, size_t blocksize, - struct ocfs2_block_check *bc) + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats) { int rc = 0; struct ocfs2_block_check check; u32 crc, ecc; + ocfs2_blockcheck_inc_check(stats); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, if (crc == check.bc_crc32e) goto out; + ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, /* And check the crc32 again */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) + if (crc == check.bc_crc32e) { + ocfs2_blockcheck_inc_recover(stats); goto out; + } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, * Again, the data passed in should be the on-disk endian. */ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, - struct ocfs2_block_check *bc) + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats) { int i, rc = 0; struct ocfs2_block_check check; @@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, if (!nr) return 0; + ocfs2_blockcheck_inc_check(stats); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); check.bc_ecc = le16_to_cpu(bc->bc_ecc); @@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, if (crc == check.bc_crc32e) goto out; + ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, /* And check the crc32 again */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) + if (crc == check.bc_crc32e) { + ocfs2_blockcheck_inc_recover(stats); goto out; + } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); @@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data, struct ocfs2_block_check *bc) { int rc = 0; + struct ocfs2_super *osb = OCFS2_SB(sb); - if (ocfs2_meta_ecc(OCFS2_SB(sb))) - rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc); + if (ocfs2_meta_ecc(osb)) + rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc, + &osb->osb_ecc_stats); return rc; } @@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, struct ocfs2_block_check *bc) { int rc = 0; + struct ocfs2_super *osb = OCFS2_SB(sb); - if (ocfs2_meta_ecc(OCFS2_SB(sb))) - rc = ocfs2_block_check_validate_bhs(bhs, nr, bc); + if (ocfs2_meta_ecc(osb)) + rc = ocfs2_block_check_validate_bhs(bhs, nr, bc, + &osb->osb_ecc_stats); return rc; } diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index 70ec3feda32f..d4b69febf70a 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -21,6 +21,24 @@ #define OCFS2_BLOCKCHECK_H +/* Count errors and error correction from blockcheck.c */ +struct ocfs2_blockcheck_stats { + spinlock_t b_lock; + u64 b_check_count; /* Number of blocks we've checked */ + u64 b_failure_count; /* Number of failed checksums */ + u64 b_recover_count; /* Number of blocks fixed by ecc */ + + /* + * debugfs entries, used if this is passed to + * ocfs2_blockcheck_stats_debugfs_install() + */ + struct dentry *b_debug_dir; /* Parent of the debugfs files */ + struct dentry *b_debug_check; /* Exposes b_check_count */ + struct dentry *b_debug_failure; /* Exposes b_failure_count */ + struct dentry *b_debug_recover; /* Exposes b_recover_count */ +}; + + /* High level block API */ void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, struct ocfs2_block_check *bc); @@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, void ocfs2_block_check_compute(void *data, size_t blocksize, struct ocfs2_block_check *bc); int ocfs2_block_check_validate(void *data, size_t blocksize, - struct ocfs2_block_check *bc); + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats); void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, struct ocfs2_block_check *bc); int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, - struct ocfs2_block_check *bc); + struct ocfs2_block_check *bc, + struct ocfs2_blockcheck_stats *stats); + +/* Debug Initialization */ +int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats); /* * Hamming code functions diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 7e72a81bc2d4..696c32e50716 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -48,34 +48,33 @@ * only emit the appropriage printk() when the caller passes in a constant * mask, as is almost always the case. * - * All this bitmask nonsense is hidden from the /proc interface so that Joel - * doesn't have an aneurism. Reading the file gives a straight forward - * indication of which bits are on or off: - * ENTRY off - * EXIT off + * All this bitmask nonsense is managed from the files under + * /sys/fs/o2cb/logmask/. Reading the files gives a straightforward + * indication of which bits are allowed (allow) or denied (off/deny). + * ENTRY deny + * EXIT deny * TCP off * MSG off * SOCKET off - * ERROR off - * NOTICE on + * ERROR allow + * NOTICE allow * * Writing changes the state of a given bit and requires a strictly formatted * single write() call: * - * write(fd, "ENTRY on", 8); + * write(fd, "allow", 5); * - * would turn the entry bit on. "1" is also accepted in the place of "on", and - * "off" and "0" behave as expected. + * Echoing allow/deny/off string into the logmask files can flip the bits + * on or off as expected; here is the bash script for example: * - * Some trivial shell can flip all the bits on or off: + * log_mask="/sys/fs/o2cb/log_mask" + * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do + * echo allow >"$log_mask"/"$node" + * done * - * log_mask="/proc/fs/ocfs2_nodemanager/log_mask" - * cat $log_mask | ( - * while read bit status; do - * # $1 is "on" or "off", say - * echo "$bit $1" > $log_mask - * done - * ) + * The debugfs.ocfs2 tool can also flip the bits with the -l option: + * + * debugfs.ocfs2 -l TCP allow */ /* for task_struct */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9fbe849f6344..334f231a422c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { - int ret, error = 0; + int ret; struct o2net_msg *msg = NULL; size_t veclen, caller_bytes = 0; struct kvec *vec = NULL; @@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, o2net_set_nst_sock_time(&nst); - ret = wait_event_interruptible(nn->nn_sc_wq, - o2net_tx_can_proceed(nn, &sc, &error)); - if (!ret && error) - ret = error; + wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret)); if (ret) goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c5752305627c..b358f3bf896d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, alloc = ocfs2_clusters_for_bytes(sb, bytes); dx_alloc = 0; + down_write(&oi->ip_alloc_sem); + if (ocfs2_supports_indexed_dirs(osb)) { credits += ocfs2_add_dir_index_credits(sb); @@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out; } - down_write(&oi->ip_alloc_sem); - /* * Prepare for worst case allocation scenario of two separate * extents in the unindexed tree. @@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_sem; + goto out; } if (vfs_dq_alloc_space_nodirty(dir, @@ -3172,10 +3172,8 @@ out_commit: ocfs2_commit_trans(osb, handle); -out_sem: - up_write(&oi->ip_alloc_sem); - out: + up_write(&oi->ip_alloc_sem); if (data_ac) ocfs2_free_alloc_context(data_ac); if (meta_ac) @@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, brelse(new_bh); new_bh = NULL; + down_write(&OCFS2_I(dir)->ip_alloc_sem); + drop_alloc_sem = 1; dir_i_size = i_size_read(dir); credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; goto do_extend; } + down_write(&OCFS2_I(dir)->ip_alloc_sem); + drop_alloc_sem = 1; dir_i_size = i_size_read(dir); mlog(0, "extending dir %llu (i_size = %lld)\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); @@ -3370,9 +3372,6 @@ do_extend: credits++; /* For attaching the new dirent block to the * dx_root */ - down_write(&OCFS2_I(dir)->ip_alloc_sem); - drop_alloc_sem = 1; - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); @@ -3435,10 +3434,10 @@ bail_bh: *new_de_bh = new_bh; get_bh(*new_de_bh); bail: - if (drop_alloc_sem) - up_write(&OCFS2_I(dir)->ip_alloc_sem); if (handle) ocfs2_commit_trans(osb, handle); + if (drop_alloc_sem) + up_write(&OCFS2_I(dir)->ip_alloc_sem); if (data_ac) ocfs2_free_alloc_context(data_ac); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e15fc7d50827..6cdeaa76f27f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -248,6 +248,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { + .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, +}; + static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, @@ -637,6 +641,19 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, + struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan_lvb *lvb; + + ocfs2_lock_res_init_once(res); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); + ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, + &ocfs2_orphan_scan_lops, osb); + lvb = ocfs2_dlm_lvb(&res->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; +} + void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, struct ocfs2_file_private *fp) { @@ -2352,6 +2369,37 @@ void ocfs2_inode_unlock(struct inode *inode, mlog_exit_void(); } +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +{ + struct ocfs2_lock_res *lockres; + struct ocfs2_orphan_scan_lvb *lvb; + int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + int status = 0; + + lockres = &osb->osb_orphan_scan.os_lockres; + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + if (status < 0) + return status; + + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) + *seqno = be32_to_cpu(lvb->lvb_os_seqno); + return status; +} + +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +{ + struct ocfs2_lock_res *lockres; + struct ocfs2_orphan_scan_lvb *lvb; + int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; + + lockres = &osb->osb_orphan_scan.os_lockres; + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; + lvb->lvb_os_seqno = cpu_to_be32(seqno); + ocfs2_cluster_unlock(osb, lockres, level); +} + int ocfs2_super_lock(struct ocfs2_super *osb, int ex) { @@ -2842,6 +2890,7 @@ local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); osb->cconn = conn; @@ -2878,6 +2927,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ocfs2_lock_res_free(&osb->osb_super_lockres); ocfs2_lock_res_free(&osb->osb_rename_lockres); ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); + ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); ocfs2_cluster_disconnect(osb->cconn, hangup_pending); osb->cconn = NULL; @@ -3061,6 +3111,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); + ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); } int ocfs2_drop_inode_locks(struct inode *inode) diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e1fd5721cd7f..31b90d7b8f51 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb { __be32 lvb_free_entry; }; +#define OCFS2_ORPHAN_LVB_VERSION 1 + +struct ocfs2_orphan_scan_lvb { + __u8 lvb_version; + __u8 lvb_reserved[3]; + __be32 lvb_os_seqno; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. */ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -113,6 +121,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb, int ex); void ocfs2_super_unlock(struct ocfs2_super *osb, int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); + int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c2a87c885b73..07267e0da909 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file, if (err) goto bail; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto bail; + journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); @@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; - int locked[MAXQUOTAS] = {0, 0}; - int credits, qtype; - struct ocfs2_mem_dqinfo *oinfo; + int qtype; + struct dquot *transfer_from[MAXQUOTAS] = { }; + struct dquot *transfer_to[MAXQUOTAS] = { }; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - credits = OCFS2_INODE_UPDATE_CREDITS; + /* + * Gather pointers to quota structures so that allocation / + * freeing of quota structures happens here and not inside + * vfs_dq_transfer() where we have problems with lock ordering + */ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, + USRQUOTA); + transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, + USRQUOTA); + if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + - ocfs2_calc_qdel_credits(sb, USRQUOTA); - locked[USRQUOTA] = 1; + } } if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, + GRPQUOTA); + transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, + GRPQUOTA); + if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + - ocfs2_calc_qdel_credits(sb, GRPQUOTA); - locked[GRPQUOTA] = 1; + } } - handle = ocfs2_start_trans(osb, credits); + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + + 2 * ocfs2_quota_trans_credits(sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: - for (qtype = 0; qtype < MAXQUOTAS; qtype++) { - if (!locked[qtype]) - continue; - oinfo = sb_dqinfo(sb, qtype)->dqi_priv; - ocfs2_unlock_global_qf(oinfo, 1); - } ocfs2_inode_unlock(inode, 1); bail_unlock_rw: if (size_change) @@ -1043,6 +1047,12 @@ bail_unlock_rw: bail: brelse(bh); + /* Release quota pointers in case we acquired them */ + for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + dqput(transfer_to[qtype]); + dqput(transfer_from[qtype]); + } + if (!status && attr->ia_valid & ATTR_MODE) { status = ocfs2_acl_chmod(inode); if (status < 0) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a20a0f1e37fd..4a3b9e6b31ad 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -28,6 +28,8 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/kthread.h> +#include <linux/time.h> +#include <linux/random.h> #define MLOG_MASK_PREFIX ML_JOURNAL #include <cluster/masklog.h> @@ -52,6 +54,8 @@ DEFINE_SPINLOCK(trans_inc_lock); +#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 + static int ocfs2_force_read_journal(struct inode *inode); static int ocfs2_recover_node(struct ocfs2_super *osb, int node_num, int slot_num); @@ -1841,6 +1845,113 @@ bail: return status; } +/* + * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some + * randomness to the timeout to minimize multple nodes firing the timer at the + * same time. + */ +static inline unsigned long ocfs2_orphan_scan_timeout(void) +{ + unsigned long time; + + get_random_bytes(&time, sizeof(time)); + time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); + return msecs_to_jiffies(time); +} + +/* + * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for + * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This + * is done to catch any orphans that are left over in orphan directories. + * + * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT + * seconds. It gets an EX lock on os_lockres and checks sequence number + * stored in LVB. If the sequence number has changed, it means some other + * node has done the scan. This node skips the scan and tracks the + * sequence number. If the sequence number didn't change, it means a scan + * hasn't happened. The node queues a scan and increments the + * sequence number in the LVB. + */ +void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + int status, i; + u32 seqno = 0; + + os = &osb->osb_orphan_scan; + + status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); + if (status < 0) { + if (status != -EAGAIN) + mlog_errno(status); + goto out; + } + + if (os->os_seqno != seqno) { + os->os_seqno = seqno; + goto unlock; + } + + for (i = 0; i < osb->max_slots; i++) + ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, + NULL); + /* + * We queued a recovery on orphan slots, increment the sequence + * number and update LVB so other node will skip the scan for a while + */ + seqno++; + os->os_count++; + os->os_scantime = CURRENT_TIME; +unlock: + ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); +out: + return; +} + +/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */ +void ocfs2_orphan_scan_work(struct work_struct *work) +{ + struct ocfs2_orphan_scan *os; + struct ocfs2_super *osb; + + os = container_of(work, struct ocfs2_orphan_scan, + os_orphan_scan_work.work); + osb = os->os_osb; + + mutex_lock(&os->os_lock); + ocfs2_queue_orphan_scan(osb); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + mutex_unlock(&os->os_lock); +} + +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + + os = &osb->osb_orphan_scan; + mutex_lock(&os->os_lock); + cancel_delayed_work(&os->os_orphan_scan_work); + mutex_unlock(&os->os_lock); +} + +int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + + os = &osb->osb_orphan_scan; + os->os_osb = osb; + os->os_count = 0; + os->os_scantime = CURRENT_TIME; + mutex_init(&os->os_lock); + + INIT_DELAYED_WORK(&os->os_orphan_scan_work, + ocfs2_orphan_scan_work); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + return 0; +} + struct ocfs2_orphan_filldir_priv { struct inode *head; struct ocfs2_super *osb; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index eb7b76331eb7..61045eeb3f6e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, } /* Exported only for the journal struct init code in super.c. Do not call. */ +int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); +void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); + void ocfs2_complete_recovery(struct work_struct *work); void ocfs2_wait_for_recovery(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1386281950db..18c1d9ec1c93 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -47,6 +47,9 @@ #include "ocfs2_fs.h" #include "ocfs2_lockid.h" +/* For struct ocfs2_blockcheck_stats */ +#include "blockcheck.h" + /* Most user visible OCFS2 inodes will have very few pieces of * metadata, but larger files (including bitmaps, etc) must be taken * into account when designing an access scheme. We allow a small @@ -151,6 +154,16 @@ struct ocfs2_lock_res { #endif }; +struct ocfs2_orphan_scan { + struct mutex os_lock; + struct ocfs2_super *os_osb; + struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ + struct delayed_work os_orphan_scan_work; + struct timespec os_scantime; /* time this node ran the scan */ + u32 os_count; /* tracks node specific scans */ + u32 os_seqno; /* tracks cluster wide scans */ +}; + struct ocfs2_dlm_debug { struct kref d_refcnt; struct dentry *d_locking_state; @@ -295,6 +308,7 @@ struct ocfs2_super struct ocfs2_dinode *local_alloc_copy; struct ocfs2_quota_recovery *quota_rec; + struct ocfs2_blockcheck_stats osb_ecc_stats; struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ @@ -341,6 +355,8 @@ struct ocfs2_super unsigned int *osb_orphan_wipes; wait_queue_head_t osb_wipe_event; + struct ocfs2_orphan_scan osb_orphan_scan; + /* used to protect metaecc calculation check of xattr. */ spinlock_t osb_xattr_lock; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index a53ce87481bf..fcdba091af3d 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -48,6 +48,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_FLOCK, OCFS2_LOCK_TYPE_QINFO, OCFS2_LOCK_TYPE_NFS_SYNC, + OCFS2_LOCK_TYPE_ORPHAN_SCAN, OCFS2_NUM_LOCK_TYPES }; @@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_NFS_SYNC: c = 'Y'; break; + case OCFS2_LOCK_TYPE_ORPHAN_SCAN: + c = 'P'; + break; default: c = '\0'; } @@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_OPEN] = "Open", [OCFS2_LOCK_TYPE_FLOCK] = "Flock", [OCFS2_LOCK_TYPE_QINFO] = "Quota", + [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 1ed0f7c86869..edfa60cd155c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot) OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; if (!dquot->dq_off) { /* No real quota entry? */ /* Upgrade to exclusive lock for allocation */ + ocfs2_qinfo_unlock(info, 0); err = ocfs2_qinfo_lock(info, 1); if (err < 0) goto out_qlock; @@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot) out_qlock: if (ex) ocfs2_qinfo_unlock(info, 1); - ocfs2_qinfo_unlock(info, 0); + else + ocfs2_qinfo_unlock(info, 0); out: if (err < 0) mlog_errno(err); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 07deec5e9721..5a460fa82553 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) - goto out; - list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { chunk = rchunk->rc_chunk; hbh = NULL; @@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, type); goto out_put_bh; } + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) { + mlog_errno(status); + goto out_put_dquot; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QSYNC_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto out_put_dquot; + goto out_drop_lock; } mutex_lock(&sb_dqopt(sb)->dqio_mutex); spin_lock(&dq_data_lock); @@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, out_commit: mutex_unlock(&sb_dqopt(sb)->dqio_mutex); ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_drop_lock: + ocfs2_unlock_global_qf(oinfo, 1); out_put_dquot: dqput(dquot); out_put_bh: @@ -537,8 +541,6 @@ out_put_bh: if (status < 0) break; } - ocfs2_unlock_global_qf(oinfo, 1); -out: if (status < 0) free_recovery_list(&(rec->r_list[type])); mlog_exit(status); @@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) struct ocfs2_quota_recovery *rec; int locked = 0; + /* We don't need the lock and we have to acquire quota file locks + * which will later depend on this lock */ + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); info->dqi_maxblimit = 0x7fffffffffffffffLL; info->dqi_maxilimit = 0x7fffffffffffffffLL; oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); @@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) goto out_err; } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); return 0; out_err: if (oinfo) { @@ -746,6 +752,7 @@ out_err: kfree(oinfo); } brelse(bh); + mutex_lock(&sb_dqopt(sb)->dqio_mutex); return -1; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5c6163f55039..d33767f17ba3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> +#include <linux/smp_lock.h> #define MLOG_MASK_PREFIX ML_SUPER #include <cluster/masklog.h> @@ -118,15 +119,16 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb); static int ocfs2_check_volume(struct ocfs2_super *osb); static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, - u32 sectsize); + u32 sectsize, + struct ocfs2_blockcheck_stats *stats); static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, - int sector_size); + int sector_size, + struct ocfs2_blockcheck_stats *stats); static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size); -static void ocfs2_write_super(struct super_block *sb); static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_destroy_inode(struct inode *inode); static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); @@ -141,7 +143,6 @@ static const struct super_operations ocfs2_sops = { .clear_inode = ocfs2_clear_inode, .delete_inode = ocfs2_delete_inode, .sync_fs = ocfs2_sync_fs, - .write_super = ocfs2_write_super, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, @@ -208,6 +209,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) int i; struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; + struct ocfs2_orphan_scan *os; out += snprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", @@ -309,6 +311,13 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) i, osb->slot_recovery_generations[i]); } + os = &osb->osb_orphan_scan; + out += snprintf(buf + out, len - out, "Orphan Scan=> "); + out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + os->os_count, os->os_seqno); + out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", + (get_seconds() - os->os_scantime.tv_sec)); + return out; } @@ -365,24 +374,12 @@ static struct file_operations ocfs2_osb_debug_fops = { .llseek = generic_file_llseek, }; -/* - * write_super and sync_fs ripped right out of ext3. - */ -static void ocfs2_write_super(struct super_block *sb) -{ - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; -} - static int ocfs2_sync_fs(struct super_block *sb, int wait) { int status; tid_t target; struct ocfs2_super *osb = OCFS2_SB(sb); - sb->s_dirt = 0; - if (ocfs2_is_hard_readonly(osb)) return -EROFS; @@ -595,6 +592,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); + lock_kernel(); + if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { ret = -EINVAL; goto out; @@ -698,12 +697,14 @@ unlock_osb: ocfs2_set_journal_params(osb); } out: + unlock_kernel(); return ret; } static int ocfs2_sb_probe(struct super_block *sb, struct buffer_head **bh, - int *sector_size) + int *sector_size, + struct ocfs2_blockcheck_stats *stats) { int status, tmpstat; struct ocfs1_vol_disk_hdr *hdr; @@ -769,7 +770,8 @@ static int ocfs2_sb_probe(struct super_block *sb, goto bail; } di = (struct ocfs2_dinode *) (*bh)->b_data; - status = ocfs2_verify_volume(di, *bh, blksize); + memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); + status = ocfs2_verify_volume(di, *bh, blksize, stats); if (status >= 0) goto bail; brelse(*bh); @@ -975,6 +977,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; char nodestr[8]; + struct ocfs2_blockcheck_stats stats; mlog_entry("%p, %p, %i", sb, data, silent); @@ -984,13 +987,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } /* probe for superblock */ - status = ocfs2_sb_probe(sb, &bh, §or_size); + status = ocfs2_sb_probe(sb, &bh, §or_size, &stats); if (status < 0) { mlog(ML_ERROR, "superblock probe failed!\n"); goto read_super_error; } - status = ocfs2_initialize_super(sb, bh, sector_size); + status = ocfs2_initialize_super(sb, bh, sector_size, &stats); osb = OCFS2_SB(sb); if (status < 0) { mlog_errno(status); @@ -1100,6 +1103,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + if (ocfs2_meta_ecc(osb)) { + status = ocfs2_blockcheck_stats_debugfs_install( + &osb->osb_ecc_stats, + osb->osb_debug_root); + if (status) { + mlog(ML_ERROR, + "Unable to create blockcheck statistics " + "files\n"); + goto read_super_error; + } + } + status = ocfs2_mount_volume(sb); if (osb->root_inode) inode = igrab(osb->root_inode); @@ -1550,9 +1565,13 @@ static void ocfs2_put_super(struct super_block *sb) { mlog_entry("(0x%p)\n", sb); + lock_kernel(); + ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); + unlock_kernel(); + mlog_exit_void(); } @@ -1766,13 +1785,8 @@ static int ocfs2_mount_volume(struct super_block *sb) } status = ocfs2_truncate_log_init(osb); - if (status < 0) { + if (status < 0) mlog_errno(status); - goto leave; - } - - if (ocfs2_mount_local(osb)) - goto leave; leave: if (unlock_super) @@ -1802,6 +1816,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_truncate_log_shutdown(osb); + ocfs2_orphan_scan_stop(osb); + /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); @@ -1839,6 +1855,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) if (osb->cconn) ocfs2_dlm_shutdown(osb, hangup_needed); + ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove(osb->osb_debug_root); if (hangup_needed) @@ -1886,7 +1903,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, - int sector_size) + int sector_size, + struct ocfs2_blockcheck_stats *stats) { int status; int i, cbits, bbits; @@ -1945,6 +1963,9 @@ static int ocfs2_initialize_super(struct super_block *sb, atomic_set(&osb->alloc_stats.bg_allocs, 0); atomic_set(&osb->alloc_stats.bg_extends, 0); + /* Copy the blockcheck stats from the superblock probe */ + osb->osb_ecc_stats = *stats; + ocfs2_init_node_maps(osb); snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", @@ -1957,6 +1978,13 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } + status = ocfs2_orphan_scan_init(osb); + if (status) { + mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); + mlog_errno(status); + goto bail; + } + init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); @@ -2175,7 +2203,8 @@ bail: */ static int ocfs2_verify_volume(struct ocfs2_dinode *di, struct buffer_head *bh, - u32 blksz) + u32 blksz, + struct ocfs2_blockcheck_stats *stats) { int status = -EAGAIN; @@ -2188,7 +2217,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, OCFS2_FEATURE_INCOMPAT_META_ECC) { status = ocfs2_block_check_validate(bh->b_data, bh->b_size, - &di->i_check); + &di->i_check, + stats); if (status) goto out; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 15631019dc63..ba320e250747 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); if (func) { ret = func(inode, bucket, para); - if (ret) + if (ret && ret != -ERANGE) mlog_errno(ret); /* Fall through to bucket_relse() */ } @@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, ocfs2_list_xattr_bucket, &xl); if (ret) { - mlog_errno(ret); + if (ret != -ERANGE) + mlog_errno(ret); goto out; } diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 834b2331f6b3..d17e774eaf45 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -11,21 +11,6 @@ #include <linux/mpage.h> #include "omfs.h" -static int omfs_sync_file(struct file *file, struct dentry *dentry, - int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - err |= omfs_sync_inode(inode); - return err ? -EIO : 0; -} - static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) { return (sbi->s_sys_blocksize - offset - @@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = omfs_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/open.c b/fs/open.c index bdfbf03615a4..7200e23d9258 100644 --- a/fs/open.c +++ b/fs/open.c @@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) audit_inode(NULL, dentry); - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out_putf; mutex_lock(&inode->i_mutex); @@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) if (!file) goto out; - error = mnt_want_write(file->f_path.mnt); + error = mnt_want_write_file(file); if (error) goto out_fput; dentry = file->f_path.dentry; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0af36085eb28..1a9c7878f864 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -556,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) /* add partitions */ for (p = 1; p < state->limit; p++) { - sector_t size = state->parts[p].size; - sector_t from = state->parts[p].from; + sector_t size, from; +try_scan: + size = state->parts[p].size; if (!size) continue; + + from = state->parts[p].from; if (from >= get_capacity(disk)) { printk(KERN_WARNING "%s: p%d ignored, start %llu is behind the end of the disk\n", disk->disk_name, p, (unsigned long long) from); continue; } + if (from + size > get_capacity(disk)) { - /* - * we can not ignore partitions of broken tables - * created by for example camera firmware, but we - * limit them to the end of the disk to avoid - * creating invalid block devices - */ + struct block_device_operations *bdops = disk->fops; + unsigned long long capacity; + printk(KERN_WARNING - "%s: p%d size %llu limited to end of disk\n", + "%s: p%d size %llu exceeds device capacity, ", disk->disk_name, p, (unsigned long long) size); - size = get_capacity(disk) - from; + + if (bdops->set_capacity && + (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { + printk(KERN_CONT "enabling native capacity\n"); + capacity = bdops->set_capacity(disk, ~0ULL); + disk->flags |= GENHD_FL_NATIVE_CAPACITY; + if (capacity > get_capacity(disk)) { + set_capacity(disk, capacity); + check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; + } + goto try_scan; + } else { + /* + * we can not ignore partitions of broken tables + * created by for example camera firmware, but + * we limit them to the end of the disk to avoid + * creating invalid block devices + */ + printk(KERN_CONT "limited to end of disk\n"); + size = get_capacity(disk) - from; + } } part = add_partition(disk, p, from, size, state->parts[p].flags); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index f6db9618a888..753ca37002c8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -92,3 +92,28 @@ struct pde_opener { struct list_head lh; }; void pde_users_dec(struct proc_dir_entry *pde); + +extern spinlock_t proc_subdir_lock; + +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); +unsigned long task_vsize(struct mm_struct *); +int task_statm(struct mm_struct *, int *, int *, int *, int *); +void task_mem(struct seq_file *, struct mm_struct *); + +struct proc_dir_entry *de_get(struct proc_dir_entry *de); +void de_put(struct proc_dir_entry *de); + +extern struct vfsmount *proc_mnt; +int proc_fill_super(struct super_block *); +struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); + +/* + * These are generic /proc routines that use the internal + * "struct proc_dir_entry" tree to traverse the filesystem. + * + * The /proc root directory has extended versions to take care + * of the /proc/<pid> subdirectories. + */ +int proc_readdir(struct file *, void *, filldir_t); +struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de2bba5a3440..fc6c3025befd 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <asm/prom.h> #include <asm/uaccess.h> +#include "internal.h" #ifndef HAVE_ARCH_DEVTREE_FIXUPS static inline void set_node_proc_entry(struct device_node *np, diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index 502d7fe98bab..e4d408cc5473 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4.o -qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o +qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 8425cf6e9624..e1cd061a25f7 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -13,14 +13,9 @@ * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . */ -#include <linux/time.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/stat.h> -#include <linux/kernel.h> -#include <linux/string.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include "qnx4.h" #if 0 int qnx4_new_block(struct super_block *sb) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index ea9ffefb48ad..003c68f3238b 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -11,14 +11,9 @@ * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. */ -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/stat.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> - +#include "qnx4.h" static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -84,7 +79,7 @@ const struct file_operations qnx4_dir_operations = { .read = generic_read_dir, .readdir = qnx4_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, }; const struct inode_operations qnx4_dir_inode_operations = diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 867f42b02035..09b170ac936c 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -12,8 +12,7 @@ * 27-06-1998 by Frank Denis : file overwriting. */ -#include <linux/fs.h> -#include <linux/qnx4_fs.h> +#include "qnx4.h" /* * We have mostly NULL's here: the current defaults are ok for @@ -29,7 +28,7 @@ const struct file_operations qnx4_file_operations = #ifdef CONFIG_QNX4FS_RW .write = do_sync_write, .aio_write = generic_file_aio_write, - .fsync = qnx4_sync_file, + .fsync = simple_fsync, #endif }; diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c deleted file mode 100644 index aa3b19544bee..000000000000 --- a/fs/qnx4/fsync.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * QNX4 file system, Linux implementation. - * - * Version : 0.1 - * - * Using parts of the xiafs filesystem. - * - * History : - * - * 24-03-1998 by Richard Frowijn : first release. - */ - -#include <linux/errno.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/smp_lock.h> -#include <linux/buffer_head.h> - -#include <linux/fs.h> -#include <linux/qnx4_fs.h> - -#include <asm/system.h> - -/* - * The functions for qnx4 fs file synchronization. - */ - -#ifdef CONFIG_QNX4FS_RW - -static int sync_block(struct inode *inode, unsigned short *block, int wait) -{ - struct buffer_head *bh; - unsigned short tmp; - - if (!*block) - return 0; - tmp = *block; - bh = sb_find_get_block(inode->i_sb, *block); - if (!bh) - return 0; - if (*block != tmp) { - brelse(bh); - return 1; - } - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - brelse(bh); - return -1; - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - return 0; -} - -#ifdef WTF -static int sync_iblock(struct inode *inode, unsigned short *iblock, - struct buffer_head **bh, int wait) -{ - int rc; - unsigned short tmp; - - *bh = NULL; - tmp = *iblock; - if (!tmp) - return 0; - rc = sync_block(inode, iblock, wait); - if (rc) - return rc; - *bh = sb_bread(inode->i_sb, tmp); - if (tmp != *iblock) { - brelse(*bh); - *bh = NULL; - return 1; - } - if (!*bh) - return -1; - return 0; -} -#endif - -static int sync_direct(struct inode *inode, int wait) -{ - int i; - int rc, err = 0; - - for (i = 0; i < 7; i++) { - rc = sync_block(inode, - (unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - return err; -} - -#ifdef WTF -static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait) -{ - int i; - struct buffer_head *ind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, iblock, &ind_bh, wait); - if (rc || !ind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_block(inode, - ((unsigned short *) ind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(ind_bh); - return err; -} - -static int sync_dindirect(struct inode *inode, unsigned short *diblock, - int wait) -{ - int i; - struct buffer_head *dind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_indirect(inode, - ((unsigned short *) dind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} -#endif - -int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused) -{ - struct inode *inode = dentry->d_inode; - int wait, err = 0; - - (void) file; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return -EINVAL; - - lock_kernel(); - for (wait = 0; wait <= 1; wait++) { - err |= sync_direct(inode, wait); - } - err |= qnx4_sync_inode(inode); - unlock_kernel(); - return (err < 0) ? -EIO : 0; -} - -#endif diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fe1f0f31d11c..681df5fcd161 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -13,19 +13,15 @@ */ #include <linux/module.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/vfs.h> -#include <asm/uaccess.h> +#include <linux/writeback.h> +#include <linux/statfs.h> +#include "qnx4.h" #define QNX4_VERSION 4 #define QNX4_BMNAME ".bitmap" @@ -34,31 +30,6 @@ static const struct super_operations qnx4_sops; #ifdef CONFIG_QNX4FS_RW -int qnx4_sync_inode(struct inode *inode) -{ - int err = 0; -# if 0 - struct buffer_head *bh; - - bh = qnx4_update_inode(inode); - if (bh && buffer_dirty(bh)) - { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { - printk ("IO error syncing qnx4 inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -1; - } - brelse (bh); - } else if (!bh) { - err = -1; - } -# endif - - return err; -} - static void qnx4_delete_inode(struct inode *inode) { QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); @@ -70,15 +41,7 @@ static void qnx4_delete_inode(struct inode *inode) unlock_kernel(); } -static void qnx4_write_super(struct super_block *sb) -{ - lock_kernel(); - QNX4DEBUG(("qnx4: write_super\n")); - sb->s_dirt = 0; - unlock_kernel(); -} - -static int qnx4_write_inode(struct inode *inode, int unused) +static int qnx4_write_inode(struct inode *inode, int do_sync) { struct qnx4_inode_entry *raw_inode; int block, ino; @@ -115,6 +78,16 @@ static int qnx4_write_inode(struct inode *inode, int unused) raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); mark_buffer_dirty(bh); + if (do_sync) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk("qnx4: IO error syncing inode [%s:%08x]\n", + inode->i_sb->s_id, ino); + brelse(bh); + unlock_kernel(); + return -EIO; + } + } brelse(bh); unlock_kernel(); return 0; @@ -138,7 +111,6 @@ static const struct super_operations qnx4_sops = #ifdef CONFIG_QNX4FS_RW .write_inode = qnx4_write_inode, .delete_inode = qnx4_delete_inode, - .write_super = qnx4_write_super, #endif }; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 775eed3a4085..5972ed214937 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -12,16 +12,9 @@ * 04-07-1998 by Frank Denis : first step for rmdir/unlink. */ -#include <linux/time.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/errno.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include "qnx4.h" /* @@ -187,7 +180,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); clear_nlink(inode); mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; @@ -232,7 +225,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); inode->i_ctime = dir->i_ctime; diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h new file mode 100644 index 000000000000..9efc089454f6 --- /dev/null +++ b/fs/qnx4/qnx4.h @@ -0,0 +1,57 @@ +#include <linux/fs.h> +#include <linux/qnx4_fs.h> + +#define QNX4_DEBUG 0 + +#if QNX4_DEBUG +#define QNX4DEBUG(X) printk X +#else +#define QNX4DEBUG(X) (void) 0 +#endif + +struct qnx4_sb_info { + struct buffer_head *sb_buf; /* superblock buffer */ + struct qnx4_super_block *sb; /* our superblock */ + unsigned int Version; /* may be useful */ + struct qnx4_inode_entry *BitMap; /* useful */ +}; + +struct qnx4_inode_info { + struct qnx4_inode_entry raw; + loff_t mmu_private; + struct inode vfs_inode; +}; + +extern struct inode *qnx4_iget(struct super_block *, unsigned long); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); +extern unsigned long qnx4_count_free_blocks(struct super_block *sb); +extern unsigned long qnx4_block_map(struct inode *inode, long iblock); + +extern struct buffer_head *qnx4_bread(struct inode *, int, int); + +extern const struct inode_operations qnx4_file_inode_operations; +extern const struct inode_operations qnx4_dir_inode_operations; +extern const struct file_operations qnx4_file_operations; +extern const struct file_operations qnx4_dir_operations; +extern int qnx4_is_free(struct super_block *sb, long block); +extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); +extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); +extern void qnx4_truncate(struct inode *inode); +extern void qnx4_free_inode(struct inode *inode); +extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); +extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); + +static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) +{ + return container_of(inode, struct qnx4_inode_info, vfs_inode); +} + +static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) +{ + return &qnx4_i(inode)->raw; +} diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 6437c1c3d1dd..d94d9ee241fe 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c @@ -10,12 +10,8 @@ * 30-06-1998 by Frank DENIS : ugly filler. */ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> #include <linux/smp_lock.h> -#include <asm/uaccess.h> +#include "qnx4.h" #ifdef CONFIG_QNX4FS_RW diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b7f5a468f076..95c5b42384b2 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, return error; } -static void quota_sync_sb(struct super_block *sb, int type) +#ifdef CONFIG_QUOTA +void sync_quota_sb(struct super_block *sb, int type) { int cnt; + if (!sb->s_qcop->quota_sync) + return; + sb->s_qcop->quota_sync(sb, type); if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) @@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type) } mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); } +#endif -void sync_dquots(struct super_block *sb, int type) +static void sync_dquots(int type) { + struct super_block *sb; int cnt; - if (sb) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - return; - } - spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { @@ -222,8 +222,8 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); + if (sb->s_root) + sync_quota_sb(sb, type); up_read(&sb->s_umount); spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) @@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return sb->s_qcop->set_dqblk(sb, type, id, &idq); } case Q_SYNC: - sync_dquots(sb, type); + if (sb) + sync_quota_sb(sb, type); + else + sync_dquots(type); return 0; case Q_XQUOTAON: diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 3a6b193d8444..0ff7566c767c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -202,9 +202,12 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; - default: - printk(KERN_ERR "ramfs: bad mount option: %s\n", p); - return -EINVAL; + /* + * We might like to report bad mount options here; + * but traditionally ramfs has ignored all mount options, + * and as it is used as a !CONFIG_SHMEM simple substitute + * for tmpfs, better continue to ignore other mount options. + */ } } diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 45ee3d357c70..6d2668fdc384 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, static inline bool is_privroot_deh(struct dentry *dir, struct reiserfs_de_head *deh) { - int ret = 0; -#ifdef CONFIG_REISERFS_FS_XATTR struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - ret = (dir == dir->d_parent && privroot->d_inode && - deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); -#endif - return ret; + if (reiserfs_expose_privroot(dir->d_sb)) + return 0; + return (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); } int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3567fb9e3fb1..2969773cfc22 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -28,6 +28,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/crc32.h> +#include <linux/smp_lock.h> struct file_system_type reiserfs_fs_type; @@ -64,18 +65,15 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) { - if (!(s->s_flags & MS_RDONLY)) { - struct reiserfs_transaction_handle th; - reiserfs_write_lock(s); - if (!journal_begin(&th, s, 1)) - if (!journal_end_sync(&th, s, 1)) - reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ - reiserfs_write_unlock(s); - } else { - s->s_dirt = 0; - } + struct reiserfs_transaction_handle th; + + reiserfs_write_lock(s); + if (!journal_begin(&th, s, 1)) + if (!journal_end_sync(&th, s, 1)) + reiserfs_flush_old_commits(s); + s->s_dirt = 0; /* Even if it's not true. + * We'll loop forever in sync_supers otherwise */ + reiserfs_write_unlock(s); return 0; } @@ -468,6 +466,11 @@ static void reiserfs_put_super(struct super_block *s) struct reiserfs_transaction_handle th; th.t_trans_id = 0; + lock_kernel(); + + if (s->s_dirt) + reiserfs_write_super(s); + /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -500,7 +503,7 @@ static void reiserfs_put_super(struct super_block *s) kfree(s->s_fs_info); s->s_fs_info = NULL; - return; + unlock_kernel(); } static struct kmem_cache *reiserfs_inode_cachep; @@ -898,6 +901,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin {"conv",.setmask = 1 << REISERFS_CONVERT}, {"attrs",.setmask = 1 << REISERFS_ATTRS}, {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, + {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, #ifdef CONFIG_REISERFS_FS_XATTR {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, @@ -1193,6 +1197,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif + lock_kernel(); rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options @@ -1315,10 +1320,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) out_ok: replace_mount_options(s, new_opts); + unlock_kernel(); return 0; out_err: kfree(new_opts); + unlock_kernel(); return err; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8e7deb0e6964..f3d47d856848 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - s->s_root->d_op = &xattr_lookup_poison_ops; + if (!reiserfs_expose_privroot(s)) + s->s_root->d_op = &xattr_lookup_poison_ops; if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fc27fbfc5397..1402d2d54f52 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb) { struct smb_sb_info *server = SMB_SB(sb); + lock_kernel(); + smb_lock_server(server); server->state = CONN_INVALID; smbiod_unregister_server(server); @@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb) smb_unlock_server(server); put_pid(server->conn_pid); kfree(server); + + unlock_kernel(); } static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 0adc624c956f..3b52770f46ff 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data) static void squashfs_put_super(struct super_block *sb) { + lock_kernel(); + if (sb->s_fs_info) { struct squashfs_sb_info *sbi = sb->s_fs_info; squashfs_cache_delete(sbi->block_cache); @@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; } + + unlock_kernel(); } diff --git a/fs/super.c b/fs/super.c index 1943fdf655fa..83b47416d006 100644 --- a/fs/super.c +++ b/fs/super.c @@ -28,7 +28,6 @@ #include <linux/blkdev.h> #include <linux/quotaops.h> #include <linux/namei.h> -#include <linux/buffer_head.h> /* for fsync_super() */ #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -38,7 +37,6 @@ #include <linux/kobject.h> #include <linux/mutex.h> #include <linux/file.h> -#include <linux/async.h> #include <asm/uaccess.h> #include "internal.h" @@ -72,7 +70,6 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_async_list); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -285,38 +282,6 @@ void unlock_super(struct super_block * sb) EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. Requires a second blkdev - * flush by the caller to complete the operation. - */ -void __fsync_super(struct super_block *sb) -{ - sync_inodes_sb(sb, 0); - vfs_dq_sync(sb); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); -} -EXPORT_SYMBOL_GPL(fsync_super); - /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -338,21 +303,13 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); - fsync_super(sb); - lock_super(sb); + sync_filesystem(sb); + get_fs_excl(); sb->s_flags &= ~MS_ACTIVE; - /* - * wait for asynchronous fs operations to finish before going further - */ - async_synchronize_full_domain(&sb->s_async_list); - /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); - lock_kernel(); - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); if (sop->put_super) sop->put_super(sb); @@ -362,9 +319,7 @@ void generic_shutdown_super(struct super_block *sb) "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } - - unlock_kernel(); - unlock_super(sb); + put_fs_excl(); } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ @@ -441,16 +396,14 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); -static inline void write_super(struct super_block *sb) -{ - lock_super(sb); - if (sb->s_root && sb->s_dirt) - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); -} - -/* +/** + * sync_supers - helper for periodic superblock writeback + * + * Call the write_super method if present on all dirty superblocks in + * the system. This is for the periodic writeback used by most older + * filesystems. For data integrity superblock writeback use + * sync_filesystems() instead. + * * Note: check the dirty flag before waiting, so we don't * hold up the sync while mounting a device. (The newly * mounted device won't need syncing.) @@ -462,12 +415,15 @@ void sync_supers(void) spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_dirt) { + if (sb->s_op->write_super && sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); + down_read(&sb->s_umount); - write_super(sb); + if (sb->s_root && sb->s_dirt) + sb->s_op->write_super(sb); up_read(&sb->s_umount); + spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) goto restart; @@ -476,60 +432,6 @@ restart: spin_unlock(&sb_lock); } -/* - * Call the ->sync_fs super_op against all filesystems which are r/w and - * which implement it. - * - * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync_fs - * is used only here. We set it against all filesystems and then clear it as - * we sync them. So redirtied filesystems are skipped. - * - * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync_fs - * flags again, which will cause process A to resync everything. Fix that with - * a local mutex. - * - * (Fabian) Avoid sync_fs with clean fs & wait mode 0 - */ -void sync_filesystems(int wait) -{ - struct super_block *sb; - static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); /* Could be down_interruptible */ - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_op->sync_fs) - continue; - if (sb->s_flags & MS_RDONLY) - continue; - sb->s_need_sync_fs = 1; - } - -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync_fs) - continue; - sb->s_need_sync_fs = 0; - if (sb->s_flags & MS_RDONLY) - continue; /* hm. Was remounted r/o meanwhile */ - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - async_synchronize_full_domain(&sb->s_async_list); - if (sb->s_root && (wait || sb->s_dirt)) - sb->s_op->sync_fs(sb, wait); - up_read(&sb->s_umount); - /* restart only when sb is no longer on the list */ - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); - mutex_unlock(&mutex); -} - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for @@ -616,45 +518,6 @@ out: } /** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ - -static void mark_files_ro(struct super_block *sb) -{ - struct file *f; - -retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - f->f_mode &= ~FMODE_WRITE; - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ - mnt_drop_write(mnt); - mntput(mnt); - goto retry; - } - file_list_unlock(); -} - -/** * do_remount_sb - asks filesystem to change mount options. * @sb: superblock in question * @flags: numeric part of options @@ -675,27 +538,31 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); - fsync_super(sb); + sync_filesystem(sb); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) + else if (!fs_may_remount_ro(sb)) { + unlock_kernel(); return -EBUSY; + } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) + if (retval < 0 && retval != -ENOSYS) { + unlock_kernel(); return -EBUSY; + } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { - lock_super(sb); retval = sb->s_op->remount_fs(sb, &flags, data); - unlock_super(sb); - if (retval) + if (retval) { + unlock_kernel(); return retval; + } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) @@ -711,18 +578,17 @@ static void do_emergency_remount(struct work_struct *work) list_for_each_entry(sb, &super_blocks, s_list) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { /* * ->remount_fs needs lock_kernel(). * * What lock protects sb->s_flags?? */ - lock_kernel(); do_remount_sb(sb, MS_RDONLY, NULL, 1); - unlock_kernel(); } - drop_super(sb); + up_write(&sb->s_umount); + put_super(sb); spin_lock(&sb_lock); } spin_unlock(&sb_lock); diff --git a/fs/sync.c b/fs/sync.c index 7abc65fbf21d..dd200025af85 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -13,38 +13,123 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include "internal.h" #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) + * just dirties buffers with inodes so we have to submit IO for these buffers + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that + * case write_inode() functions do sync_dirty_buffer() and thus effectively + * write one block at a time. */ -static void do_sync(unsigned long wait) +static int __sync_filesystem(struct super_block *sb, int wait) { - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - vfs_dq_sync(NULL); - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ + /* Avoid doing twice syncing and cache pruning for quota sync */ if (!wait) - printk("Emergency Sync complete\n"); - if (unlikely(laptop_mode)) - laptop_sync_completion(); + writeout_quota_sb(sb, -1); + else + sync_quota_sb(sb, -1); + sync_inodes_sb(sb, wait); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int sync_filesystem(struct super_block *sb) +{ + int ret; + + /* + * We need to be protected against the filesystem going from + * r/o to r/w or vice versa. + */ + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + /* + * No point in syncing out anything if the filesystem is read-only. + */ + if (sb->s_flags & MS_RDONLY) + return 0; + + ret = __sync_filesystem(sb, 0); + if (ret < 0) + return ret; + return __sync_filesystem(sb, 1); +} +EXPORT_SYMBOL_GPL(sync_filesystem); + +/* + * Sync all the data for all the filesystems (called by sys_sync() and + * emergency sync) + * + * This operation is careful to avoid the livelock which could easily happen + * if two or more filesystems are being continuously dirtied. s_need_sync + * is used only here. We set it against all filesystems and then clear it as + * we sync them. So redirtied filesystems are skipped. + * + * But if process A is currently running sync_filesystems and then process B + * calls sync_filesystems as well, process B will set all the s_need_sync + * flags again, which will cause process A to resync everything. Fix that with + * a local mutex. + */ +static void sync_filesystems(int wait) +{ + struct super_block *sb; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); /* Could be down_interruptible */ + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) + sb->s_need_sync = 1; + +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_need_sync) + continue; + sb->s_need_sync = 0; + sb->s_count++; + spin_unlock(&sb_lock); + + down_read(&sb->s_umount); + if (!(sb->s_flags & MS_RDONLY) && sb->s_root) + __sync_filesystem(sb, wait); + up_read(&sb->s_umount); + + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + mutex_unlock(&mutex); } SYSCALL_DEFINE0(sync) { - do_sync(1); + sync_filesystems(0); + sync_filesystems(1); + if (unlikely(laptop_mode)) + laptop_sync_completion(); return 0; } static void do_sync_work(struct work_struct *work) { - do_sync(0); + /* + * Sync twice to reduce the possibility we skipped some inodes / pages + * because they were temporarily locked + */ + sync_filesystems(0); + sync_filesystems(0); + printk("Emergency Sync complete\n"); kfree(work); } @@ -75,10 +160,8 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) /* sync the superblock to buffers */ sb = inode->i_sb; - lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); - unlock_super(sb); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 56f655254bfe..c7798079e644 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -24,7 +24,7 @@ static int sysv_readdir(struct file *, void *, filldir_t); const struct file_operations sysv_dir_operations = { .read = generic_read_dir, .readdir = sysv_readdir, - .fsync = sysv_sync_file, + .fsync = simple_fsync, }; static inline void dir_put_page(struct page *page) diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 589be21d884e..96340c01f4a7 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = sysv_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; @@ -34,18 +34,3 @@ const struct inode_operations sysv_file_inode_operations = { .truncate = sysv_truncate, .getattr = sysv_getattr, }; - -int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= sysv_sync_inode(inode); - return err ? -EIO : 0; -} diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index da20b48d350f..479923456a54 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -31,15 +31,13 @@ #include <asm/byteorder.h> #include "sysv.h" -/* This is only called on sync() and umount(), when s_dirt=1. */ -static void sysv_write_super(struct super_block *sb) +static int sysv_sync_fs(struct super_block *sb, int wait) { struct sysv_sb_info *sbi = SYSV_SB(sb); unsigned long time = get_seconds(), old_time; + lock_super(sb); lock_kernel(); - if (sb->s_flags & MS_RDONLY) - goto clean; /* * If we are going to write out the super block, @@ -53,18 +51,30 @@ static void sysv_write_super(struct super_block *sb) *sbi->s_sb_time = cpu_to_fs32(sbi, time); mark_buffer_dirty(sbi->s_bh2); } -clean: - sb->s_dirt = 0; + unlock_kernel(); + unlock_super(sb); + + return 0; +} + +static void sysv_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + sysv_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); + lock_super(sb); if (sbi->s_forced_ro) *flags |= MS_RDONLY; if (!(*flags & MS_RDONLY)) sb->s_dirt = 1; + unlock_super(sb); return 0; } @@ -72,6 +82,11 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + sysv_write_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); @@ -84,6 +99,8 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); + + unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -236,7 +253,7 @@ bad_inode: return ERR_PTR(-EIO); } -static struct buffer_head * sysv_update_inode(struct inode * inode) +int sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -244,19 +261,21 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) struct sysv_inode * raw_inode; struct sysv_inode_info * si; unsigned int ino, block; + int err = 0; ino = inode->i_ino; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", inode->i_sb->s_id, ino); - return NULL; + return -EIO; } raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("unable to read i-node block\n"); - return NULL; + return -EIO; } + lock_kernel(); raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); @@ -272,38 +291,23 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); + unlock_kernel(); mark_buffer_dirty(bh); - return bh; -} - -int sysv_write_inode(struct inode * inode, int wait) -{ - struct buffer_head *bh; - lock_kernel(); - bh = sysv_update_inode(inode); + if (wait) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk ("IO error syncing sysv inode [%s:%08x]\n", + sb->s_id, ino); + err = -EIO; + } + } brelse(bh); - unlock_kernel(); return 0; } -int sysv_sync_inode(struct inode * inode) +int sysv_sync_inode(struct inode *inode) { - int err = 0; - struct buffer_head *bh; - - bh = sysv_update_inode(inode); - if (bh && buffer_dirty(bh)) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk ("IO error syncing sysv inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -1; - } - } - else if (!bh) - err = -1; - brelse (bh); - return err; + return sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) @@ -347,6 +351,7 @@ const struct super_operations sysv_sops = { .delete_inode = sysv_delete_inode, .put_super = sysv_put_super, .write_super = sysv_write_super, + .sync_fs = sysv_sync_fs, .remount_fs = sysv_remount, .statfs = sysv_statfs, }; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 5784a318c883..53786eb5cf60 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -144,7 +144,6 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, int); extern int sysv_sync_inode(struct inode *); -extern int sysv_sync_file(struct file *, struct dentry *, int); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int sysv_init_icache(void); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e9f7a754c4f7..3260b73abe29 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -36,6 +36,7 @@ #include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> +#include <linux/smp_lock.h> #include "ubifs.h" /* @@ -447,9 +448,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - if (sb->s_flags & MS_RDONLY) - return 0; - /* * VFS calls '->sync_fs()' before synchronizing all dirty inodes and * pages, so synchronize them first, then commit the journal. Strictly @@ -1687,6 +1685,9 @@ static void ubifs_put_super(struct super_block *sb) ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + + lock_kernel(); + /* * The following asserts are only valid if there has not been a failure * of the media. For example, there will be dirty inodes if we failed @@ -1753,6 +1754,8 @@ static void ubifs_put_super(struct super_block *sb) ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); kfree(c); + + unlock_kernel(); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -1768,17 +1771,22 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } + lock_kernel(); if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); + unlock_kernel(); return -EROFS; } err = ubifs_remount_rw(c); - if (err) + if (err) { + unlock_kernel(); return err; + } } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); + unlock_kernel(); return -EROFS; } ubifs_remount_ro(c); @@ -1793,6 +1801,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } ubifs_assert(c->lst.taken_empty_lebs > 0); + unlock_kernel(); return 0; } @@ -1928,6 +1937,9 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) err = bdi_init(&c->bdi); if (err) goto out_close; + err = bdi_register(&c->bdi, NULL, "ubifs"); + if (err) + goto out_bdi; err = ubifs_parse_options(c, data, 0); if (err) diff --git a/fs/udf/Makefile b/fs/udf/Makefile index 0d4503f7446d..eb880f66c23a 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_UDF_FS) += udf.o udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ - partition.o super.o truncate.o symlink.o fsync.o \ + partition.o super.o truncate.o symlink.o \ directory.o misc.o udftime.o unicode.o diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 2efd4d5291b6..61d9a76a3a69 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -210,5 +210,5 @@ const struct file_operations udf_dir_operations = { .read = generic_read_dir, .readdir = udf_readdir, .ioctl = udf_ioctl, - .fsync = udf_fsync_file, + .fsync = simple_fsync, }; diff --git a/fs/udf/file.c b/fs/udf/file.c index eb91f3b70320..7464305382b5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = { .write = do_sync_write, .aio_write = udf_file_aio_write, .release = udf_release_file, - .fsync = udf_fsync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, .llseek = generic_file_llseek, }; diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c deleted file mode 100644 index b2c472b733b8..000000000000 --- a/fs/udf/fsync.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * fsync.c - * - * PURPOSE - * Fsync handling routines for the OSTA-UDF(tm) filesystem. - * - * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * - * (C) 1999-2001 Ben Fennema - * (C) 1999-2000 Stelias Computing Inc - * - * HISTORY - * - * 05/22/99 blf Created. - */ - -#include "udfdecl.h" - -#include <linux/fs.h> - -static int udf_fsync_inode(struct inode *, int); - -/* - * File may be NULL when we are called. Perhaps we shouldn't - * even pass file to fsync ? - */ - -int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - - return udf_fsync_inode(inode, datasync); -} - -static int udf_fsync_inode(struct inode *inode, int datasync) -{ - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= udf_sync_inode(inode); - - return err ? -EIO : 0; -} diff --git a/fs/udf/super.c b/fs/udf/super.c index 0ba44107d8f1..6832135159b6 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -568,6 +568,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) if (!udf_parse_options(options, &uopt, true)) return -EINVAL; + lock_kernel(); sbi->s_flags = uopt.flags; sbi->s_uid = uopt.uid; sbi->s_gid = uopt.gid; @@ -581,13 +582,16 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) *flags |= MS_RDONLY; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) udf_close_lvid(sb); else udf_open_lvid(sb); + unlock_kernel(); return 0; } @@ -2062,6 +2066,9 @@ static void udf_put_super(struct super_block *sb) struct udf_sb_info *sbi; sbi = UDF_SB(sb); + + lock_kernel(); + if (sbi->s_vat_inode) iput(sbi->s_vat_inode); if (sbi->s_partitions) @@ -2077,6 +2084,8 @@ static void udf_put_super(struct super_block *sb) kfree(sbi->s_partmaps); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int udf_sync_fs(struct super_block *sb, int wait) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index cac51b77a5d1..8d46f4294ee7 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -223,9 +223,6 @@ extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, extern int udf_new_block(struct super_block *, struct inode *, uint16_t, uint32_t, int *); -/* fsync.c */ -extern int udf_fsync_file(struct file *, struct dentry *, int); - /* directory.c */ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 6321b797061b..6f671f1ac271 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -666,6 +666,6 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, - .fsync = ufs_sync_file, + .fsync = simple_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 2bd3a1615714..73655c61240a 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,31 +24,10 @@ */ #include <linux/fs.h> -#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ #include "ufs_fs.h" #include "ufs.h" - -int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = ufs_sync_inode(inode); - if (ret == 0) - ret = err; - return ret; -} - - /* * We have mostly NULL's here: the current defaults are ok for * the ufs filesystem. @@ -62,6 +41,6 @@ const struct file_operations ufs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = generic_file_open, - .fsync = ufs_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 60359291761f..5faed7954d0a 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -263,6 +263,7 @@ void ufs_panic (struct super_block * sb, const char * function, struct ufs_super_block_first * usb1; va_list args; + lock_kernel(); uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); @@ -594,6 +595,9 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("ENTER\n"); + + lock_kernel(); + ufs_put_cstotal(sb); size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -621,6 +625,9 @@ static void ufs_put_super_internal(struct super_block *sb) brelse (sbi->s_ucg[i]); kfree (sbi->s_ucg); kfree (base); + + unlock_kernel(); + UFSD("EXIT\n"); } @@ -1118,32 +1125,45 @@ failed_nomem: return -ENOMEM; } -static void ufs_write_super(struct super_block *sb) +static int ufs_sync_fs(struct super_block *sb, int wait) { struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; struct ufs_super_block_third * usb3; unsigned flags; + lock_super(sb); lock_kernel(); + UFSD("ENTER\n"); + flags = UFS_SB(sb)->s_flags; uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); usb3 = ubh_get_usb_third(uspi); - if (!(sb->s_flags & MS_RDONLY)) { - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); - if ((flags & UFS_ST_MASK) == UFS_ST_SUN - || (flags & UFS_ST_MASK) == UFS_ST_SUNOS - || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) - ufs_set_fs_state(sb, usb1, usb3, - UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ufs_put_cstotal(sb); - } + usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + if ((flags & UFS_ST_MASK) == UFS_ST_SUN || + (flags & UFS_ST_MASK) == UFS_ST_SUNOS || + (flags & UFS_ST_MASK) == UFS_ST_SUNx86) + ufs_set_fs_state(sb, usb1, usb3, + UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); + ufs_put_cstotal(sb); sb->s_dirt = 0; + UFSD("EXIT\n"); unlock_kernel(); + unlock_super(sb); + + return 0; +} + +static void ufs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ufs_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static void ufs_put_super(struct super_block *sb) @@ -1152,6 +1172,9 @@ static void ufs_put_super(struct super_block *sb) UFSD("ENTER\n"); + if (sb->s_dirt) + ufs_write_super(sb); + if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); @@ -1171,7 +1194,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) struct ufs_super_block_third * usb3; unsigned new_mount_opt, ufstype; unsigned flags; - + + lock_kernel(); + lock_super(sb); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1184,17 +1209,24 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE; new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); - if (!ufs_parse_options (data, &new_mount_opt)) + if (!ufs_parse_options (data, &new_mount_opt)) { + unlock_super(sb); + unlock_kernel(); return -EINVAL; + } if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { printk("ufstype can't be changed during remount\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + unlock_super(sb); + unlock_kernel(); return 0; } @@ -1219,6 +1251,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #ifndef CONFIG_UFS_FS_WRITE printk("ufs was compiled with read-only support, " "can't be mounted as read-write\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; #else if (ufstype != UFS_MOUNT_UFSTYPE_SUN && @@ -1227,16 +1261,22 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { printk("this ufstype is read-only supported\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { printk("failed during remounting\n"); + unlock_super(sb); + unlock_kernel(); return -EPERM; } sb->s_flags &= ~MS_RDONLY; #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + unlock_super(sb); + unlock_kernel(); return 0; } @@ -1352,6 +1392,7 @@ static const struct super_operations ufs_super_ops = { .delete_inode = ufs_delete_inode, .put_super = ufs_put_super, .write_super = ufs_write_super, + .sync_fs = ufs_sync_fs, .statfs = ufs_statfs, .remount_fs = ufs_remount, .show_options = ufs_show_options, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index d0c4acd4f1f3..644e77e13599 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -99,7 +99,6 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, extern const struct inode_operations ufs_file_inode_operations; extern const struct file_operations ufs_file_operations; extern const struct address_space_operations ufs_aops; -extern int ufs_sync_file(struct file *, struct dentry *, int); /* ialloc.c */ extern void ufs_free_inode (struct inode *inode); diff --git a/fs/xattr.c b/fs/xattr.c index d51b8f9db921..1c3d0af59ddf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = setxattr(dentry, name, value, size, flags); mnt_drop_write(f->f_path.mnt); @@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = removexattr(dentry, name); mnt_drop_write(f->f_path.mnt); diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 29228f5899cd..480f28127f09 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -39,6 +39,7 @@ config XFS_QUOTA config XFS_POSIX_ACL bool "XFS POSIX ACL support" depends on XFS_FS + select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 60f107e47fe9..7a59daed1782 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o @@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \ xfs_utils.o \ xfs_vnodeops.o \ xfs_rw.o \ - xfs_dmops.o \ - xfs_qmops.o + xfs_dmops.o xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ xfs_dir2_trace.o diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c new file mode 100644 index 000000000000..1e9d1246eebc --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_vnodeops.h" +#include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> + + +#define XFS_ACL_NOT_CACHED ((void *)-1) + +/* + * Locking scheme: + * - all ACL updates are protected by inode->i_mutex, which is taken before + * calling into this file. + * - access and updates to the ip->i_acl and ip->i_default_acl pointers are + * protected by inode->i_lock. + */ + +STATIC struct posix_acl * +xfs_acl_from_disk(struct xfs_acl *aclp) +{ + struct posix_acl_entry *acl_e; + struct posix_acl *acl; + struct xfs_acl_entry *ace; + int count, i; + + count = be32_to_cpu(aclp->acl_cnt); + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + acl_e = &acl->a_entries[i]; + ace = &aclp->acl_entry[i]; + + /* + * The tag is 32 bits on disk and 16 bits in core. + * + * Because every access to it goes through the core + * format first this is not a problem. + */ + acl_e->e_tag = be32_to_cpu(ace->ae_tag); + acl_e->e_perm = be16_to_cpu(ace->ae_perm); + + switch (acl_e->e_tag) { + case ACL_USER: + case ACL_GROUP: + acl_e->e_id = be32_to_cpu(ace->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl_e->e_id = ACL_UNDEFINED_ID; + break; + default: + goto fail; + } + } + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +STATIC void +xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) +{ + const struct posix_acl_entry *acl_e; + struct xfs_acl_entry *ace; + int i; + + aclp->acl_cnt = cpu_to_be32(acl->a_count); + for (i = 0; i < acl->a_count; i++) { + ace = &aclp->acl_entry[i]; + acl_e = &acl->a_entries[i]; + + ace->ae_tag = cpu_to_be32(acl_e->e_tag); + ace->ae_id = cpu_to_be32(acl_e->e_id); + ace->ae_perm = cpu_to_be16(acl_e->e_perm); + } +} + +/* + * Update the cached ACL pointer in the inode. + * + * Because we don't hold any locks while reading/writing the attribute + * from/to disk another thread could have raced and updated the cached + * ACL value before us. In that case we release the previous cached value + * and update it with our new value. + */ +STATIC void +xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(*p_acl); + *p_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl * +xfs_get_acl(struct inode *inode, int type) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl = NULL, **p_acl; + struct xfs_acl *xfs_acl; + int len = sizeof(struct xfs_acl); + char *ea_name; + int error; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + p_acl = &ip->i_acl; + break; + case ACL_TYPE_DEFAULT: + ea_name = SGI_ACL_DEFAULT; + p_acl = &ip->i_default_acl; + break; + default: + return ERR_PTR(-EINVAL); + } + + spin_lock(&inode->i_lock); + if (*p_acl != XFS_ACL_NOT_CACHED) + acl = posix_acl_dup(*p_acl); + spin_unlock(&inode->i_lock); + + /* + * If we have a cached ACLs value just return it, not need to + * go out to the disk. + */ + if (acl) + return acl; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return ERR_PTR(-ENOMEM); + + error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); + if (error) { + /* + * If the attribute doesn't exist make sure we have a negative + * cache entry, for any other error assume it is transient and + * leave the cache entry as XFS_ACL_NOT_CACHED. + */ + if (error == -ENOATTR) { + acl = NULL; + goto out_update_cache; + } + goto out; + } + + acl = xfs_acl_from_disk(xfs_acl); + if (IS_ERR(acl)) + goto out; + + out_update_cache: + xfs_update_cached_acl(inode, p_acl, acl); + out: + kfree(xfs_acl); + return acl; +} + +STATIC int +xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl **p_acl; + char *ea_name; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + p_acl = &ip->i_acl; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + ea_name = SGI_ACL_DEFAULT; + p_acl = &ip->i_default_acl; + break; + default: + return -EINVAL; + } + + if (acl) { + struct xfs_acl *xfs_acl; + int len; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return -ENOMEM; + + xfs_acl_to_disk(xfs_acl, acl); + len = sizeof(struct xfs_acl) - + (sizeof(struct xfs_acl_entry) * + (XFS_ACL_MAX_ENTRIES - acl->a_count)); + + error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, + len, ATTR_ROOT); + + kfree(xfs_acl); + } else { + /* + * A NULL ACL argument means we want to remove the ACL. + */ + error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; + } + + if (!error) + xfs_update_cached_acl(inode, p_acl, acl); + return error; +} + +int +xfs_check_acl(struct inode *inode, int mask) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl; + int error = -EAGAIN; + + xfs_itrace_entry(ip); + + /* + * If there is no attribute fork no ACL exists on this inode and + * we can skip the whole exercise. + */ + if (!XFS_IFORK_Q(ip)) + return -EAGAIN; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + } + + return error; +} + +static int +xfs_set_mode(struct inode *inode, mode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + } + + return error; +} + +static int +xfs_acl_exists(struct inode *inode, char *name) +{ + int len = sizeof(struct xfs_acl); + + return (xfs_attr_get(XFS_I(inode), name, NULL, &len, + ATTR_ROOT|ATTR_KERNOVAL) == 0); +} + +int +posix_acl_access_exists(struct inode *inode) +{ + return xfs_acl_exists(inode, SGI_ACL_FILE); +} + +int +posix_acl_default_exists(struct inode *inode) +{ + if (!S_ISDIR(inode->i_mode)) + return 0; + return xfs_acl_exists(inode, SGI_ACL_DEFAULT); +} + +/* + * No need for i_mutex because the inode is not yet exposed to the VFS. + */ +int +xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) +{ + struct posix_acl *clone; + mode_t mode; + int error = 0, inherit = 0; + + if (S_ISDIR(inode->i_mode)) { + error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl); + if (error) + return error; + } + + clone = posix_acl_clone(default_acl, GFP_KERNEL); + if (!clone) + return -ENOMEM; + + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error < 0) + goto out_release_clone; + + /* + * If posix_acl_create_masq returns a positive value we need to + * inherit a permission that can't be represented using the Unix + * mode bits and we actually need to set an ACL. + */ + if (error > 0) + inherit = 1; + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release_clone; + + if (inherit) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + + out_release_clone: + posix_acl_release(clone); + return error; +} + +int +xfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + + posix_acl_release(clone); + return error; +} + +void +xfs_inode_init_acls(struct xfs_inode *ip) +{ + /* + * No need for locking, inode is not live yet. + */ + ip->i_acl = XFS_ACL_NOT_CACHED; + ip->i_default_acl = XFS_ACL_NOT_CACHED; +} + +void +xfs_inode_clear_acls(struct xfs_inode *ip) +{ + /* + * No need for locking here, the inode is not live anymore + * and just about to be freed. + */ + if (ip->i_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(ip->i_acl); + if (ip->i_default_acl != XFS_ACL_NOT_CACHED) + posix_acl_release(ip->i_default_acl); +} + + +/* + * System xattr handlers. + * + * Currently Posix ACLs are the only system namespace extended attribute + * handlers supported by XFS, so we just implement the handlers here. + * If we ever support other system extended attributes this will need + * some refactoring. + */ + +static int +xfs_decode_acl(const char *name) +{ + if (strcmp(name, "posix_acl_access") == 0) + return ACL_TYPE_ACCESS; + else if (strcmp(name, "posix_acl_default") == 0) + return ACL_TYPE_DEFAULT; + return -EINVAL; +} + +static int +xfs_xattr_system_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + struct posix_acl *acl; + int type, error; + + type = xfs_decode_acl(name); + if (type < 0) + return type; + + acl = xfs_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + error = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return error; +} + +static int +xfs_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct posix_acl *acl = NULL; + int error = 0, type; + + type = xfs_decode_acl(name); + if (type < 0) + return type; + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > XFS_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release; + } + + set_acl: + error = xfs_set_acl(inode, type, acl); + out_release: + posix_acl_release(acl); + out: + return error; +} + +struct xattr_handler xfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = xfs_xattr_system_get, + .set = xfs_xattr_system_set, +}; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 34eaab608e6e..5bb523d7f37e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -41,7 +41,6 @@ #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" @@ -899,7 +898,8 @@ xfs_ioctl_setattr( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; @@ -919,7 +919,7 @@ xfs_ioctl_setattr( * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, + code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) @@ -954,10 +954,11 @@ xfs_ioctl_setattr( * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { - if (XFS_IS_PQUOTA_ON(mp) && + if (XFS_IS_QUOTA_RUNNING(mp) && + XFS_IS_PQUOTA_ON(mp) && ip->i_d.di_projid != fa->fsx_projid) { ASSERT(tp); - code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -1059,8 +1060,8 @@ xfs_ioctl_setattr( * in the transaction. */ if (ip->i_d.di_projid != fa->fsx_projid) { - if (XFS_IS_PQUOTA_ON(mp)) { - olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_projid = fa->fsx_projid; @@ -1106,9 +1107,9 @@ xfs_ioctl_setattr( /* * Release any dquot(s) the inode had kept before chown. */ - XFS_QM_DQRELE(mp, olddquot); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(olddquot); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (code) return code; @@ -1122,8 +1123,8 @@ xfs_ioctl_setattr( return 0; error_return: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 6075382336d7..58973bb46038 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -17,6 +17,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -51,6 +52,7 @@ #include <linux/capability.h> #include <linux/xattr.h> #include <linux/namei.h> +#include <linux/posix_acl.h> #include <linux/security.h> #include <linux/falloc.h> #include <linux/fiemap.h> @@ -202,9 +204,8 @@ xfs_vn_mknod( { struct inode *inode; struct xfs_inode *ip = NULL; - xfs_acl_t *default_acl = NULL; + struct posix_acl *default_acl = NULL; struct xfs_name name; - int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS; int error; /* @@ -219,18 +220,14 @@ xfs_vn_mknod( rdev = 0; } - if (test_default_acl && test_default_acl(dir)) { - if (!_ACL_ALLOC(default_acl)) { - return -ENOMEM; - } - if (!_ACL_GET_DEFAULT(dir, default_acl)) { - _ACL_FREE(default_acl); - default_acl = NULL; - } - } + if (IS_POSIXACL(dir)) { + default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(default_acl)) + return -PTR_ERR(default_acl); - if (IS_POSIXACL(dir) && !default_acl) - mode &= ~current_umask(); + if (!default_acl) + mode &= ~current_umask(); + } xfs_dentry_to_name(&name, dentry); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); @@ -244,10 +241,10 @@ xfs_vn_mknod( goto out_cleanup_inode; if (default_acl) { - error = _ACL_INHERIT(inode, mode, default_acl); + error = -xfs_inherit_acl(inode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - _ACL_FREE(default_acl); + posix_acl_release(default_acl); } @@ -257,8 +254,7 @@ xfs_vn_mknod( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); out_free_acl: - if (default_acl) - _ACL_FREE(default_acl); + posix_acl_release(default_acl); return -error; } @@ -488,26 +484,6 @@ xfs_vn_put_link( kfree(s); } -#ifdef CONFIG_XFS_POSIX_ACL -STATIC int -xfs_check_acl( - struct inode *inode, - int mask) -{ - struct xfs_inode *ip = XFS_I(inode); - int error; - - xfs_itrace_entry(ip); - - if (XFS_IFORK_Q(ip)) { - error = xfs_acl_iaccess(ip, mask, NULL); - if (error != -1) - return -error; - } - - return -EAGAIN; -} - STATIC int xfs_vn_permission( struct inode *inode, @@ -515,9 +491,6 @@ xfs_vn_permission( { return generic_permission(inode, mask, xfs_check_acl); } -#else -#define xfs_vn_permission NULL -#endif STATIC int xfs_vn_getattr( diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 9142192ccbe6..7078974a6eee 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 94d9a633d3d9..cb6e2cca214f 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -50,9 +50,11 @@ xfs_fs_quota_sync( { struct xfs_mount *mp = XFS_M(sb); + if (sb->s_flags & MS_RDONLY) + return -EROFS; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; - return -xfs_sync_inodes(mp, SYNC_DELWRI); + return -xfs_sync_data(mp, 0); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bb685269f832..2e09efbca8db 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -43,7 +43,6 @@ #include "xfs_itable.h" #include "xfs_fsops.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -405,6 +404,14 @@ xfs_parseargs( return EINVAL; } +#ifndef CONFIG_XFS_QUOTA + if (XFS_IS_QUOTA_RUNNING(mp)) { + cmn_err(CE_WARN, + "XFS: quota support not available in this kernel."); + return EINVAL; + } +#endif + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, @@ -1063,7 +1070,18 @@ xfs_fs_put_super( int unmount_event_flags = 0; xfs_syncd_stop(mp); - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); + + if (!(sb->s_flags & MS_RDONLY)) { + /* + * XXX(hch): this should be SYNC_WAIT. + * + * Or more likely not needed at all because the VFS is already + * calling ->sync_fs after shutting down all filestem + * operations and just before calling ->put_super. + */ + xfs_sync_data(mp, 0); + xfs_sync_attr(mp, 0); + } #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { @@ -1098,21 +1116,11 @@ xfs_fs_put_super( xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - xfs_qmops_put(mp); xfs_dmops_put(mp); xfs_free_fsname(mp); kfree(mp); } -STATIC void -xfs_fs_write_super( - struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - xfs_sync_fsdata(XFS_M(sb), 0); - sb->s_dirt = 0; -} - STATIC int xfs_fs_sync_super( struct super_block *sb, @@ -1137,7 +1145,6 @@ xfs_fs_sync_super( error = xfs_quiesce_data(mp); else error = xfs_sync_fsdata(mp, 0); - sb->s_dirt = 0; if (unlikely(laptop_mode)) { int prev_sync_seq = mp->m_sync_seq; @@ -1168,6 +1175,7 @@ xfs_fs_statfs( { struct xfs_mount *mp = XFS_M(dentry->d_sb); xfs_sb_t *sbp = &mp->m_sb; + struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; xfs_extlen_t lsize; @@ -1196,7 +1204,10 @@ xfs_fs_statfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); spin_unlock(&mp->m_sb_lock); - XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp); + if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + xfs_qm_statvfs(ip, statp); return 0; } @@ -1404,16 +1415,13 @@ xfs_fs_fill_super( error = xfs_dmops_get(mp); if (error) goto out_free_fsname; - error = xfs_qmops_get(mp); - if (error) - goto out_put_dmops; if (silent) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) - goto out_put_qmops; + goto out_put_dmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; @@ -1443,7 +1451,6 @@ xfs_fs_fill_super( XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); - sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1482,8 +1489,6 @@ xfs_fs_fill_super( out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); - out_put_qmops: - xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); out_free_fsname: @@ -1533,7 +1538,6 @@ static struct super_operations xfs_super_operations = { .write_inode = xfs_fs_write_inode, .clear_inode = xfs_fs_clear_inode, .put_super = xfs_fs_put_super, - .write_super = xfs_fs_write_super, .sync_fs = xfs_fs_sync_super, .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, @@ -1718,18 +1722,8 @@ xfs_init_zones(void) if (!xfs_ili_zone) goto out_destroy_inode_zone; -#ifdef CONFIG_XFS_POSIX_ACL - xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl"); - if (!xfs_acl_zone) - goto out_destroy_ili_zone; -#endif - return 0; -#ifdef CONFIG_XFS_POSIX_ACL - out_destroy_ili_zone: -#endif - kmem_zone_destroy(xfs_ili_zone); out_destroy_inode_zone: kmem_zone_destroy(xfs_inode_zone); out_destroy_efi_zone: @@ -1763,9 +1757,6 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { -#ifdef CONFIG_XFS_POSIX_ACL - kmem_zone_destroy(xfs_acl_zone); -#endif kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index f7ba76633c29..b619d6b8ca43 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -43,166 +43,267 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_rw.h" +#include "xfs_quota.h" #include <linux/kthread.h> #include <linux/freezer.h> -/* - * Sync all the inodes in the given AG according to the - * direction given by the flags. - */ -STATIC int -xfs_sync_inodes_ag( - xfs_mount_t *mp, - int ag, - int flags) -{ - xfs_perag_t *pag = &mp->m_perag[ag]; - int nr_found; - uint32_t first_index = 0; - int error = 0; - int last_error = 0; - do { - struct inode *inode; - xfs_inode_t *ip = NULL; - int lock_flags = XFS_ILOCK_SHARED; +STATIC xfs_inode_t * +xfs_inode_ag_lookup( + struct xfs_mount *mp, + struct xfs_perag *pag, + uint32_t *first_index, + int tag) +{ + int nr_found; + struct xfs_inode *ip; - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + if (tag == XFS_ICI_NO_TAG) { nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void**)&ip, first_index, 1); + (void **)&ip, *first_index, 1); + } else { + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void **)&ip, *first_index, 1, tag); + } + if (!nr_found) + goto unlock; - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + goto unlock; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } + return ip; - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(mp)) { - read_unlock(&pag->pag_ici_lock); - return 0; - } +unlock: + read_unlock(&pag->pag_ici_lock); + return NULL; +} - /* - * If we can't get a reference on the inode, it must be - * in reclaim. Leave it for the reclaim code to flush. - */ - inode = VFS_I(ip); - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - read_unlock(&pag->pag_ici_lock); +STATIC int +xfs_inode_ag_walk( + struct xfs_mount *mp, + xfs_agnumber_t ag, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags, + int tag) +{ + struct xfs_perag *pag = &mp->m_perag[ag]; + uint32_t first_index; + int last_error = 0; + int skipped; - /* avoid new or bad inodes */ - if (is_bad_inode(inode) || - xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - continue; - } +restart: + skipped = 0; + first_index = 0; + do { + int error = 0; + xfs_inode_t *ip; - /* - * If we have to flush data or wait for I/O completion - * we need to hold the iolock. - */ - if (flags & SYNC_DELWRI) { - if (VN_DIRTY(inode)) { - if (flags & SYNC_TRYLOCK) { - if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) - lock_flags |= XFS_IOLOCK_SHARED; - } else { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - } - if (lock_flags & XFS_IOLOCK_SHARED) { - error = xfs_flush_pages(ip, 0, -1, - (flags & SYNC_WAIT) ? 0 - : XFS_B_ASYNC, - FI_NONE); - } - } - if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) - xfs_ioend_wait(ip); - } - xfs_ilock(ip, XFS_ILOCK_SHARED); - - if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - if (!xfs_inode_clean(ip)) - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - else - xfs_ifunlock(ip); - } else if (xfs_iflock_nowait(ip)) { - if (!xfs_inode_clean(ip)) - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); - else - xfs_ifunlock(ip); - } - } - xfs_iput(ip, lock_flags); + ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); + if (!ip) + break; + error = execute(ip, pag, flags); + if (error == EAGAIN) { + skipped++; + continue; + } if (error) last_error = error; /* * bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) - return XFS_ERROR(error); + break; - } while (nr_found); + } while (1); + + if (skipped) { + delay(1); + goto restart; + } + xfs_put_perag(mp, pag); return last_error; } int -xfs_sync_inodes( - xfs_mount_t *mp, - int flags) +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags, + int tag) { - int error; - int last_error; - int i; - int lflags = XFS_LOG_FORCE; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + if (!mp->m_perag[ag].pag_ici_init) + continue; + error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); + if (error) { + last_error = error; + if (error == EFSCORRUPTED) + break; + } + } + return XFS_ERROR(last_error); +} + +/* must be called with pag_ici_lock held and releases it */ +int +xfs_sync_inode_valid( + struct xfs_inode *ip, + struct xfs_perag *pag) +{ + struct inode *inode = VFS_I(ip); + + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + read_unlock(&pag->pag_ici_lock); + return EFSCORRUPTED; + } + + /* + * If we can't get a reference on the inode, it must be in reclaim. + * Leave it for the reclaim code to flush. Also avoid inodes that + * haven't been fully initialised. + */ + if (!igrab(inode)) { + read_unlock(&pag->pag_ici_lock); + return ENOENT; + } + read_unlock(&pag->pag_ici_lock); + + if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + return ENOENT; + } + + return 0; +} + +STATIC int +xfs_sync_inode_data( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + struct inode *inode = VFS_I(ip); + struct address_space *mapping = inode->i_mapping; + int error = 0; + + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; + + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + goto out_wait; + + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (flags & SYNC_TRYLOCK) + goto out_wait; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } + + error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? + 0 : XFS_B_ASYNC, FI_NONE); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + out_wait: if (flags & SYNC_WAIT) - lflags |= XFS_LOG_SYNC; + xfs_ioend_wait(ip); + IRELE(ip); + return error; +} - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - error = xfs_sync_inodes_ag(mp, i, flags); - if (error) - last_error = error; - if (error == EFSCORRUPTED) - break; +STATIC int +xfs_sync_inode_attr( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + int error = 0; + + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_clean(ip)) + goto out_unlock; + if (!xfs_iflock_nowait(ip)) { + if (!(flags & SYNC_WAIT)) + goto out_unlock; + xfs_iflock(ip); } - if (flags & SYNC_DELWRI) - xfs_log_force(mp, 0, lflags); - return XFS_ERROR(last_error); + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + goto out_unlock; + } + + error = xfs_iflush(ip, (flags & SYNC_WAIT) ? + XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + IRELE(ip); + return error; +} + +/* + * Write out pagecache data for the whole filesystem. + */ +int +xfs_sync_data( + struct xfs_mount *mp, + int flags) +{ + int error; + + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); + + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, + XFS_ICI_NO_TAG); + if (error) + return XFS_ERROR(error); + + xfs_log_force(mp, 0, + (flags & SYNC_WAIT) ? + XFS_LOG_FORCE | XFS_LOG_SYNC : + XFS_LOG_FORCE); + return 0; +} + +/* + * Write out inode metadata (attributes) for the whole filesystem. + */ +int +xfs_sync_attr( + struct xfs_mount *mp, + int flags) +{ + ASSERT((flags & ~SYNC_WAIT) == 0); + + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, + XFS_ICI_NO_TAG); } STATIC int @@ -252,7 +353,7 @@ xfs_sync_fsdata( * If this is xfssyncd() then only sync the superblock if we can * lock it without sleeping and it is not pinned. */ - if (flags & SYNC_BDFLUSH) { + if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); @@ -316,13 +417,13 @@ xfs_quiesce_data( int error; /* push non-blocking */ - xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); - XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + xfs_sync_data(mp, 0); + xfs_qm_sync(mp, SYNC_TRYLOCK); xfs_filestream_flush(mp); /* push and block */ - xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); - XFS_QM_DQSYNC(mp, SYNC_WAIT); + xfs_sync_data(mp, SYNC_WAIT); + xfs_qm_sync(mp, SYNC_WAIT); /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); @@ -341,7 +442,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -350,7 +451,7 @@ xfs_quiesce_fs( * logged before we can write the unmount record. */ do { - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); + xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); @@ -433,8 +534,8 @@ xfs_flush_inodes_work( void *arg) { struct inode *inode = arg; - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); iput(inode); } @@ -465,10 +566,10 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ - error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); - error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); + error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); if (xfs_log_need_covered(mp)) error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } @@ -569,7 +670,7 @@ xfs_reclaim_inode( xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return 1; + return -EAGAIN; } __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); @@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } - -STATIC void -xfs_reclaim_inodes_ag( - xfs_mount_t *mp, - int ag, - int noblock, - int mode) +STATIC int +xfs_reclaim_inode_now( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) { - xfs_inode_t *ip = NULL; - xfs_perag_t *pag = &mp->m_perag[ag]; - int nr_found; - uint32_t first_index; - int skipped; - -restart: - first_index = 0; - skipped = 0; - do { - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void**)&ip, first_index, 1, - XFS_ICI_RECLAIM_TAG); - - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - if (noblock) { - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - read_unlock(&pag->pag_ici_lock); - continue; - } - } + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&pag->pag_ici_lock); - - /* - * hmmm - this is an inode already in reclaim. Do - * we even bother catching it here? - */ - if (xfs_reclaim_inode(ip, noblock, mode)) - skipped++; - } while (nr_found); - - if (skipped) { - delay(1); - goto restart; + return 0; } - return; + read_unlock(&pag->pag_ici_lock); + return xfs_reclaim_inode(ip, 0, flags); } int xfs_reclaim_inodes( xfs_mount_t *mp, - int noblock, int mode) { - int i; - - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - xfs_reclaim_inodes_ag(mp, i, noblock, mode); - } - return 0; + return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, + XFS_ICI_RECLAIM_TAG); } - - diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 308d5bf6dfbd..2a10301c99c7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -29,17 +29,14 @@ typedef struct xfs_sync_work { struct completion *w_completion; } xfs_sync_work_t; -#define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_DELWRI 0x0002 /* look at delayed writes */ -#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ -#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ -#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ +#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ +#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync_inodes(struct xfs_mount *mp, int flags); +int xfs_sync_attr(struct xfs_mount *mp, int flags); +int xfs_sync_data(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); @@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); + +int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); +int xfs_inode_ag_iterator(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), + int flags, int tag); + #endif diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 964621fde6ed..497c7fb75cc1 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -29,67 +29,6 @@ #include <linux/xattr.h> -/* - * ACL handling. Should eventually be moved into xfs_acl.c - */ - -static int -xfs_decode_acl(const char *name) -{ - if (strcmp(name, "posix_acl_access") == 0) - return _ACL_TYPE_ACCESS; - else if (strcmp(name, "posix_acl_default") == 0) - return _ACL_TYPE_DEFAULT; - return -EINVAL; -} - -/* - * Get system extended attributes which at the moment only - * includes Posix ACLs. - */ -static int -xfs_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - int acl; - - acl = xfs_decode_acl(name); - if (acl < 0) - return acl; - - return xfs_acl_vget(inode, buffer, size, acl); -} - -static int -xfs_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int acl; - - acl = xfs_decode_acl(name); - if (acl < 0) - return acl; - if (flags & XATTR_CREATE) - return -EINVAL; - - if (!value) - return xfs_acl_vremove(inode, acl); - - return xfs_acl_vset(inode, (void *)value, size, acl); -} - -static struct xattr_handler xfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = xfs_xattr_system_get, - .set = xfs_xattr_system_set, -}; - - -/* - * Real xattr handling. The only difference between the namespaces is - * a flag passed to the low-level attr code. - */ - static int __xfs_xattr_get(struct inode *inode, const char *name, void *value, size_t size, int xflags) @@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = { &xfs_xattr_user_handler, &xfs_xattr_trusted_handler, &xfs_xattr_security_handler, +#ifdef CONFIG_XFS_POSIX_ACL &xfs_xattr_system_handler, +#endif NULL }; @@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) /* * Then add the two synthetic ACL attributes. */ - if (xfs_acl_vhasacl_access(inode)) { + if (posix_acl_access_exists(inode)) { error = list_one_attr(POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS) + 1, data, size, &context.count); @@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) return error; } - if (xfs_acl_vhasacl_default(inode)) { + if (posix_acl_default_exists(inode)) { error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT) + 1, data, size, &context.count); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index e4babcc63423..2f3f2229eaaf 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -1194,7 +1193,9 @@ void xfs_qm_dqrele( xfs_dquot_t *dqp) { - ASSERT(dqp); + if (!dqp) + return; + xfs_dqtrace_entry(dqp, "DQRELE"); xfs_dqlock(dqp); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index de0f402ddb4c..6533ead9b889 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); -extern void xfs_qm_dqrele(xfs_dquot_t *); extern void xfs_dqlock(xfs_dquot_t *); extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); extern void xfs_dqunlock(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1728f6a7c4f5..d0d4a9a0bbd7 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5b6695049e00..45b1bfef7388 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref( * Just destroy the quotainfo structure. */ void -xfs_qm_unmount_quotadestroy( - xfs_mount_t *mp) +xfs_qm_unmount( + struct xfs_mount *mp) { - if (mp->m_quotainfo) + if (mp->m_quotainfo) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); xfs_qm_destroy_quotainfo(mp); + } } @@ -385,8 +386,13 @@ xfs_qm_mount_quotas( if (error) { xfs_fs_cmn_err(CE_WARN, mp, "Failed to initialize disk quotas."); + return; } - return; + +#ifdef QUOTADEBUG + if (XFS_IS_QUOTA_ON(mp)) + xfs_qm_internalqcheck(mp); +#endif } /* @@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint( * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. - * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL. * Inode may get unlocked and relocked in here, and the caller must deal with * the consequences. */ int -xfs_qm_dqattach( +xfs_qm_dqattach_locked( xfs_inode_t *ip, uint flags) { @@ -787,17 +792,14 @@ xfs_qm_dqattach( uint nquotas = 0; int error = 0; - if ((! XFS_IS_QUOTA_ON(mp)) || - (! XFS_NOT_DQATTACHED(mp, ip)) || - (ip->i_ino == mp->m_sb.sb_uquotino) || - (ip->i_ino == mp->m_sb.sb_gquotino)) + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + !XFS_NOT_DQATTACHED(mp, ip) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) return 0; - ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || - xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (! (flags & XFS_QMOPT_ILOCKED)) - xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (XFS_IS_UQUOTA_ON(mp)) { error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, @@ -849,8 +851,7 @@ xfs_qm_dqattach( xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); } - done: - + done: #ifdef QUOTADEBUG if (! error) { if (XFS_IS_UQUOTA_ON(mp)) @@ -858,15 +859,22 @@ xfs_qm_dqattach( if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); #endif + return error; +} - if (! (flags & XFS_QMOPT_ILOCKED)) - xfs_iunlock(ip, XFS_ILOCK_EXCL); +int +xfs_qm_dqattach( + struct xfs_inode *ip, + uint flags) +{ + int error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_qm_dqattach_locked(ip, flags); + xfs_iunlock(ip, XFS_ILOCK_EXCL); -#ifdef QUOTADEBUG - else - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif return error; } @@ -896,11 +904,6 @@ xfs_qm_dqdetach( } } -/* - * This is called to sync quotas. We can be told to use non-blocking - * semantics by either the SYNC_BDFLUSH flag or the absence of the - * SYNC_WAIT flag. - */ int xfs_qm_sync( xfs_mount_t *mp, @@ -909,17 +912,13 @@ xfs_qm_sync( int recl, restarts; xfs_dquot_t *dqp; uint flush_flags; - boolean_t nowait; int error; - if (! XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; + flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; - /* - * We won't block unless we are asked to. - */ - nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0); again: xfs_qm_mplist_lock(mp); @@ -939,18 +938,10 @@ xfs_qm_sync( * don't 'seem' to be dirty. ie. don't acquire dqlock. * This is very similar to what xfs_sync does with inodes. */ - if (flags & SYNC_BDFLUSH) { - if (! XFS_DQ_IS_DIRTY(dqp)) + if (flags & SYNC_TRYLOCK) { + if (!XFS_DQ_IS_DIRTY(dqp)) continue; - } - - if (nowait) { - /* - * Try to acquire the dquot lock. We are NOT out of - * lock order, but we just don't want to wait for this - * lock, unless somebody wanted us to. - */ - if (! xfs_qm_dqlock_nowait(dqp)) + if (!xfs_qm_dqlock_nowait(dqp)) continue; } else { xfs_dqlock(dqp); @@ -967,7 +958,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); if (!xfs_dqflock_nowait(dqp)) { - if (nowait) { + if (flags & SYNC_TRYLOCK) { xfs_dqunlock(dqp); continue; } @@ -985,7 +976,6 @@ xfs_qm_sync( * Let go of the mplist lock. We don't want to hold it * across a disk write */ - flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC; xfs_qm_mplist_unlock(mp); xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); error = xfs_qm_dqflush(dqp, flush_flags); @@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes( */ int xfs_qm_vop_dqalloc( - xfs_mount_t *mp, - xfs_inode_t *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - xfs_dquot_t **O_udqpp, - xfs_dquot_t **O_gdqpp) + struct xfs_inode *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + struct xfs_dquot **O_udqpp, + struct xfs_dquot **O_gdqpp) { - int error; - xfs_dquot_t *uq, *gq; - uint lockflags; + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *uq, *gq; + int error; + uint lockflags; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; lockflags = XFS_ILOCK_EXCL; @@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc( * if necessary. The dquot(s) will not be locked. */ if (XFS_NOT_DQATTACHED(mp, ip)) { - if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | - XFS_QMOPT_ILOCKED))) { + error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); + if (error) { xfs_iunlock(ip, lockflags); return error; } @@ -2469,6 +2459,7 @@ xfs_qm_vop_chown( uint bfield = XFS_IS_REALTIME_INODE(ip) ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve( xfs_dquot_t *gdqp, uint flags) { - int error; - xfs_mount_t *mp; + xfs_mount_t *mp = ip->i_mount; uint delblks, blkflags, prjflags = 0; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; + int error; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - mp = ip->i_mount; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); delblks = ip->i_delayed_blks; @@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve( int xfs_qm_vop_rename_dqattach( - xfs_inode_t **i_tab) + struct xfs_inode **i_tab) { - xfs_inode_t *ip; - int i; - int error; + struct xfs_mount *mp = i_tab[0]->i_mount; + int i; - ip = i_tab[0]; - - if (! XFS_IS_QUOTA_ON(ip->i_mount)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; - } - for (i = 1; (i < 4 && i_tab[i]); i++) { + for (i = 0; (i < 4 && i_tab[i]); i++) { + struct xfs_inode *ip = i_tab[i]; + int error; + /* * Watch out for duplicate entries in the table. */ - if ((ip = i_tab[i]) != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { + if (i == 0 || ip != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(mp, ip)) { error = xfs_qm_dqattach(ip, 0); if (error) return error; @@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach( } void -xfs_qm_vop_dqattach_and_dqmod_newinode( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp) +xfs_qm_vop_create_dqattach( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp) { - if (!XFS_IS_QUOTA_ON(tp->t_mountp)) + struct xfs_mount *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if (udqp) { xfs_dqlock(udqp); @@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_dqunlock(udqp); ASSERT(ip->i_udquot == NULL); ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp)); + ASSERT(XFS_IS_UQUOTA_ON(mp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } @@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_dqunlock(gdqp); ASSERT(ip->i_gdquot == NULL); ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp)); - ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ? + ASSERT(XFS_IS_OQUOTA_ON(mp)); + ASSERT((XFS_IS_GQUOTA_ON(mp) ? ip->i_d.di_gid : ip->i_d.di_projid) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index a371954cae1b..495564b8af38 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -127,8 +127,6 @@ typedef struct xfs_quotainfo { } xfs_quotainfo_t; -extern xfs_dqtrxops_t xfs_trans_dquot_ops; - extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, xfs_dquot_t *, xfs_dquot_t *, long, long, uint); @@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); -extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); -extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); -extern int xfs_qm_sync(xfs_mount_t *, int); /* dquot stuff */ extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); -extern int xfs_qm_dqattach(xfs_inode_t *, uint); -extern void xfs_qm_dqdetach(xfs_inode_t *); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); @@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); -/* vop stuff */ -extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, - uid_t, gid_t, prid_t, uint, - xfs_dquot_t **, xfs_dquot_t **); -extern void xfs_qm_vop_dqattach_and_dqmod_newinode( - xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t *, xfs_dquot_t *); -extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **); -extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t **, xfs_dquot_t *); -extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, - xfs_dquot_t *, xfs_dquot_t *, uint); - /* list stuff */ extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 63037c689a4b..a5346630dfae 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" @@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot( * return a statvfs of the project, not the entire filesystem. * This makes such trees appear as if they are filesystems in themselves. */ -STATIC void +void xfs_qm_statvfs( xfs_inode_t *ip, struct kstatfs *statp) @@ -92,20 +91,13 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; - if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - return; - if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { - xfs_disk_dquot_t *dp = &dqp->q_core; - - xfs_fill_statvfs_from_dquot(statp, dp); + xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); xfs_qm_dqput(dqp); } } -STATIC int +int xfs_qm_newmount( xfs_mount_t *mp, uint *needquotamount, @@ -114,9 +106,6 @@ xfs_qm_newmount( uint quotaondisk; uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; - *quotaflags = 0; - *needquotamount = B_FALSE; - quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); @@ -179,66 +168,6 @@ xfs_qm_newmount( return 0; } -STATIC int -xfs_qm_endmount( - xfs_mount_t *mp, - uint needquotamount, - uint quotaflags) -{ - if (needquotamount) { - ASSERT(mp->m_qflags == 0); - mp->m_qflags = quotaflags; - xfs_qm_mount_quotas(mp); - } - -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - if (! (XFS_IS_QUOTA_ON(mp))) - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); - else - xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); -#endif - -#ifdef QUOTADEBUG - if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp)) - cmn_err(CE_WARN, "XFS: mount internalqcheck failed"); -#endif - - return 0; -} - -STATIC void -xfs_qm_dqrele_null( - xfs_dquot_t *dq) -{ - /* - * Called from XFS, where we always check first for a NULL dquot. - */ - if (!dq) - return; - xfs_qm_dqrele(dq); -} - - -struct xfs_qmops xfs_qmcore_xfs = { - .xfs_qminit = xfs_qm_newmount, - .xfs_qmdone = xfs_qm_unmount_quotadestroy, - .xfs_qmmount = xfs_qm_endmount, - .xfs_qmunmount = xfs_qm_unmount_quotas, - .xfs_dqrele = xfs_qm_dqrele_null, - .xfs_dqattach = xfs_qm_dqattach, - .xfs_dqdetach = xfs_qm_dqdetach, - .xfs_dqpurgeall = xfs_qm_dqpurge_all, - .xfs_dqvopalloc = xfs_qm_vop_dqalloc, - .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode, - .xfs_dqvoprename = xfs_qm_vop_rename_dqattach, - .xfs_dqvopchown = xfs_qm_vop_chown, - .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve, - .xfs_dqstatvfs = xfs_qm_statvfs, - .xfs_dqsync = xfs_qm_sync, - .xfs_dqtrxops = &xfs_trans_dquot_ops, -}; -EXPORT_SYMBOL(xfs_qmcore_xfs); - void __init xfs_qm_init(void) { diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 709f5f545cf5..21b08c0396a1 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index c7b66f6506ce..4e4276b956e8 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -45,7 +45,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_utils.h" @@ -847,105 +846,55 @@ xfs_qm_export_flags( } -/* - * Release all the dquots on the inodes in an AG. - */ -STATIC void -xfs_qm_dqrele_inodes_ag( - xfs_mount_t *mp, - int ag, - uint flags) +STATIC int +xfs_dqrele_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) { - xfs_inode_t *ip = NULL; - xfs_perag_t *pag = &mp->m_perag[ag]; - int first_index = 0; - int nr_found; - - do { - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - read_lock(&pag->pag_ici_lock); - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void**)&ip, first_index, 1); - - if (!nr_found) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { - read_unlock(&pag->pag_ici_lock); - break; - } - - /* skip quota inodes */ - if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - read_unlock(&pag->pag_ici_lock); - continue; - } + int error; - /* - * If we can't get a reference on the inode, it must be - * in reclaim. Leave it for the reclaim code to flush. - */ - if (!igrab(VFS_I(ip))) { - read_unlock(&pag->pag_ici_lock); - continue; - } + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); + return 0; + } - /* avoid new inodes though we shouldn't find any here */ - if (xfs_iflags_test(ip, XFS_INEW)) { - IRELE(ip); - continue; - } + error = xfs_sync_inode_valid(ip, pag); + if (error) + return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && - ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iput(ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iput(ip, XFS_ILOCK_EXCL); + IRELE(ip); - } while (nr_found); + return 0; } + /* * Go thru all the inodes in the file system, releasing their dquots. + * * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. This also gets called from - * xfs_rootumount. + * AFTER this, in the case of quotaoff. */ void xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { - int i; - ASSERT(mp->m_quotainfo); - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - if (!mp->m_perag[i].pag_ici_init) - continue; - xfs_qm_dqrele_inodes_ag(mp, i, flags); - } + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); } /*------------------------------------------------------------------------*/ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 447173bcf96d..97ac9640be98 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -42,7 +42,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -111,7 +110,7 @@ xfs_trans_log_dquot( * Carry forward whatever is left of the quota blk reservation to * the spanky new transaction */ -STATIC void +void xfs_trans_dup_dqinfo( xfs_trans_t *otp, xfs_trans_t *ntp) @@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -STATIC void +void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, uint field, long delta) { - xfs_mount_t *mp; - - ASSERT(tp); - mp = tp->t_mountp; + xfs_mount_t *mp = tp->t_mountp; - if (!XFS_IS_QUOTA_ON(mp) || + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || ip->i_ino == mp->m_sb.sb_uquotino || ip->i_ino == mp->m_sb.sb_gquotino) return; @@ -229,6 +226,7 @@ xfs_trans_mod_dquot( xfs_dqtrx_t *qtrx; ASSERT(tp); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; if (tp->t_dqinfo == NULL) @@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -STATIC void +void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas( long totalbdelta; long totalrtbdelta; - if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; ASSERT(tp->t_dqinfo); @@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas( * we simply throw those away, since that's the expected behavior * when a transaction is curtailed without a commit. */ -STATIC void +void xfs_trans_unreserve_and_mod_dquots( xfs_trans_t *tp) { @@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots( { int resvd = 0, error; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; if (tp && tp->t_dqinfo == NULL) @@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots( * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. */ -STATIC int +int xfs_trans_reserve_quota_nblks( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_inode_t *ip, - long nblks, - long ninos, - uint flags) + struct xfs_trans *tp, + struct xfs_inode *ip, + long nblks, + long ninos, + uint flags) { - int error; + struct xfs_mount *mp = ip->i_mount; - if (!XFS_IS_QUOTA_ON(mp)) + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; if (XFS_IS_PQUOTA_ON(mp)) flags |= XFS_QMOPT_ENOSPC; @@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks( ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == XFS_TRANS_DQ_RES_RTBLKS || (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == @@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks( /* * Reserve nblks against these dquots, with trans as the mediator. */ - error = xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, - flags); - return error; + return xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, flags); } /* @@ -895,25 +889,15 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); + tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); } -STATIC void +void xfs_trans_free_dqinfo( xfs_trans_t *tp) { if (!tp->t_dqinfo) return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo); - (tp)->t_dqinfo = NULL; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); + tp->t_dqinfo = NULL; } - -xfs_dqtrxops_t xfs_trans_dquot_ops = { - .qo_dup_dqinfo = xfs_trans_dup_dqinfo, - .qo_free_dqinfo = xfs_trans_free_dqinfo, - .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino, - .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas, - .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks, - .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots, - .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots, -}; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c deleted file mode 100644 index a8cdd73999a4..000000000000 --- a/fs/xfs/xfs_acl.c +++ /dev/null @@ -1,874 +0,0 @@ -/* - * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_vnodeops.h" - -#include <linux/capability.h> -#include <linux/posix_acl_xattr.h> - -STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); -STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_endian(xfs_acl_t *); -STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); -STATIC int xfs_acl_invalid(xfs_acl_t *); -STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); -STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); -STATIC int xfs_acl_allow_set(struct inode *, int); - -kmem_zone_t *xfs_acl_zone; - - -/* - * Test for existence of access ACL attribute as efficiently as possible. - */ -int -xfs_acl_vhasacl_access( - struct inode *vp) -{ - int error; - - xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error); - return (error == 0); -} - -/* - * Test for existence of default ACL attribute as efficiently as possible. - */ -int -xfs_acl_vhasacl_default( - struct inode *vp) -{ - int error; - - if (!S_ISDIR(vp->i_mode)) - return 0; - xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); - return (error == 0); -} - -/* - * Convert from extended attribute representation to in-memory for XFS. - */ -STATIC int -posix_acl_xattr_to_xfs( - posix_acl_xattr_header *src, - size_t size, - xfs_acl_t *dest) -{ - posix_acl_xattr_entry *src_entry; - xfs_acl_entry_t *dest_entry; - int n; - - if (!src || !dest) - return EINVAL; - - if (size < sizeof(posix_acl_xattr_header)) - return EINVAL; - - if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) - return EOPNOTSUPP; - - memset(dest, 0, sizeof(xfs_acl_t)); - dest->acl_cnt = posix_acl_xattr_count(size); - if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES) - return EINVAL; - - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - if (!dest->acl_cnt) - return 0; - - src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src)); - dest_entry = &dest->acl_entry[0]; - - for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) { - dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm); - if (_ACL_PERM_INVALID(dest_entry->ae_perm)) - return EINVAL; - dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag); - switch(dest_entry->ae_tag) { - case ACL_USER: - case ACL_GROUP: - dest_entry->ae_id = le32_to_cpu(src_entry->e_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - dest_entry->ae_id = ACL_UNDEFINED_ID; - break; - default: - return EINVAL; - } - } - if (xfs_acl_invalid(dest)) - return EINVAL; - - return 0; -} - -/* - * Comparison function called from xfs_sort(). - * Primary key is ae_tag, secondary key is ae_id. - */ -STATIC int -xfs_acl_entry_compare( - const void *va, - const void *vb) -{ - xfs_acl_entry_t *a = (xfs_acl_entry_t *)va, - *b = (xfs_acl_entry_t *)vb; - - if (a->ae_tag == b->ae_tag) - return (a->ae_id - b->ae_id); - return (a->ae_tag - b->ae_tag); -} - -/* - * Convert from in-memory XFS to extended attribute representation. - */ -STATIC int -posix_acl_xfs_to_xattr( - xfs_acl_t *src, - posix_acl_xattr_header *dest, - size_t size) -{ - int n; - size_t new_size = posix_acl_xattr_size(src->acl_cnt); - posix_acl_xattr_entry *dest_entry; - xfs_acl_entry_t *src_entry; - - if (size < new_size) - return -ERANGE; - - /* Need to sort src XFS ACL by <ae_tag,ae_id> */ - xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]), - xfs_acl_entry_compare); - - dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); - dest_entry = &dest->a_entries[0]; - src_entry = &src->acl_entry[0]; - for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) { - dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm); - if (_ACL_PERM_INVALID(src_entry->ae_perm)) - return -EINVAL; - dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag); - switch (src_entry->ae_tag) { - case ACL_USER: - case ACL_GROUP: - dest_entry->e_id = cpu_to_le32(src_entry->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); - break; - default: - return -EINVAL; - } - } - return new_size; -} - -int -xfs_acl_vget( - struct inode *vp, - void *acl, - size_t size, - int kind) -{ - int error; - xfs_acl_t *xfs_acl = NULL; - posix_acl_xattr_header *ext_acl = acl; - int flags = 0; - - if(size) { - if (!(_ACL_ALLOC(xfs_acl))) { - error = ENOMEM; - goto out; - } - memset(xfs_acl, 0, sizeof(xfs_acl_t)); - } else - flags = ATTR_KERNOVAL; - - xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error); - if (error) - goto out; - - if (!size) { - error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES); - } else { - if (xfs_acl_invalid(xfs_acl)) { - error = EINVAL; - goto out; - } - if (kind == _ACL_TYPE_ACCESS) - xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); - error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); - } -out: - if(xfs_acl) - _ACL_FREE(xfs_acl); - return -error; -} - -int -xfs_acl_vremove( - struct inode *vp, - int kind) -{ - int error; - - error = xfs_acl_allow_set(vp, kind); - if (!error) { - error = xfs_attr_remove(XFS_I(vp), - kind == _ACL_TYPE_DEFAULT? - SGI_ACL_DEFAULT: SGI_ACL_FILE, - ATTR_ROOT); - if (error == ENOATTR) - error = 0; /* 'scool */ - } - return -error; -} - -int -xfs_acl_vset( - struct inode *vp, - void *acl, - size_t size, - int kind) -{ - posix_acl_xattr_header *ext_acl = acl; - xfs_acl_t *xfs_acl; - int error; - int basicperms = 0; /* more than std unix perms? */ - - if (!acl) - return -EINVAL; - - if (!(_ACL_ALLOC(xfs_acl))) - return -ENOMEM; - - error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl); - if (error) { - _ACL_FREE(xfs_acl); - return -error; - } - if (!xfs_acl->acl_cnt) { - _ACL_FREE(xfs_acl); - return 0; - } - - error = xfs_acl_allow_set(vp, kind); - - /* Incoming ACL exists, set file mode based on its value */ - if (!error && kind == _ACL_TYPE_ACCESS) - error = xfs_acl_setmode(vp, xfs_acl, &basicperms); - - if (error) - goto out; - - /* - * If we have more than std unix permissions, set up the actual attr. - * Otherwise, delete any existing attr. This prevents us from - * having actual attrs for permissions that can be stored in the - * standard permission bits. - */ - if (!basicperms) { - xfs_acl_set_attr(vp, xfs_acl, kind, &error); - } else { - error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); - } - -out: - _ACL_FREE(xfs_acl); - return -error; -} - -int -xfs_acl_iaccess( - xfs_inode_t *ip, - mode_t mode, - cred_t *cr) -{ - xfs_acl_t *acl; - int rval; - struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE}; - - if (!(_ACL_ALLOC(acl))) - return -1; - - /* If the file has no ACL return -1. */ - rval = sizeof(xfs_acl_t); - if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) { - _ACL_FREE(acl); - return -1; - } - xfs_acl_get_endian(acl); - - /* If the file has an empty ACL return -1. */ - if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) { - _ACL_FREE(acl); - return -1; - } - - /* Synchronize ACL with mode bits */ - xfs_acl_sync_mode(ip->i_d.di_mode, acl); - - rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr); - _ACL_FREE(acl); - return rval; -} - -STATIC int -xfs_acl_allow_set( - struct inode *vp, - int kind) -{ - if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) - return EPERM; - if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode)) - return ENOTDIR; - if (vp->i_sb->s_flags & MS_RDONLY) - return EROFS; - if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) - return EPERM; - return 0; -} - -/* - * Note: cr is only used here for the capability check if the ACL test fails. - * It is not used to find out the credentials uid or groups etc, as was - * done in IRIX. It is assumed that the uid and groups for the current - * thread are taken from "current" instead of the cr parameter. - */ -STATIC int -xfs_acl_access( - uid_t fuid, - gid_t fgid, - xfs_acl_t *fap, - mode_t md, - cred_t *cr) -{ - xfs_acl_entry_t matched; - int i, allows; - int maskallows = -1; /* true, but not 1, either */ - int seen_userobj = 0; - - matched.ae_tag = 0; /* Invalid type */ - matched.ae_perm = 0; - - for (i = 0; i < fap->acl_cnt; i++) { - /* - * Break out if we've got a user_obj entry or - * a user entry and the mask (and have processed USER_OBJ) - */ - if (matched.ae_tag == ACL_USER_OBJ) - break; - if (matched.ae_tag == ACL_USER) { - if (maskallows != -1 && seen_userobj) - break; - if (fap->acl_entry[i].ae_tag != ACL_MASK && - fap->acl_entry[i].ae_tag != ACL_USER_OBJ) - continue; - } - /* True if this entry allows the requested access */ - allows = ((fap->acl_entry[i].ae_perm & md) == md); - - switch (fap->acl_entry[i].ae_tag) { - case ACL_USER_OBJ: - seen_userobj = 1; - if (fuid != current_fsuid()) - continue; - matched.ae_tag = ACL_USER_OBJ; - matched.ae_perm = allows; - break; - case ACL_USER: - if (fap->acl_entry[i].ae_id != current_fsuid()) - continue; - matched.ae_tag = ACL_USER; - matched.ae_perm = allows; - break; - case ACL_GROUP_OBJ: - if ((matched.ae_tag == ACL_GROUP_OBJ || - matched.ae_tag == ACL_GROUP) && !allows) - continue; - if (!in_group_p(fgid)) - continue; - matched.ae_tag = ACL_GROUP_OBJ; - matched.ae_perm = allows; - break; - case ACL_GROUP: - if ((matched.ae_tag == ACL_GROUP_OBJ || - matched.ae_tag == ACL_GROUP) && !allows) - continue; - if (!in_group_p(fap->acl_entry[i].ae_id)) - continue; - matched.ae_tag = ACL_GROUP; - matched.ae_perm = allows; - break; - case ACL_MASK: - maskallows = allows; - break; - case ACL_OTHER: - if (matched.ae_tag != 0) - continue; - matched.ae_tag = ACL_OTHER; - matched.ae_perm = allows; - break; - } - } - /* - * First possibility is that no matched entry allows access. - * The capability to override DAC may exist, so check for it. - */ - switch (matched.ae_tag) { - case ACL_OTHER: - case ACL_USER_OBJ: - if (matched.ae_perm) - return 0; - break; - case ACL_USER: - case ACL_GROUP_OBJ: - case ACL_GROUP: - if (maskallows && matched.ae_perm) - return 0; - break; - case 0: - break; - } - - /* EACCES tells generic_permission to check for capability overrides */ - return EACCES; -} - -/* - * ACL validity checker. - * This acl validation routine checks each ACL entry read in makes sense. - */ -STATIC int -xfs_acl_invalid( - xfs_acl_t *aclp) -{ - xfs_acl_entry_t *entry, *e; - int user = 0, group = 0, other = 0, mask = 0; - int mask_required = 0; - int i, j; - - if (!aclp) - goto acl_invalid; - - if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES) - goto acl_invalid; - - for (i = 0; i < aclp->acl_cnt; i++) { - entry = &aclp->acl_entry[i]; - switch (entry->ae_tag) { - case ACL_USER_OBJ: - if (user++) - goto acl_invalid; - break; - case ACL_GROUP_OBJ: - if (group++) - goto acl_invalid; - break; - case ACL_OTHER: - if (other++) - goto acl_invalid; - break; - case ACL_USER: - case ACL_GROUP: - for (j = i + 1; j < aclp->acl_cnt; j++) { - e = &aclp->acl_entry[j]; - if (e->ae_id == entry->ae_id && - e->ae_tag == entry->ae_tag) - goto acl_invalid; - } - mask_required++; - break; - case ACL_MASK: - if (mask++) - goto acl_invalid; - break; - default: - goto acl_invalid; - } - } - if (!user || !group || !other || (mask_required && !mask)) - goto acl_invalid; - else - return 0; -acl_invalid: - return EINVAL; -} - -/* - * Do ACL endian conversion. - */ -STATIC void -xfs_acl_get_endian( - xfs_acl_t *aclp) -{ - xfs_acl_entry_t *ace, *end; - - INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - end = &aclp->acl_entry[0]+aclp->acl_cnt; - for (ace = &aclp->acl_entry[0]; ace < end; ace++) { - INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag); - INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id); - INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm); - } -} - -/* - * Get the ACL from the EA and do endian conversion. - */ -STATIC void -xfs_acl_get_attr( - struct inode *vp, - xfs_acl_t *aclp, - int kind, - int flags, - int *error) -{ - int len = sizeof(xfs_acl_t); - - ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); - flags |= ATTR_ROOT; - *error = xfs_attr_get(XFS_I(vp), - kind == _ACL_TYPE_ACCESS ? - SGI_ACL_FILE : SGI_ACL_DEFAULT, - (char *)aclp, &len, flags); - if (*error || (flags & ATTR_KERNOVAL)) - return; - xfs_acl_get_endian(aclp); -} - -/* - * Set the EA with the ACL and do endian conversion. - */ -STATIC void -xfs_acl_set_attr( - struct inode *vp, - xfs_acl_t *aclp, - int kind, - int *error) -{ - xfs_acl_entry_t *ace, *newace, *end; - xfs_acl_t *newacl; - int len; - - if (!(_ACL_ALLOC(newacl))) { - *error = ENOMEM; - return; - } - - len = sizeof(xfs_acl_t) - - (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt)); - end = &aclp->acl_entry[0]+aclp->acl_cnt; - for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0]; - ace < end; - ace++, newace++) { - INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag); - INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id); - INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); - } - INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - *error = xfs_attr_set(XFS_I(vp), - kind == _ACL_TYPE_ACCESS ? - SGI_ACL_FILE: SGI_ACL_DEFAULT, - (char *)newacl, len, ATTR_ROOT); - _ACL_FREE(newacl); -} - -int -xfs_acl_vtoacl( - struct inode *vp, - xfs_acl_t *access_acl, - xfs_acl_t *default_acl) -{ - int error = 0; - - if (access_acl) { - /* - * Get the Access ACL and the mode. If either cannot - * be obtained for some reason, invalidate the access ACL. - */ - xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); - if (error) - access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; - else /* We have a good ACL and the file mode, synchronize. */ - xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); - } - - if (default_acl) { - xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error); - if (error) - default_acl->acl_cnt = XFS_ACL_NOT_PRESENT; - } - return error; -} - -/* - * This function retrieves the parent directory's acl, processes it - * and lets the child inherit the acl(s) that it should. - */ -int -xfs_acl_inherit( - struct inode *vp, - mode_t mode, - xfs_acl_t *pdaclp) -{ - xfs_acl_t *cacl; - int error = 0; - int basicperms = 0; - - /* - * If the parent does not have a default ACL, or it's an - * invalid ACL, we're done. - */ - if (!vp) - return 0; - if (!pdaclp || xfs_acl_invalid(pdaclp)) - return 0; - - /* - * Copy the default ACL of the containing directory to - * the access ACL of the new file and use the mode that - * was passed in to set up the correct initial values for - * the u::,g::[m::], and o:: entries. This is what makes - * umask() "work" with ACL's. - */ - - if (!(_ACL_ALLOC(cacl))) - return ENOMEM; - - memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); - xfs_acl_filter_mode(mode, cacl); - error = xfs_acl_setmode(vp, cacl, &basicperms); - if (error) - goto out_error; - - /* - * Set the Default and Access ACL on the file. The mode is already - * set on the file, so we don't need to worry about that. - * - * If the new file is a directory, its default ACL is a copy of - * the containing directory's default ACL. - */ - if (S_ISDIR(vp->i_mode)) - xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); - if (!error && !basicperms) - xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); -out_error: - _ACL_FREE(cacl); - return error; -} - -/* - * Set up the correct mode on the file based on the supplied ACL. This - * makes sure that the mode on the file reflects the state of the - * u::,g::[m::], and o:: entries in the ACL. Since the mode is where - * the ACL is going to get the permissions for these entries, we must - * synchronize the mode whenever we set the ACL on a file. - */ -STATIC int -xfs_acl_setmode( - struct inode *vp, - xfs_acl_t *acl, - int *basicperms) -{ - struct iattr iattr; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - int i, nomask = 1; - - *basicperms = 1; - - if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) - return 0; - - /* - * Copy the u::, g::, o::, and m:: bits from the ACL into the - * mode. The m:: bits take precedence over the g:: bits. - */ - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = XFS_I(vp)->i_d.di_mode; - iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); - ap = acl->acl_entry; - for (i = 0; i < acl->acl_cnt; ++i) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - iattr.ia_mode |= ap->ae_perm << 6; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: /* more than just standard modes */ - nomask = 0; - iattr.ia_mode |= ap->ae_perm << 3; - *basicperms = 0; - break; - case ACL_OTHER: - iattr.ia_mode |= ap->ae_perm; - break; - default: /* more than just standard modes */ - *basicperms = 0; - break; - } - ap++; - } - - /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - iattr.ia_mode |= gap->ae_perm << 3; - - return xfs_setattr(XFS_I(vp), &iattr, 0); -} - -/* - * The permissions for the special ACL entries (u::, g::[m::], o::) are - * actually stored in the file mode (if there is both a group and a mask, - * the group is stored in the ACL entry and the mask is stored on the file). - * This allows the mode to remain automatically in sync with the ACL without - * the need for a call-back to the ACL system at every point where the mode - * could change. This function takes the permissions from the specified mode - * and places it in the supplied ACL. - * - * This implementation draws its validity from the fact that, when the ACL - * was assigned, the mode was copied from the ACL. - * If the mode did not change, therefore, the mode remains exactly what was - * taken from the special ACL entries at assignment. - * If a subsequent chmod() was done, the POSIX spec says that the change in - * mode must cause an update to the ACL seen at user level and used for - * access checks. Before and after a mode change, therefore, the file mode - * most accurately reflects what the special ACL entries should permit/deny. - * - * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly, - * the existing mode bits will override whatever is in the - * ACL. Similarly, if there is a pre-existing ACL that was - * never in sync with its mode (owing to a bug in 6.5 and - * before), it will now magically (or mystically) be - * synchronized. This could cause slight astonishment, but - * it is better than inconsistent permissions. - * - * The supplied ACL is a template that may contain any combination - * of special entries. These are treated as place holders when we fill - * out the ACL. This routine does not add or remove special entries, it - * simply unites each special entry with its associated set of permissions. - */ -STATIC void -xfs_acl_sync_mode( - mode_t mode, - xfs_acl_t *acl) -{ - int i, nomask = 1; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - - /* - * Set ACL entries. POSIX1003.1eD16 requires that the MASK - * be set instead of the GROUP entry, if there is a MASK. - */ - for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - ap->ae_perm = (mode >> 6) & 0x7; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: - nomask = 0; - ap->ae_perm = (mode >> 3) & 0x7; - break; - case ACL_OTHER: - ap->ae_perm = mode & 0x7; - break; - default: - break; - } - } - /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - gap->ae_perm = (mode >> 3) & 0x7; -} - -/* - * When inheriting an Access ACL from a directory Default ACL, - * the ACL bits are set to the intersection of the ACL default - * permission bits and the file permission bits in mode. If there - * are no permission bits on the file then we must not give them - * the ACL. This is what what makes umask() work with ACLs. - */ -STATIC void -xfs_acl_filter_mode( - mode_t mode, - xfs_acl_t *acl) -{ - int i, nomask = 1; - xfs_acl_entry_t *ap; - xfs_acl_entry_t *gap = NULL; - - /* - * Set ACL entries. POSIX1003.1eD16 requires that the MASK - * be merged with GROUP entry, if there is a MASK. - */ - for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) { - switch (ap->ae_tag) { - case ACL_USER_OBJ: - ap->ae_perm &= (mode >> 6) & 0x7; - break; - case ACL_GROUP_OBJ: - gap = ap; - break; - case ACL_MASK: - nomask = 0; - ap->ae_perm &= (mode >> 3) & 0x7; - break; - case ACL_OTHER: - ap->ae_perm &= mode & 0x7; - break; - default: - break; - } - } - /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */ - if (gap && nomask) - gap->ae_perm &= (mode >> 3) & 0x7; -} diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 642f1db4def4..63dc1f2efad5 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -18,81 +18,48 @@ #ifndef __XFS_ACL_H__ #define __XFS_ACL_H__ -/* - * Access Control Lists - */ -typedef __uint16_t xfs_acl_perm_t; -typedef __int32_t xfs_acl_tag_t; -typedef __int32_t xfs_acl_id_t; +struct inode; +struct posix_acl; +struct xfs_inode; #define XFS_ACL_MAX_ENTRIES 25 #define XFS_ACL_NOT_PRESENT (-1) -typedef struct xfs_acl_entry { - xfs_acl_tag_t ae_tag; - xfs_acl_id_t ae_id; - xfs_acl_perm_t ae_perm; -} xfs_acl_entry_t; - -typedef struct xfs_acl { - __int32_t acl_cnt; - xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES]; -} xfs_acl_t; +/* On-disk XFS access control list structure */ +struct xfs_acl { + __be32 acl_cnt; + struct xfs_acl_entry { + __be32 ae_tag; + __be32 ae_id; + __be16 ae_perm; + } acl_entry[XFS_ACL_MAX_ENTRIES]; +}; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE "SGI_ACL_FILE" +#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) -#define _ACL_TYPE_ACCESS 1 -#define _ACL_TYPE_DEFAULT 2 - #ifdef CONFIG_XFS_POSIX_ACL +extern int xfs_check_acl(struct inode *inode, int mask); +extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); +extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); +extern int xfs_acl_chmod(struct inode *inode); +extern void xfs_inode_init_acls(struct xfs_inode *ip); +extern void xfs_inode_clear_acls(struct xfs_inode *ip); +extern int posix_acl_access_exists(struct inode *inode); +extern int posix_acl_default_exists(struct inode *inode); -struct vattr; -struct xfs_inode; - -extern struct kmem_zone *xfs_acl_zone; -#define xfs_acl_zone_init(zone, name) \ - (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) -#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) - -extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); -extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); -extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); -extern int xfs_acl_vhasacl_access(struct inode *); -extern int xfs_acl_vhasacl_default(struct inode *); -extern int xfs_acl_vset(struct inode *, void *, size_t, int); -extern int xfs_acl_vget(struct inode *, void *, size_t, int); -extern int xfs_acl_vremove(struct inode *, int); - -#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) - -#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) -#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0) -#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0) -#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access -#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default - -#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP)) -#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) - +extern struct xattr_handler xfs_xattr_system_handler; #else -#define xfs_acl_zone_init(zone,name) -#define xfs_acl_zone_destroy(zone) -#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) -#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP) -#define xfs_acl_vremove(v,t) (-EOPNOTSUPP) -#define xfs_acl_vhasacl_access(v) (0) -#define xfs_acl_vhasacl_default(v) (0) -#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */ -#define _ACL_FREE(a) ((void)0) -#define _ACL_INHERIT(c,m,d) (0) -#define _ACL_GET_ACCESS(pv,pa) (0) -#define _ACL_GET_DEFAULT(pv,pd) (0) -#define _ACL_ACCESS_EXISTS (NULL) -#define _ACL_DEFAULT_EXISTS (NULL) -#endif - +# define xfs_check_acl NULL +# define xfs_get_acl(inode, type) NULL +# define xfs_inherit_acl(inode, default_acl) 0 +# define xfs_acl_chmod(inode) 0 +# define xfs_inode_init_acls(ip) +# define xfs_inode_clear_acls(ip) +# define posix_acl_access_exists(inode) 0 +# define posix_acl_default_exists(inode) 0 +#endif /* CONFIG_XFS_POSIX_ACL */ #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index c8641f713caa..f24b50b68d03 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -212,6 +212,8 @@ typedef struct xfs_perag /* * tags for inode radix tree */ +#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup + in xfs_inode_ag_iterator */ #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index 53d5e70d1360..0902249354a0 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) #endif /* __KERNEL__ */ -/* do we need conversion? */ -#define ARCH_NOCONVERT 1 -#ifdef XFS_NATIVE_HOST -# define ARCH_CONVERT ARCH_NOCONVERT -#else -# define ARCH_CONVERT 0 -#endif - -/* generic swapping macros */ - -#ifndef HAVE_SWABMACROS -#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var)))) -#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var)))) -#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var)))) -#endif - -#define INT_SWAP(type, var) \ - ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \ - ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \ - ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \ - (var)))) - /* * get and set integers from potentially unaligned locations */ @@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b) ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } -/* does not return a value */ -#define INT_SET(reference,arch,valueref) \ - (__builtin_constant_p(valueref) ? \ - (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \ - (void)( \ - ((reference) = (valueref)), \ - ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \ - ) \ - ) - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 5fde1654b430..db15feb906ff 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -45,7 +45,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" -#include "xfs_acl.h" #include "xfs_rw.h" #include "xfs_vnodeops.h" @@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, /* * Attach the dquots to the inode. */ - if ((error = XFS_QM_DQATTACH(mp, dp, 0))) - return (error); + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; /* * If the inode doesn't have an attribute fork, add one. @@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, + error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) /* * Attach the dquots to the inode. */ - if ((error = XFS_QM_DQATTACH(mp, dp, 0))) - return (error); + error = xfs_qm_dqattach(dp, 0); + if (error) + return error; /* * Start our first transaction of the day. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index ca7c6005a487..7928b9983c1d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc( * Adjust the disk quota also. This was reserved * earlier. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); } else { @@ -2995,7 +2995,7 @@ xfs_bmap_btalloc( * Adjust the disk quota also. This was reserved * earlier. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip, + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, (long) args.len); @@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents( return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); if (cur->bc_bufs[0] == cbp) cur->bc_bufs[0] = NULL; @@ -3386,7 +3386,7 @@ xfs_bmap_del_extent( * Adjust quota data. */ if (qfield) - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks); + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); /* * Account for change in delayed indirect blocks. @@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree( *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); /* * Fill in the child block. @@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents( XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, 1); ip->i_d.di_nblocks = 1; - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip, + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); flags |= xfs_ilog_fext(whichfork); } else { @@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork( XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT))) goto error0; xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ? + error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { @@ -4983,10 +4983,11 @@ xfs_bmapi( * adjusted later. We return if we haven't * allocated blocks already inside this loop. */ - if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS( - mp, NULL, ip, (long)alen, 0, + error = xfs_trans_reserve_quota_nblks( + NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : - XFS_QMOPT_RES_REGBLKS))) { + XFS_QMOPT_RES_REGBLKS); + if (error) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -5035,8 +5036,8 @@ xfs_bmapi( if (XFS_IS_QUOTA_ON(mp)) /* unreserve the blocks now */ (void) - XFS_TRANS_UNRESERVE_QUOTA_NBLKS( - mp, NULL, ip, + xfs_trans_unreserve_quota_nblks( + NULL, ip, (long)alen, 0, rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); @@ -5691,14 +5692,14 @@ xfs_bunmapi( do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, (int64_t)rtexts, rsvd); - (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, - NULL, ip, -((long)del.br_blockcount), 0, + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); - (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, - NULL, ip, -((long)del.br_blockcount), 0, + (void)xfs_trans_reserve_quota_nblks(NULL, + ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); } ip->i_delayed_blks -= del.br_blockcount; @@ -6085,6 +6086,7 @@ xfs_getbmap( break; } + kmem_free(out); return error; } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 0760d352586f..5c1ade06578e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -590,7 +590,7 @@ xfs_bmbt_alloc_block( cur->bc_private.b.allocated++; cur->bc_private.b.ip->i_d.di_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, + xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -618,7 +618,7 @@ xfs_bmbt_free_block( ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, bp); return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 6c87c8f304ef..edf8bdf4141f 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -542,10 +542,8 @@ xfs_filestream_associate( * waiting for the lock because someone else is waiting on the lock we * hold and we cannot drop that as we are in a transaction here. * - * Lucky for us, this inversion is rarely a problem because it's a - * directory inode that we are trying to lock here and that means the - * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is - * used. i.e. freeze, remount-ro, quotasync or unmount. + * Lucky for us, this inversion is not a problem because it's a + * directory inode that we are trying to lock here. * * So, if we can't get the iolock without sleeping then just give up */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index f7c06fac8229..c4ea51b55dce 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks { * Minimum and maximum sizes need for growth checks */ #define XFS_MIN_AG_BLOCKS 64 -#define XFS_MIN_LOG_BLOCKS 512 -#define XFS_MAX_LOG_BLOCKS (64 * 1024) -#define XFS_MIN_LOG_BYTES (256 * 1024) -#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) +#define XFS_MIN_LOG_BLOCKS 512ULL +#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) +#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) + +/* keep the maximum size under 2^31 by a small amount */ +#define XFS_MAX_LOG_BYTES \ + ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 89b81eedce6a..76c540f719e4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -18,6 +18,7 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" +#include "xfs_acl.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" @@ -82,6 +83,7 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; + xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -500,10 +502,7 @@ xfs_ireclaim( * ilock one but will still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - /* - * Release dquots (and their references) if any. - */ - XFS_QM_DQDETACH(ip->i_mount, ip); + xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); switch (ip->i_d.di_mode & S_IFMT) { @@ -561,6 +560,7 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 123b20c8cbf2..1f22d65fed0a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -49,7 +49,6 @@ #include "xfs_utils.h" #include "xfs_dir2_trace.h" #include "xfs_quota.h" -#include "xfs_acl.h" #include "xfs_filestream.h" #include "xfs_vnodeops.h" diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f879c1bc4b96..77016702938b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -18,6 +18,7 @@ #ifndef __XFS_INODE_H__ #define __XFS_INODE_H__ +struct posix_acl; struct xfs_dinode; struct xfs_inode; @@ -272,6 +273,11 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ +#ifdef CONFIG_XFS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5aaa2d7ec155..67ae5555a30a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -42,7 +42,6 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" @@ -385,7 +384,7 @@ xfs_iomap_write_direct( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); + error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); @@ -444,8 +443,7 @@ xfs_iomap_write_direct( if (error) goto error_out; - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, - qblocks, 0, quota_flag); + error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; @@ -495,7 +493,7 @@ xfs_iomap_write_direct( error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -582,7 +580,7 @@ xfs_iomap_write_delay( * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ - error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); @@ -684,7 +682,8 @@ xfs_iomap_write_allocate( /* * Make sure that the dquots are there. */ - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7ba450116d4f..47da2fb45377 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer( error = 0; if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { + if (item->ri_buf[i].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + goto next; + } + if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[i].i_len, __func__); + goto next; + } error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); + if (error) + goto next; } - if (!error) - memcpy(xfs_buf_offset(bp, - (uint)bit << XFS_BLI_SHIFT), /* dest */ - item->ri_buf[i].i_addr, /* source */ - nbits<<XFS_BLI_SHIFT); /* length */ + + memcpy(xfs_buf_offset(bp, + (uint)bit << XFS_BLI_SHIFT), /* dest */ + item->ri_buf[i].i_addr, /* source */ + nbits<<XFS_BLI_SHIFT); /* length */ + next: i++; bit += nbits; } @@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans( return (0); recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; - ASSERT(recddq); + + if (item->ri_buf[1].i_addr == NULL) { + cmn_err(CE_ALERT, + "XFS: NULL dquot in %s.", __func__); + return XFS_ERROR(EIO); + } + if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) { + cmn_err(CE_ALERT, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); + return XFS_ERROR(EIO); + } + /* * This type of quotas was turned off, so ignore this record. */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 65a99725d0cc..5c6f092659c1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp) } /* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags( + struct xfs_mount *mp) +{ + int error; + struct xfs_trans *tp; + + mp->m_qflags = 0; + + /* + * It is OK to look at sb_qflags here in mount path, + * without m_sb_lock. + */ + if (mp->m_sb.sb_qflags == 0) + return 0; + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = 0; + spin_unlock(&mp->m_sb_lock); + + /* + * If the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + + xfs_mod_sb(tp, XFS_SB_QFLAGS); + return xfs_trans_commit(tp, 0); +} + +/* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct * - if we're a 32-bit kernel, do a size check on the superblock @@ -976,7 +1023,8 @@ xfs_mountfs( xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; - uint quotamount, quotaflags; + uint quotamount = 0; + uint quotaflags = 0; int error = 0; xfs_mount_common(mp, sbp); @@ -1210,9 +1258,28 @@ xfs_mountfs( /* * Initialise the XFS quota management subsystem for this mount */ - error = XFS_QM_INIT(mp, "amount, "aflags); - if (error) - goto out_rtunmount; + if (XFS_IS_QUOTA_RUNNING(mp)) { + error = xfs_qm_newmount(mp, "amount, "aflags); + if (error) + goto out_rtunmount; + } else { + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS: resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + if (error) + return error; + } + } /* * Finish recovering the file system. This part needed to be @@ -1228,9 +1295,19 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - error = XFS_QM_MOUNT(mp, quotamount, quotaflags); - if (error) - goto out_rtunmount; + if (quotamount) { + ASSERT(mp->m_qflags == 0); + mp->m_qflags = quotaflags; + + xfs_qm_mount_quotas(mp); + } + +#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) + if (XFS_IS_QUOTA_ON(mp)) + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on"); + else + xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on"); +#endif /* * Now we are mounted, reserve a small amount of unused space for @@ -1279,12 +1356,7 @@ xfs_unmountfs( __uint64_t resblks; int error; - /* - * Release dquot that rootinode, rbmino and rsumino might be holding, - * and release the quota inodes. - */ - XFS_QM_UNMOUNT(mp); - + xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); IRELE(mp->m_rootip); @@ -1299,12 +1371,9 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); - - XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); + xfs_qm_unmount(mp); /* * Flush out the log synchronously so that we know for sure diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d6a64392f983..a5122382afde 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -64,6 +64,8 @@ struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; +struct xfs_quotainfo; + /* * Prototypes and functions for the Data Migration subsystem. @@ -107,86 +109,6 @@ typedef struct xfs_dmops { (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) -/* - * Prototypes and functions for the Quota Management subsystem. - */ - -struct xfs_dquot; -struct xfs_dqtrxops; -struct xfs_quotainfo; - -typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); -typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); -typedef void (*xfs_qmunmount_t)(struct xfs_mount *); -typedef void (*xfs_qmdone_t)(struct xfs_mount *); -typedef void (*xfs_dqrele_t)(struct xfs_dquot *); -typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); -typedef void (*xfs_dqdetach_t)(struct xfs_inode *); -typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); -typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, - struct xfs_inode *, uid_t, gid_t, prid_t, uint, - struct xfs_dquot **, struct xfs_dquot **); -typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *); -typedef int (*xfs_dqvoprename_t)(struct xfs_inode **); -typedef struct xfs_dquot * (*xfs_dqvopchown_t)( - struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot **, struct xfs_dquot *); -typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *, uint); -typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); -typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); - -typedef struct xfs_qmops { - xfs_qminit_t xfs_qminit; - xfs_qmdone_t xfs_qmdone; - xfs_qmmount_t xfs_qmmount; - xfs_qmunmount_t xfs_qmunmount; - xfs_dqrele_t xfs_dqrele; - xfs_dqattach_t xfs_dqattach; - xfs_dqdetach_t xfs_dqdetach; - xfs_dqpurgeall_t xfs_dqpurgeall; - xfs_dqvopalloc_t xfs_dqvopalloc; - xfs_dqvopcreate_t xfs_dqvopcreate; - xfs_dqvoprename_t xfs_dqvoprename; - xfs_dqvopchown_t xfs_dqvopchown; - xfs_dqvopchownresv_t xfs_dqvopchownresv; - xfs_dqstatvfs_t xfs_dqstatvfs; - xfs_dqsync_t xfs_dqsync; - struct xfs_dqtrxops *xfs_dqtrxops; -} xfs_qmops_t; - -#define XFS_QM_INIT(mp, mnt, fl) \ - (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) -#define XFS_QM_MOUNT(mp, mnt, fl) \ - (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) -#define XFS_QM_UNMOUNT(mp) \ - (*(mp)->m_qm_ops->xfs_qmunmount)(mp) -#define XFS_QM_DONE(mp) \ - (*(mp)->m_qm_ops->xfs_qmdone)(mp) -#define XFS_QM_DQRELE(mp, dq) \ - (*(mp)->m_qm_ops->xfs_dqrele)(dq) -#define XFS_QM_DQATTACH(mp, ip, fl) \ - (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl) -#define XFS_QM_DQDETACH(mp, ip) \ - (*(mp)->m_qm_ops->xfs_dqdetach)(ip) -#define XFS_QM_DQPURGEALL(mp, fl) \ - (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl) -#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ - (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) -#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ - (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2) -#define XFS_QM_DQVOPRENAME(mp, ip) \ - (*(mp)->m_qm_ops->xfs_dqvoprename)(ip) -#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \ - (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq) -#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \ - (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl) -#define XFS_QM_DQSTATVFS(ip, statp) \ - (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp) -#define XFS_QM_DQSYNC(mp, flags) \ - (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags) - #ifdef HAVE_PERCPU_SB /* @@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); -extern int xfs_qmops_get(struct xfs_mount *); -extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c deleted file mode 100644 index e101790ea8e7..000000000000 --- a/fs/xfs/xfs_qmops.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_error.h" - - -STATIC struct xfs_dquot * -xfs_dqvopchown_default( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot **dqp, - struct xfs_dquot *dq) -{ - return NULL; -} - -/* - * Clear the quotaflags in memory and in the superblock. - */ -int -xfs_mount_reset_sbqflags(xfs_mount_t *mp) -{ - int error; - xfs_trans_t *tp; - - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock. - */ - if (mp->m_sb.sb_qflags == 0) - return 0; - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = 0; - spin_unlock(&mp->m_sb_lock); - - /* - * if the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return error; - } - xfs_mod_sb(tp, XFS_SB_QFLAGS); - error = xfs_trans_commit(tp, 0); - return error; -} - -STATIC int -xfs_noquota_init( - xfs_mount_t *mp, - uint *needquotamount, - uint *quotaflags) -{ - int error = 0; - - *quotaflags = 0; - *needquotamount = B_FALSE; - - ASSERT(!XFS_IS_QUOTA_ON(mp)); - - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license. - */ - if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS resetting qflags for filesystem %s", - mp->m_fsname); - - error = xfs_mount_reset_sbqflags(mp); - } - return error; -} - -static struct xfs_qmops xfs_qmcore_stub = { - .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, - .xfs_qmdone = (xfs_qmdone_t) fs_noerr, - .xfs_qmmount = (xfs_qmmount_t) fs_noerr, - .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, - .xfs_dqrele = (xfs_dqrele_t) fs_noerr, - .xfs_dqattach = (xfs_dqattach_t) fs_noerr, - .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr, - .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr, - .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr, - .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr, - .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr, - .xfs_dqvopchown = xfs_dqvopchown_default, - .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr, - .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval, - .xfs_dqsync = (xfs_dqsync_t) fs_noerr, -}; - -int -xfs_qmops_get(struct xfs_mount *mp) -{ - if (XFS_IS_QUOTA_RUNNING(mp)) { -#ifdef CONFIG_XFS_QUOTA - mp->m_qm_ops = &xfs_qmcore_xfs; -#else - cmn_err(CE_WARN, - "XFS: qouta support not available in this kernel."); - return EINVAL; -#endif - } else { - mp->m_qm_ops = &xfs_qmcore_stub; - } - - return 0; -} - -void -xfs_qmops_put(struct xfs_mount *mp) -{ -} diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index f5d1202dde25..3ec91ac74c2a 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ #define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ @@ -302,69 +301,79 @@ typedef struct xfs_dqtrx { long qt_delrtb_delta; /* delayed RT blk count changes */ } xfs_dqtrx_t; -/* - * Dquot transaction functions, used if quota is enabled. - */ -typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *); -typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *, - struct xfs_inode *, uint, long); -typedef void (*qo_free_dqinfo_t)(struct xfs_trans *); -typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *); -typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *); -typedef int (*qo_reserve_quota_nblks_t)( - struct xfs_trans *, struct xfs_mount *, - struct xfs_inode *, long, long, uint); -typedef int (*qo_reserve_quota_bydquots_t)( - struct xfs_trans *, struct xfs_mount *, - struct xfs_dquot *, struct xfs_dquot *, - long, long, uint); -typedef struct xfs_dqtrxops { - qo_dup_dqinfo_t qo_dup_dqinfo; - qo_free_dqinfo_t qo_free_dqinfo; - qo_mod_dquot_byino_t qo_mod_dquot_byino; - qo_apply_dquot_deltas_t qo_apply_dquot_deltas; - qo_reserve_quota_nblks_t qo_reserve_quota_nblks; - qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots; - qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots; -} xfs_dqtrxops_t; - -#define XFS_DQTRXOP(mp, tp, op, args...) \ - ((mp)->m_qm_ops->xfs_dqtrxops ? \ - ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0) - -#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \ - ((mp)->m_qm_ops->xfs_dqtrxops ? \ - ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0) - -#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \ - XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp) -#define XFS_TRANS_FREE_DQINFO(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo) -#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \ - XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta) -#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas) -#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \ - XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl) -#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \ - XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl) -#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ - XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) - -#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) -#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ - XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ - f | XFS_QMOPT_RES_REGBLKS) -#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ - XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \ +#ifdef CONFIG_XFS_QUOTA +extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); +extern void xfs_trans_free_dqinfo(struct xfs_trans *); +extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, + uint, long); +extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); +extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); +extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, + struct xfs_inode *, long, long, uint); +extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, + struct xfs_mount *, struct xfs_dquot *, + struct xfs_dquot *, long, long, uint); + +extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, + struct xfs_dquot **, struct xfs_dquot **); +extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *); +extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); +extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, + struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); +extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, + struct xfs_dquot *, struct xfs_dquot *, uint); +extern int xfs_qm_dqattach(struct xfs_inode *, uint); +extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); +extern void xfs_qm_dqdetach(struct xfs_inode *); +extern void xfs_qm_dqrele(struct xfs_dquot *); +extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); +extern int xfs_qm_sync(struct xfs_mount *, int); +extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); +extern void xfs_qm_mount_quotas(struct xfs_mount *); +extern void xfs_qm_unmount(struct xfs_mount *); +extern void xfs_qm_unmount_quotas(struct xfs_mount *); + +#else +static inline int +xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, + uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) +{ + *udqp = NULL; + *gdqp = NULL; + return 0; +} +#define xfs_trans_dup_dqinfo(tp, tp2) +#define xfs_trans_free_dqinfo(tp) +#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) +#define xfs_trans_apply_dquot_deltas(tp) +#define xfs_trans_unreserve_and_mod_dquots(tp) +#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) +#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) +#define xfs_qm_vop_create_dqattach(tp, ip, u, g) +#define xfs_qm_vop_rename_dqattach(it) (0) +#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) +#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) +#define xfs_qm_dqattach(ip, fl) (0) +#define xfs_qm_dqattach_locked(ip, fl) (0) +#define xfs_qm_dqdetach(ip) +#define xfs_qm_dqrele(d) +#define xfs_qm_statvfs(ip, s) +#define xfs_qm_sync(mp, fl) (0) +#define xfs_qm_newmount(mp, a, b) (0) +#define xfs_qm_mount_quotas(mp) +#define xfs_qm_unmount(mp) +#define xfs_qm_unmount_quotas(mp) (0) +#endif /* CONFIG_XFS_QUOTA */ + +#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ + xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) +#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ + xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); -extern struct xfs_qmops xfs_qmcore_xfs; - #endif /* __KERNEL__ */ - #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 58f85e9cd11d..b81deea0ce19 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -166,7 +166,8 @@ xfs_rename( /* * Attach the dquots to the inodes */ - if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { + error = xfs_qm_vop_rename_dqattach(inodes); + if (error) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 36f3a21c54d2..fea68615ed23 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -41,7 +41,6 @@ #include "xfs_ialloc.h" #include "xfs_attr.h" #include "xfs_bmap.h" -#include "xfs_acl.h" #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_rw.h" diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8570b826fedd..66b849358e62 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -297,7 +297,7 @@ xfs_trans_dup( tp->t_rtx_res = tp->t_rtx_res_used; ntp->t_pflags = tp->t_pflags; - XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp); + xfs_trans_dup_dqinfo(tp, ntp); atomic_inc(&tp->t_mountp->m_active_trans); return ntp; @@ -628,8 +628,6 @@ xfs_trans_apply_sb_deltas( xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), offsetof(xfs_dsb_t, sb_frextents) + sizeof(sbp->sb_frextents) - 1); - - tp->t_mountp->m_super->s_dirt = 1; } /* @@ -831,7 +829,7 @@ shut_us_down: * means is that we have some (non-persistent) quota * reservations that need to be unreserved. */ - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); @@ -850,10 +848,9 @@ shut_us_down: /* * If we need to update the superblock, then do it now. */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { + if (tp->t_flags & XFS_TRANS_SB_DIRTY) xfs_trans_apply_sb_deltas(tp); - } - XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp); + xfs_trans_apply_dquot_deltas(tp); /* * Ask each log item how many log_vector entries it will @@ -1058,7 +1055,7 @@ xfs_trans_uncommit( } xfs_trans_unreserve_and_mod_sb(tp); - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); xfs_trans_free_items(tp, flags); xfs_trans_free_busy(tp); @@ -1183,7 +1180,7 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp); + xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { if (flags & XFS_TRANS_RELEASE_LOG_RES) { @@ -1213,7 +1210,7 @@ xfs_trans_free( xfs_trans_t *tp) { atomic_dec(&tp->t_mountp->m_active_trans); - XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); + xfs_trans_free_dqinfo(tp); kmem_zone_free(xfs_trans_zone, tp); } diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 79b9e5ea5359..4d88616bde91 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -166,7 +166,7 @@ xfs_dir_ialloc( xfs_buf_relse(ialloc_context); if (dqinfo) { tp->t_dqinfo = dqinfo; - XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); + xfs_trans_free_dqinfo(tp); } *tpp = ntp; *ipp = NULL; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 19cf90a9c762..c4eca5ed5dab 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -42,6 +42,7 @@ #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" +#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_rw.h" #include "xfs_error.h" @@ -118,7 +119,7 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid, + code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, qflags, &udqp, &gdqp); if (code) return code; @@ -180,10 +181,11 @@ xfs_setattr( * Do a quota reservation only if uid/gid is actually * going to change. */ - if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { ASSERT(tp); - code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ @@ -217,7 +219,7 @@ xfs_setattr( /* * Make sure that the dquots are attached to the inode. */ - code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + code = xfs_qm_dqattach_locked(ip, 0); if (code) goto error_return; @@ -351,21 +353,21 @@ xfs_setattr( * in the transaction. */ if (iuid != uid) { - if (XFS_IS_UQUOTA_ON(mp)) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); - olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = uid; inode->i_uid = uid; } if (igid != gid) { - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); - olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = gid; @@ -461,13 +463,25 @@ xfs_setattr( /* * Release any dquot(s) the inode had kept before chown. */ - XFS_QM_DQRELE(mp, olddquot1); - XFS_QM_DQRELE(mp, olddquot2); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); - if (code) { + if (code) return code; + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + code = -xfs_acl_chmod(inode); + if (code) + return XFS_ERROR(code); } if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && @@ -482,8 +496,8 @@ xfs_setattr( commit_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (tp) { xfs_trans_cancel(tp, commit_flags); } @@ -739,7 +753,8 @@ xfs_free_eofblocks( /* * Attach the dquots to the inode up front. */ - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; /* @@ -1181,7 +1196,8 @@ xfs_inactive( ASSERT(ip->i_d.di_nlink == 0); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return VN_INACTIVE_CACHE; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); @@ -1307,7 +1323,7 @@ xfs_inactive( /* * Credit the quota account(s). The inode is gone. */ - XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); /* * Just ignore errors at this point. There is nothing we can @@ -1323,11 +1339,11 @@ xfs_inactive( xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " "xfs_trans_commit() returned error %d", error); } + /* * Release the dquots held by inode, if any. */ - XFS_QM_DQDETACH(mp, ip); - + xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); out: @@ -1427,8 +1443,7 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -1489,7 +1504,7 @@ xfs_create( /* * Reserve disk quota and the inode. */ - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); if (error) goto out_trans_cancel; @@ -1561,7 +1576,7 @@ xfs_create( * These ids of the inode couldn't have changed since the new * inode has been locked ever since it was created. */ - XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); /* * xfs_trans_commit normally decrements the vnode ref count @@ -1580,8 +1595,8 @@ xfs_create( goto out_dqrele; } - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); *ipp = ip; @@ -1602,8 +1617,8 @@ xfs_create( out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); out_dqrele: - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -1837,11 +1852,11 @@ xfs_remove( return error; } - error = XFS_QM_DQATTACH(mp, dp, 0); + error = xfs_qm_dqattach(dp, 0); if (error) goto std_return; - error = XFS_QM_DQATTACH(mp, ip, 0); + error = xfs_qm_dqattach(ip, 0); if (error) goto std_return; @@ -2028,11 +2043,11 @@ xfs_link( /* Return through std_return after this point. */ - error = XFS_QM_DQATTACH(mp, sip, 0); + error = xfs_qm_dqattach(sip, 0); if (error) goto std_return; - error = XFS_QM_DQATTACH(mp, tdp, 0); + error = xfs_qm_dqattach(tdp, 0); if (error) goto std_return; @@ -2205,8 +2220,7 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2248,7 +2262,7 @@ xfs_symlink( /* * Reserve disk quota : blocks and inode. */ - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); if (error) goto error_return; @@ -2288,7 +2302,7 @@ xfs_symlink( /* * Also attach the dquot(s) to it, if applicable. */ - XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); if (resblks) resblks -= XFS_IALLOC_SPACE_RES(mp); @@ -2376,8 +2390,8 @@ xfs_symlink( goto error2; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ @@ -2401,8 +2415,8 @@ std_return: cancel_flags |= XFS_TRANS_ABORT; error_return: xfs_trans_cancel(tp, cancel_flags); - XFS_QM_DQRELE(mp, udqp); - XFS_QM_DQRELE(mp, gdqp); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -2541,7 +2555,8 @@ xfs_alloc_file_space( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; if (len <= 0) @@ -2628,8 +2643,8 @@ retry: break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, - qblocks, 0, quota_flag); + error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, + 0, quota_flag); if (error) goto error1; @@ -2688,7 +2703,7 @@ dmapi_enospc_check: error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); @@ -2827,7 +2842,8 @@ xfs_free_file_space( xfs_itrace_entry(ip); - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + error = xfs_qm_dqattach(ip, 0); + if (error) return error; error = 0; @@ -2953,9 +2969,9 @@ xfs_free_file_space( break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, - XFS_QMOPT_RES_REGBLKS); + error = xfs_trans_reserve_quota(tp, mp, + ip->i_udquot, ip->i_gdquot, + resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 04373c6c61ff..a9e102de71a1 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ +#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_fsync(struct xfs_inode *ip); |