diff options
Diffstat (limited to 'fs')
176 files changed, 2922 insertions, 2319 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ff7be98f84f2..9619ccadd2fc 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -10,10 +10,7 @@ obj-$(CONFIG_9P_FS) := 9p.o vfs_dentry.o \ v9fs.o \ fid.o \ - xattr.o \ - xattr_user.o \ - xattr_trusted.o + xattr.o 9p-$(CONFIG_9P_FSCACHE) += cache.o 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o -9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 31c010372660..a7e28890f5ef 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -212,26 +212,9 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, return 0; } -static int v9fs_remote_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_get(dentry, full_name, buffer, size); -} - -static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int v9fs_xattr_get_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct v9fs_session_info *v9ses; struct posix_acl *acl; @@ -245,9 +228,9 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, * We allow set/get/list of acl when access=client is not specified */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_get_acl(dentry, name, buffer, size, type); + return v9fs_xattr_get(dentry, handler->prefix, buffer, size); - acl = v9fs_get_cached_acl(d_inode(dentry), type); + acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -258,29 +241,9 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, return error; } -static int v9fs_remote_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_set(dentry, full_name, value, size, flags); -} - - -static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags, int type) +static int v9fs_xattr_set_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { int retval; struct posix_acl *acl; @@ -296,8 +259,8 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, * xattr value. We leave it to the server to validate */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_set_acl(dentry, name, - value, size, flags, type); + return v9fs_xattr_set(dentry, handler->prefix, value, size, + flags); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -316,9 +279,8 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, } else acl = NULL; - switch (type) { + switch (handler->flags) { case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; if (acl) { umode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); @@ -349,7 +311,6 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, } break; case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; if (!S_ISDIR(inode->i_mode)) { retval = acl ? -EINVAL : 0; goto err_out; @@ -358,9 +319,9 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, default: BUG(); } - retval = v9fs_xattr_set(dentry, name, value, size, flags); + retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags); if (!retval) - set_cached_acl(inode, type, acl); + set_cached_acl(inode, handler->flags, acl); err_out: posix_acl_release(acl); return retval; diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 2f9675491095..247e47e54bcc 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -21,6 +21,7 @@ */ #ifndef _9P_CACHE_H +#define _9P_CACHE_H #ifdef CONFIG_9P_FSCACHE #include <linux/fscache.h> #include <linux/spinlock.h> diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 0cf44b6cccd6..e3d026ac382e 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -137,6 +137,48 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return v9fs_xattr_get(dentry, NULL, buffer, buffer_size); } +static int v9fs_xattr_handler_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_get(dentry, full_name, buffer, size); +} + +static int v9fs_xattr_handler_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_set(dentry, full_name, value, size, flags); +} + +static struct xattr_handler v9fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +static struct xattr_handler v9fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +#ifdef CONFIG_9P_FS_SECURITY +static struct xattr_handler v9fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; +#endif + const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, &v9fs_xattr_trusted_handler, diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index d3e2ea3840be..c63c3bea5de5 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -19,9 +19,6 @@ #include <net/9p/client.h> extern const struct xattr_handler *v9fs_xattr_handlers[]; -extern struct xattr_handler v9fs_xattr_user_handler; -extern struct xattr_handler v9fs_xattr_trusted_handler; -extern struct xattr_handler v9fs_xattr_security_handler; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c deleted file mode 100644 index cb247a142a6e..000000000000 --- a/fs/9p/xattr_security.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = v9fs_xattr_security_get, - .set = v9fs_xattr_security_set, -}; diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c deleted file mode 100644 index e30d33b8a3fb..000000000000 --- a/fs/9p/xattr_trusted.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = v9fs_xattr_trusted_get, - .set = v9fs_xattr_trusted_set, -}; diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c deleted file mode 100644 index d0b701b72080..000000000000 --- a/fs/9p/xattr_user.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = v9fs_xattr_user_get, - .set = v9fs_xattr_user_set, -}; diff --git a/fs/Kconfig b/fs/Kconfig index da3f32f1a4e4..6ce72d8d1ee1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -46,6 +46,12 @@ config FS_DAX or if unsure, say N. Saying Y will increase the size of the kernel by about 5kB. +config FS_DAX_PMD + bool + default FS_DAX + depends on FS_DAX + depends on BROKEN + endif # BLOCK # Posix ACL utility routines diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5f399ea1d20a..3a93755e880f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -488,7 +488,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr, } /** - * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header + * arch_check_elf() - check an ELF executable * @ehdr: The main ELF header * @has_interp: True if the ELF has an interpreter, else false. * @state: Architecture-specific state preserved throughout the process @@ -760,16 +760,16 @@ static int load_elf_binary(struct linux_binprm *bprm) */ would_dump(bprm, interpreter); - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); - if (retval != BINPRM_BUF_SIZE) { + /* Get the exec headers */ + retval = kernel_read(interpreter, 0, + (void *)&loc->interp_elf_ex, + sizeof(loc->interp_elf_ex)); + if (retval != sizeof(loc->interp_elf_ex)) { if (retval >= 0) retval = -EIO; goto out_free_dentry; } - /* Get the exec headers */ - loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0a793c7930eb..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,12 +50,21 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); -static void bdev_write_inode(struct inode *inode) +static void bdev_write_inode(struct block_device *bdev) { + struct inode *inode = bdev->bd_inode; + int ret; + spin_lock(&inode->i_lock); while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); - WARN_ON_ONCE(write_inode_now(inode, true)); + ret = write_inode_now(inode, true); + if (ret) { + char name[BDEVNAME_SIZE]; + pr_warn_ratelimited("VFS: Dirty inode writeback failed " + "for block device %s (err=%d).\n", + bdevname(bdev, name), ret); + } spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); @@ -381,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -412,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); @@ -1504,7 +1527,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) * ->release can cause the queue to disappear, so flush all * dirty data before. */ - bdev_write_inode(bdev->bd_inode); + bdev_write_inode(bdev); } if (bdev->bd_contains == bdev) { if (disk->fops->release) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 640598c0d0e7..974be09e7556 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the qgroup rescan worker to stop */ + btrfs_qgroup_wait_for_completion(fs_info); + /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8e57da8a1..acf3ed11cfb6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group_cache, bg_list); - space_info = block_group->space_info; list_del_init(&block_group->bg_list); + + space_info = block_group->space_info; + if (ret || btrfs_mixed_space_info(space_info)) { btrfs_put_block_group(block_group); continue; } spin_unlock(&fs_info->unused_bgs_lock); - mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + mutex_lock(&fs_info->delete_unused_bgs_mutex); /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); if (block_group->reserved || btrfs_block_group_used(&block_group->item) || - block_group->ro) { + block_group->ro || + list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do @@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: - mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6bd5ce9d75f0..977e715f0bf2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ next_slot: } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ next_slot: btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0e4f2bfcc37d..994490d5fa64 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1304,8 +1304,14 @@ next_slot: num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -7503,6 +7509,28 @@ struct btrfs_dio_data { u64 reserve; }; +static void adjust_dio_outstanding_extents(struct inode *inode, + struct btrfs_dio_data *dio_data, + const u64 len) +{ + unsigned num_extents; + + num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (dio_data->outstanding_extents) { + dio_data->outstanding_extents -= num_extents; + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents += num_extents; + spin_unlock(&BTRFS_I(inode)->lock); + } +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. */ - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) - return -ENOTBLK; + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, + create)) { + ret = -ENOTBLK; + goto err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { @@ -7657,19 +7688,7 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - /* - * If we have an outstanding_extents count still set then we're - * within our reservation, otherwise we need to adjust our inode - * counter appropriately. - */ - if (dio_data->outstanding_extents) { - (dio_data->outstanding_extents)--; - } else { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); - } - + adjust_dio_outstanding_extents(inode, dio_data, len); btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; @@ -7696,8 +7715,17 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); +err: if (dio_data) current->journal_info = dio_data; + /* + * Compensate the delalloc release we do in btrfs_direct_IO() when we + * write less data then expected, so that we don't underflow our inode's + * outstanding extents counter. + */ + if (create && dio_data) + adjust_dio_outstanding_extents(inode, dio_data, len); + return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 46476c226395..93e12c18ffd7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int slot; int ret; - path->leave_spinning = 1; mutex_lock(&fs_info->qgroup_rescan_lock); ret = btrfs_search_slot_for_read(fs_info->extent_root, &fs_info->qgroup_rescan_progress, @@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) goto out; err = 0; - while (!err) { + while (!err && !btrfs_fs_closing(fs_info)) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2307,7 +2306,8 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { @@ -2336,7 +2336,9 @@ out: } btrfs_end_transaction(trans, fs_info->quota_root); - if (err >= 0) { + if (btrfs_fs_closing(fs_info)) { + btrfs_info(fs_info, "qgroup scan paused"); + } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", err > 0 ? " (inconsistency flag cleared)" : ""); } else { @@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_progress, 0, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; + init_completion(&fs_info->qgroup_rescan_completion); spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - init_completion(&fs_info->qgroup_rescan_completion); - memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 550de89a8661..2907a77fb1f6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct scrub_block *sblocks_for_recheck); static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror); -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size); + struct scrub_block *sblock, + int retry_failed_mirror); +static void scrub_recheck_block_checksum(struct scrub_block *sblock); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good); static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, @@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) struct btrfs_fs_info *fs_info; u64 length; u64 logical; - u64 generation; unsigned int failed_mirror_index; unsigned int is_metadata; unsigned int have_csum; - u8 *csum; struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ struct scrub_block *sblock_bad; int ret; @@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) } length = sblock_to_check->page_count * PAGE_SIZE; logical = sblock_to_check->pagev[0]->logical; - generation = sblock_to_check->pagev[0]->generation; BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; is_metadata = !(sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); have_csum = sblock_to_check->pagev[0]->have_csum; - csum = sblock_to_check->pagev[0]->csum; dev = sblock_to_check->pagev[0]->dev; if (sctx->is_dev_replace && !is_metadata && !have_csum) { @@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_bad = sblocks_for_recheck + failed_mirror_index; /* build and submit the bios for the failed mirror, check checksums */ - scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, - csum, generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) { @@ -1101,9 +1091,7 @@ nodatasum_case: sblock_other = sblocks_for_recheck + mirror_index; /* build and submit the bios, check checksums */ - scrub_recheck_block(fs_info, sblock_other, is_metadata, - have_csum, csum, generation, - sctx->csum_size, 0); + scrub_recheck_block(fs_info, sblock_other, 0); if (!sblock_other->header_error && !sblock_other->checksum_error && @@ -1215,9 +1203,7 @@ nodatasum_case: * is verified, but most likely the data comes out * of the page cache. */ - scrub_recheck_block(fs_info, sblock_bad, - is_metadata, have_csum, csum, - generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) @@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; u64 length = original_sblock->page_count * PAGE_SIZE; u64 logical = original_sblock->pagev[0]->logical; + u64 generation = original_sblock->pagev[0]->generation; + u64 flags = original_sblock->pagev[0]->flags; + u64 have_csum = original_sblock->pagev[0]->have_csum; struct scrub_recover *recover; struct btrfs_bio *bbio; u64 sublen; @@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sblock = sblocks_for_recheck + mirror_index; sblock->sctx = sctx; + page = kzalloc(sizeof(*page), GFP_NOFS); if (!page) { leave_nomem: @@ -1383,7 +1373,15 @@ leave_nomem: } scrub_page_get(page); sblock->pagev[page_index] = page; + page->sblock = sblock; + page->flags = flags; + page->generation = generation; page->logical = logical; + page->have_csum = have_csum; + if (have_csum) + memcpy(page->csum, + original_sblock->pagev[0]->csum, + sctx->csum_size); scrub_stripe_index_and_offset(logical, bbio->map_type, @@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, * the pages that are errored in the just handled mirror can be repaired. */ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror) + struct scrub_block *sblock, + int retry_failed_mirror) { int page_num; sblock->no_io_error_seen = 1; - sblock->header_error = 0; - sblock->checksum_error = 0; for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; @@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } if (sblock->no_io_error_seen) - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - csum_size); + scrub_recheck_block_checksum(sblock); return; } @@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[], return !ret; } -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size) +static void scrub_recheck_block_checksum(struct scrub_block *sblock) { - int page_num; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; - void *mapped_buffer; - - WARN_ON(!sblock->pagev[0]->page); - if (is_metadata) { - struct btrfs_header *h; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - h = (struct btrfs_header *)mapped_buffer; - - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || - !scrub_check_fsid(h->fsid, sblock->pagev[0]) || - memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, - BTRFS_UUID_SIZE)) { - sblock->header_error = 1; - } else if (generation != btrfs_stack_header_generation(h)) { - sblock->header_error = 1; - sblock->generation_error = 1; - } - csum = h->csum; - } else { - if (!have_csum) - return; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - } - - for (page_num = 0;;) { - if (page_num == 0 && is_metadata) - crc = btrfs_csum_data( - ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, - crc, PAGE_SIZE - BTRFS_CSUM_SIZE); - else - crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE); - - kunmap_atomic(mapped_buffer); - page_num++; - if (page_num >= sblock->page_count) - break; - WARN_ON(!sblock->pagev[page_num]->page); - - mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page); - } + sblock->header_error = 0; + sblock->checksum_error = 0; + sblock->generation_error = 0; - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, csum, csum_size)) - sblock->checksum_error = 1; + if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA) + scrub_checksum_data(sblock); + else + scrub_checksum_tree_block(sblock); } static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, @@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock) u64 flags; int ret; + /* + * No need to initialize these stats currently, + * because this function only use return value + * instead of these stats value. + * + * Todo: + * always use stats + */ + sblock->header_error = 0; + sblock->generation_error = 0; + sblock->checksum_error = 0; + WARN_ON(sblock->page_count < 1); flags = sblock->pagev[0]->flags; ret = 0; @@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) struct page *page; void *buffer; u32 crc = ~(u32)0; - int fail = 0; u64 len; int index; @@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) btrfs_csum_final(crc, csum); if (memcmp(csum, on_disk_csum, sctx->csum_size)) - fail = 1; + sblock->checksum_error = 1; - return fail; + return sblock->checksum_error; } static int scrub_checksum_tree_block(struct scrub_block *sblock) @@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) u64 mapped_size; void *p; u32 crc = ~(u32)0; - int fail = 0; - int crc_fail = 0; u64 len; int index; @@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * a) don't have an extent buffer and * b) the page is already kmapped */ - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) - ++fail; + sblock->header_error = 1; - if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) - ++fail; + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) { + sblock->header_error = 1; + sblock->generation_error = 1; + } if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) - ++fail; + sblock->header_error = 1; if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) - ++fail; + sblock->header_error = 1; len = sctx->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; @@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) btrfs_csum_final(crc, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) - ++crc_fail; + sblock->checksum_error = 1; - return fail || crc_fail; + return sblock->header_error || sblock->checksum_error; } static int scrub_checksum_super(struct scrub_block *sblock) @@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) { struct scrub_block *sblock = container_of(work, struct scrub_block, work); struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - unsigned int is_metadata; - unsigned int have_csum; - u8 *csum; - u64 generation; u64 logical; struct btrfs_device *dev; - is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); - have_csum = sblock->pagev[0]->have_csum; - csum = sblock->pagev[0]->csum; - generation = sblock->pagev[0]->generation; logical = sblock->pagev[0]->logical; dev = sblock->pagev[0]->dev; - if (sblock->no_io_error_seen) { - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - sctx->csum_size); - } + if (sblock->no_io_error_seen) + scrub_recheck_block_checksum(sblock); if (!sblock->no_io_error_seen) { spin_lock(&sctx->stat_lock); sctx->stat.read_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "IO error rebuilding logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else if (sblock->header_error || sblock->checksum_error) { spin_lock(&sctx->stat_lock); sctx->stat.uncorrectable_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "failed to rebuild valid logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else { @@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock) } } -static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, - u8 *csum) +static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) { struct btrfs_ordered_sum *sum = NULL; unsigned long index; @@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) ++sctx->stat.no_csum; if (sctx->is_dev_replace && !have_csum) { @@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) goto skip; } @@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, logic_start + map->stripe_len)) { btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", key.objectid, logic_start); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } again: @@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, "scrub: tree block %llu spanning " "stripes, ignored. logical=%llu", key.objectid, logical); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9b2dafa5ba59..a6df8fdc1312 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) u32 count_data = 0; u32 count_meta = 0; u32 count_sys = 0; + int chunk_reserved = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3501,6 +3502,7 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); + btrfs_release_path(path); if (!ret) { mutex_unlock(&fs_info->delete_unused_bgs_mutex); @@ -3537,6 +3539,25 @@ again: goto loop; } + if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { + trans = btrfs_start_transaction(chunk_root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + ret = PTR_ERR(trans); + goto error; + } + + ret = btrfs_force_chunk_alloc(trans, chunk_root, + BTRFS_BLOCK_GROUP_DATA); + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + goto error; + } + + btrfs_end_transaction(trans, chunk_root); + chunk_reserved = 1; + } + ret = btrfs_relocate_chunk(chunk_root, found_key.offset); mutex_unlock(&fs_info->delete_unused_bgs_mutex); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c1..1fcd7b6e7564 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) diff --git a/fs/buffer.c b/fs/buffer.c index 51aff0296ce2..4f4cd959da7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write); * unlock the page. * * Direct callers of this function should protect against filesystem freezing - * using sb_start_write() - sb_end_write() functions. + * using sb_start_pagefault() - sb_end_pagefault() functions. */ -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; @@ -2459,26 +2459,6 @@ out_unlock: unlock_page(page); return ret; } -EXPORT_SYMBOL(__block_page_mkwrite); - -int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) -{ - int ret; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; - - sb_start_pagefault(sb); - - /* - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ - file_update_time(vma->vm_file); - - ret = __block_page_mkwrite(vma, vmf, get_block); - sb_end_pagefault(sb); - return block_page_mkwrite_return(ret); -} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index fc1056f5c96a..c4b893453e0e 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -655,6 +655,8 @@ lookup_again: aops = d_backing_inode(object->dentry)->i_mapping->a_ops; if (!aops->bmap) goto check_error; + if (object->dentry->d_sb->s_blocksize > PAGE_SIZE) + goto check_error; object->backer = object->dentry; } else { diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3cbb0e834694..c0f3da3926a0 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -414,9 +414,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; op->op.flags &= FSCACHE_OP_KEEP_FLAGS; @@ -711,9 +708,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto all_enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; pagevec_init(&pagevec, 0); @@ -885,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) loff_t pos, eof; size_t len; void *data; - int ret; + int ret = -ENOBUFS; ASSERT(op != NULL); ASSERT(page != NULL); @@ -905,6 +899,15 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + pos = (loff_t)page->index << PAGE_SHIFT; + + /* We mustn't write more data than we have, so we have to beware of a + * partial page at EOF. + */ + eof = object->fscache.store_limit_l; + if (pos >= eof) + goto error; + /* write the page to the backing filesystem and let it store it in its * own time */ path.mnt = cache->mnt; @@ -912,40 +915,38 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); - } else { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - ret = __kernel_write(file, data, len, &pos); - kunmap(page); - if (ret != len) - ret = -EIO; - fput(file); + goto error_2; } - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error_obj( - object, "Write page to backing file failed"); - ret = -ENOBUFS; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } } - _leave(" = %d", ret); - return ret; + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + fput(file); + if (ret != len) + goto error_eio; + + _leave(" = 0"); + return 0; + +error_eio: + ret = -EIO; +error_2: + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Write page to backing file failed"); +error: + _leave(" = -ENOBUFS [%d]", ret); + return -ENOBUFS; } /* diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 834f9f3723fb..a4766ded1ba7 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -88,7 +88,7 @@ static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, const struct ceph_inode_info* ci = cookie_netfs_data; uint16_t klen; - /* use ceph virtual inode (id + snaphot) */ + /* use ceph virtual inode (id + snapshot) */ klen = sizeof(ci->i_vino); if (klen > maxbuf) return 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 27b566874bc1..c69e1253b47b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1655,9 +1655,8 @@ retry_locked: !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ - (revoking & (CEPH_CAP_FILE_CACHE| - CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ + (revoking & (CEPH_CAP_FILE_CACHE| + CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { @@ -1971,49 +1970,46 @@ out: } /* - * wait for any uncommitted directory operations to commit. + * wait for any unsafe requests to complete. */ -static int unsafe_dirop_wait(struct inode *inode) +static int unsafe_request_wait(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - if (!S_ISDIR(inode->i_mode)) - return 0; + struct ceph_mds_request *req1 = NULL, *req2 = NULL; + int ret, err = 0; spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_last_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); + if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { + req1 = list_last_entry(&ci->i_unsafe_dirops, + struct ceph_mds_request, + r_unsafe_dir_item); + ceph_mdsc_get_request(req1); + } + if (!list_empty(&ci->i_unsafe_iops)) { + req2 = list_last_entry(&ci->i_unsafe_iops, + struct ceph_mds_request, + r_unsafe_target_item); + ceph_mdsc_get_request(req2); + } + spin_unlock(&ci->i_unsafe_lock); - dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - ret = !wait_for_completion_timeout(&req->r_safe_completion, - ceph_timeout_jiffies(req->r_timeout)); + dout("unsafe_requeset_wait %p wait on tid %llu %llu\n", + inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + if (req1) { + ret = !wait_for_completion_timeout(&req1->r_safe_completion, + ceph_timeout_jiffies(req1->r_timeout)); if (ret) - ret = -EIO; /* timed out */ - - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_first_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - return ret; + err = -EIO; + ceph_mdsc_put_request(req1); + } + if (req2) { + ret = !wait_for_completion_timeout(&req2->r_safe_completion, + ceph_timeout_jiffies(req2->r_timeout)); + if (ret) + err = -EIO; + ceph_mdsc_put_request(req2); + } + return err; } int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -2039,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); - ret = unsafe_dirop_wait(inode); + ret = unsafe_request_wait(inode); /* * only wait on non-file metadata writeback (the mds diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0c62868b5c56..3c68e6aee2f0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -34,6 +34,74 @@ * need to wait for MDS acknowledgement. */ +/* + * Calculate the length sum of direct io vectors that can + * be combined into one page vector. + */ +static size_t dio_get_pagev_size(const struct iov_iter *it) +{ + const struct iovec *iov = it->iov; + const struct iovec *iovend = iov + it->nr_segs; + size_t size; + + size = iov->iov_len - it->iov_offset; + /* + * An iov can be page vectored when both the current tail + * and the next base are page aligned. + */ + while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && + (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { + size += iov->iov_len; + } + dout("dio_get_pagevlen len = %zu\n", size); + return size; +} + +/* + * Allocate a page vector based on (@it, @nbytes). + * The return value is the tuple describing a page vector, + * that is (@pages, @page_align, @num_pages). + */ +static struct page ** +dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, + size_t *page_align, int *num_pages) +{ + struct iov_iter tmp_it = *it; + size_t align; + struct page **pages; + int ret = 0, idx, npages; + + align = (unsigned long)(it->iov->iov_base + it->iov_offset) & + (PAGE_SIZE - 1); + npages = calc_pages_for(align, nbytes); + pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL); + if (!pages) { + pages = vmalloc(sizeof(*pages) * npages); + if (!pages) + return ERR_PTR(-ENOMEM); + } + + for (idx = 0; idx < npages; ) { + size_t start; + ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes, + npages - idx, &start); + if (ret < 0) + goto fail; + + iov_iter_advance(&tmp_it, ret); + nbytes -= ret; + idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE; + } + + BUG_ON(nbytes != 0); + *num_pages = npages; + *page_align = align; + dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align); + return pages; +fail: + ceph_put_page_vector(pages, idx, false); + return ERR_PTR(ret); +} /* * Prepare an open request. Preallocate ceph_cap to avoid an @@ -458,11 +526,10 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, size_t start; ssize_t n; - n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); - if (n < 0) - return n; - - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; + n = dio_get_pagev_size(i); + pages = dio_get_pages_alloc(i, n, &start, &num_pages); + if (IS_ERR(pages)) + return PTR_ERR(pages); ret = striped_read(inode, off, n, pages, num_pages, checkeof, @@ -592,7 +659,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, CEPH_OSD_FLAG_WRITE; while (iov_iter_count(from) > 0) { - u64 len = iov_iter_single_seg_count(from); + u64 len = dio_get_pagev_size(from); size_t start; ssize_t n; @@ -611,14 +678,14 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); - n = iov_iter_get_pages_alloc(from, &pages, len, &start); - if (unlikely(n < 0)) { - ret = n; + n = len; + pages = dio_get_pages_alloc(from, len, &start, &num_pages); + if (IS_ERR(pages)) { ceph_osdc_put_request(req); + ret = PTR_ERR(pages); break; } - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; /* * throw out any page cache pages in this range. this * may block. diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 96d2bd829902..498dcfa2dcdb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -452,6 +452,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); + INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); ci->i_snap_realm = NULL; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 51cb02da75d9..e7b130a637f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -633,13 +633,8 @@ static void __register_request(struct ceph_mds_client *mdsc, mdsc->oldest_tid = req->r_tid; if (dir) { - struct ceph_inode_info *ci = ceph_inode(dir); - ihold(dir); - spin_lock(&ci->i_unsafe_lock); req->r_unsafe_dir = dir; - list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); - spin_unlock(&ci->i_unsafe_lock); } } @@ -665,13 +660,20 @@ static void __unregister_request(struct ceph_mds_client *mdsc, rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); - if (req->r_unsafe_dir) { + if (req->r_unsafe_dir && req->r_got_unsafe) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); - spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_target_inode && req->r_got_unsafe) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_del_init(&req->r_unsafe_target_item); + spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_unsafe_dir) { iput(req->r_unsafe_dir); req->r_unsafe_dir = NULL; } @@ -1430,6 +1432,13 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; } + /* The inode has cached pages, but it's no longer used. + * we can safely drop it */ + if (wanted == 0 && used == CEPH_CAP_FILE_CACHE && + !(oissued & CEPH_CAP_FILE_CACHE)) { + used = 0; + oissued = 0; + } if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ @@ -1438,7 +1447,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) /* we aren't the only cap.. just remove us */ __ceph_remove_cap(cap, true); } else { - /* try to drop referring dentries */ + /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); d_prune_aliases(inode); dout("trim_caps_cb %p cap %p pruned, count now %d\n", @@ -1704,6 +1713,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) req->r_started = jiffies; req->r_resend_mds = -1; INIT_LIST_HEAD(&req->r_unsafe_dir_item); + INIT_LIST_HEAD(&req->r_unsafe_target_item); req->r_fmode = -1; kref_init(&req->r_kref); INIT_LIST_HEAD(&req->r_wait); @@ -1935,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2477,6 +2487,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { req->r_got_unsafe = true; list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); + if (req->r_unsafe_dir) { + struct ceph_inode_info *ci = + ceph_inode(req->r_unsafe_dir); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_dir_item, + &ci->i_unsafe_dirops); + spin_unlock(&ci->i_unsafe_lock); + } } dout("handle_reply tid %lld result %d\n", tid, result); @@ -2518,6 +2536,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) up_read(&mdsc->snap_rwsem); if (realm) ceph_put_snap_realm(mdsc, realm); + + if (err == 0 && req->r_got_unsafe && req->r_target_inode) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); + spin_unlock(&ci->i_unsafe_lock); + } out_err: mutex_lock(&mdsc->mutex); if (!req->r_aborted) { @@ -3917,17 +3942,19 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } -static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_sign_message(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); } -static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_check_message_signature(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); } @@ -3940,8 +3967,8 @@ static const struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, - .sign_message = sign_message, - .check_message_signature = check_message_signature, + .sign_message = mds_sign_message, + .check_message_signature = mds_check_message_signature, }; /* eof */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f575eafe2261..ccf11ef0ca87 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -236,6 +236,9 @@ struct ceph_mds_request { struct inode *r_unsafe_dir; struct list_head r_unsafe_dir_item; + /* unsafe requests that modify the target inode */ + struct list_head r_unsafe_target_item; + struct ceph_mds_session *r_session; int r_attempts; /* resend attempts */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2f2460d23a06..75b7d125ce66 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -342,6 +342,7 @@ struct ceph_inode_info { struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ + struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e739950ca084..cbc0f4bca0c0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -454,6 +454,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nocase"); if (tcon->retry) seq_puts(s, ",hard"); + if (tcon->use_persistent) + seq_puts(s, ",persistenthandles"); + else if (tcon->use_resilient) + seq_puts(s, ",resilienthandles"); if (tcon->unix_ext) seq_puts(s, ",unix"); else @@ -921,9 +925,7 @@ const struct file_operations cifs_file_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -939,9 +941,7 @@ const struct file_operations cifs_file_strict_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -957,9 +957,7 @@ const struct file_operations cifs_file_direct_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -975,9 +973,7 @@ const struct file_operations cifs_file_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -992,9 +988,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1009,9 +1003,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b406a32deb1f..2b510c537a0d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -493,7 +493,10 @@ struct smb_vol { bool mfsymlinks:1; /* use Minshall+French Symlinks */ bool multiuser:1; bool rwpidforward:1; /* pid forward for read/write operations */ - bool nosharesock; + bool nosharesock:1; + bool persistent:1; + bool nopersistent:1; + bool resilient:1; /* noresilient not required since not fored for CA */ unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -895,6 +898,8 @@ struct cifs_tcon { bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ + bool use_resilient:1; /* use resilient instead of durable handles */ + bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ @@ -1015,6 +1020,7 @@ struct cifs_fid { __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ + __u8 create_guid[16]; #endif struct cifs_pending_open *pending_open; unsigned int epoch; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3f2228570d44..ecb0803bdb0e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,6 +87,8 @@ enum { Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, Opt_multiuser, Opt_sloppy, Opt_nosharesock, + Opt_persistent, Opt_nopersistent, + Opt_resilient, Opt_noresilient, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -169,6 +171,10 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_multiuser, "multiuser" }, { Opt_sloppy, "sloppy" }, { Opt_nosharesock, "nosharesock" }, + { Opt_persistent, "persistenthandles"}, + { Opt_nopersistent, "nopersistenthandles"}, + { Opt_resilient, "resilienthandles"}, + { Opt_noresilient, "noresilienthandles"}, { Opt_backupuid, "backupuid=%s" }, { Opt_backupgid, "backupgid=%s" }, @@ -1497,6 +1503,33 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_nosharesock: vol->nosharesock = true; break; + case Opt_nopersistent: + vol->nopersistent = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_persistent: + vol->persistent = true; + if ((vol->nopersistent) || (vol->resilient)) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_resilient: + vol->resilient = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_noresilient: + vol->resilient = false; /* already the default */ + break; /* Numeric Values */ case Opt_backupuid: @@ -2655,6 +2688,42 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags); } tcon->seal = volume_info->seal; + tcon->use_persistent = false; + /* check if SMB2 or later, CIFS does not support persistent handles */ + if (volume_info->persistent) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB3 or later required for persistent handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#ifdef CONFIG_CIFS_SMB2 + } else if (ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + tcon->use_persistent = true; + else /* persistent handles requested but not supported */ { + cifs_dbg(VFS, + "Persistent handles not supported on share\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#endif /* CONFIG_CIFS_SMB2 */ + } +#ifdef CONFIG_CIFS_SMB2 + } else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) + && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + && (volume_info->nopersistent == false)) { + cifs_dbg(FYI, "enabling persistent handles\n"); + tcon->use_persistent = true; +#endif /* CONFIG_CIFS_SMB2 */ + } else if (volume_info->resilient) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB2.1 or later required for resilient handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } + tcon->use_resilient = true; + } + /* * We can have only one retry value for a connection to a share so for * resources mounted more than once to the same server share the last @@ -3503,6 +3572,15 @@ try_mount_again: goto mount_fail_check; } +#ifdef CONFIG_CIFS_SMB2 + if ((volume_info->persistent == true) && ((ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) { + cifs_dbg(VFS, "persistent handles not supported by server\n"); + rc = -EOPNOTSUPP; + goto mount_fail_check; + } +#endif /* CONFIG_CIFS_SMB2*/ + /* search for existing tcon to this server share */ tcon = cifs_get_tcon(ses, volume_info); if (IS_ERR(tcon)) { diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 28a77bf1d559..35cf990f87d3 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -85,9 +85,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); - /* check if source and target are on same tree connection */ - if (src_tcon != target_tcon) { - cifs_dbg(VFS, "file copy src and target on different volume\n"); + /* check source and target on same server (or volume if dup_extents) */ + if (dup_extents && (src_tcon != target_tcon)) { + cifs_dbg(VFS, "source and target of copy not on same share\n"); + goto out_fput; + } + + if (!dup_extents && (src_tcon->ses != target_tcon->ses)) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); goto out_fput; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 2ab297dae5a7..f9e766f464be 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -43,6 +43,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_file_all_info *smb2_data = NULL; __u8 smb2_oplock[17]; struct cifs_fid *fid = oparms->fid; + struct network_resiliency_req nr_ioctl_req; smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); if (smb2_path == NULL) { @@ -67,6 +68,24 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (rc) goto out; + + if (oparms->tcon->use_resilient) { + nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ + nr_ioctl_req.Reserved = 0; + rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true, + (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), + NULL, NULL /* no return info */); + if (rc == -EOPNOTSUPP) { + cifs_dbg(VFS, + "resiliency not supported by server, disabling\n"); + oparms->tcon->use_resilient = false; + } else if (rc) + cifs_dbg(FYI, "error %d setting resiliency\n", rc); + + rc = 0; + } + if (buf) { /* open response does not have IndexNumber field - get it */ rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 18da19f4f811..53ccdde6ff18 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -810,7 +810,6 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, &eof, false); } -#ifdef CONFIG_CIFS_SMB311 static int smb2_duplicate_extents(const unsigned int xid, struct cifsFileInfo *srcfile, @@ -854,8 +853,6 @@ smb2_duplicate_extents(const unsigned int xid, duplicate_extents_out: return rc; } -#endif /* CONFIG_CIFS_SMB311 */ - static int smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, @@ -1703,6 +1700,7 @@ struct smb_version_operations smb30_operations = { .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, + .duplicate_extents = smb2_duplicate_extents, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, @@ -1840,7 +1838,7 @@ struct smb_version_values smb21_values = { struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1860,7 +1858,7 @@ struct smb_version_values smb30_values = { struct smb_version_values smb302_values = { .version_string = SMB302_VERSION_STRING, .protocol_id = SMB302_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1881,7 +1879,7 @@ struct smb_version_values smb302_values = { struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 61276929d139..767555518d40 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1151,13 +1151,130 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, return 0; } +static struct create_durable_v2 * +create_durable_v2_buf(struct cifs_fid *pfid) +{ + struct create_durable_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_durable_v2, dcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct durable_context_v2)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_durable_v2, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Timeout = 0; /* Should this be configurable by workload */ + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + get_random_bytes(buf->dcontext.CreateGuid, 16); + memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'Q'; + return buf; +} + +static struct create_durable_handle_reconnect_v2 * +create_reconnect_durable_v2_buf(struct cifs_fid *fid) +{ + struct create_durable_handle_reconnect_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_handle_reconnect_v2), + GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + dcontext)); + buf->ccontext.DataLength = + cpu_to_le32(sizeof(struct durable_reconnect_context_v2)); + buf->ccontext.NameOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Fid.PersistentFileId = fid->persistent_fid; + buf->dcontext.Fid.VolatileFileId = fid->volatile_fid; + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + memcpy(buf->dcontext.CreateGuid, fid->create_guid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 is "DH2C" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'C'; + return buf; +} + static int -add_durable_context(struct kvec *iov, unsigned int *num_iovec, +add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, struct cifs_open_parms *oparms) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; + iov[num].iov_base = create_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + /* indicate that we don't need to relock the file */ + oparms->reconnect = false; + + iov[num].iov_base = create_reconnect_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, + sizeof(struct create_durable_handle_reconnect_v2)); + inc_rfc1001_len(&req->hdr, + sizeof(struct create_durable_handle_reconnect_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms, bool use_persistent) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + if (use_persistent) { + if (oparms->reconnect) + return add_durable_reconnect_v2_context(iov, num_iovec, + oparms); + else + return add_durable_v2_context(iov, num_iovec, oparms); + } + if (oparms->reconnect) { iov[num].iov_base = create_reconnect_durable_buf(oparms->fid); /* indicate that we don't need to relock the file */ @@ -1275,7 +1392,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ccontext->Next = cpu_to_le32(server->vals->create_lease_size); } - rc = add_durable_context(iov, &num_iovecs, oparms); + + rc = add_durable_context(iov, &num_iovecs, oparms, + tcon->use_persistent); if (rc) { cifs_small_buf_release(req); kfree(copy_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 451108284a2f..4af52780ec35 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -590,6 +590,44 @@ struct create_durable { } Data; } __packed; +/* See MS-SMB2 2.2.13.2.11 */ +/* Flags */ +#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002 +struct durable_context_v2 { + __le32 Timeout; + __le32 Flags; + __u64 Reserved; + __u8 CreateGuid[16]; +} __packed; + +struct create_durable_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_context_v2 dcontext; +} __packed; + +/* See MS-SMB2 2.2.13.2.12 */ +struct durable_reconnect_context_v2 { + struct { + __u64 PersistentFileId; + __u64 VolatileFileId; + } Fid; + __u8 CreateGuid[16]; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +/* See MS-SMB2 2.2.14.2.12 */ +struct durable_reconnect_context_v2_rsp { + __le32 Timeout; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +struct create_durable_handle_reconnect_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_reconnect_context_v2 dcontext; +} __packed; + #define COPY_CHUNK_RES_KEY_SIZE 24 struct resume_key_req { char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; @@ -643,6 +681,13 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* See MS-SMB2 2.2.31.3 */ +struct network_resiliency_req { + __le32 Timeout; + __le32 Reserved; +} __packed; +/* There is no buffer for the response ie no struct network_resiliency_rsp */ + struct validate_negotiate_info_req { __le32 Capabilities; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a639d0dab453..f996daeea271 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 /* Retrieve an opaque file reference for server-side data movement ie copy */ #define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 -#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ +#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = { .iterate = configfs_readdir, }; +/** + * configfs_register_group - creates a parent-child relation between two groups + * @parent_group: parent group + * @group: child group + * + * link groups, creates dentry for the child and attaches it to the + * parent dentry. + * + * Return: 0 on success, negative errno code on error + */ +int configfs_register_group(struct config_group *parent_group, + struct config_group *group) +{ + struct configfs_subsystem *subsys = parent_group->cg_subsys; + struct dentry *parent; + int ret; + + mutex_lock(&subsys->su_mutex); + link_group(parent_group, group); + mutex_unlock(&subsys->su_mutex); + + parent = parent_group->cg_item.ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + ret = create_default_group(parent_group, group); + if (!ret) { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + } + mutex_unlock(&d_inode(parent)->i_mutex); + return ret; +} +EXPORT_SYMBOL(configfs_register_group); + +/** + * configfs_unregister_group() - unregisters a child group from its parent + * @group: parent group to be unregistered + * + * Undoes configfs_register_group() + */ +void configfs_unregister_group(struct config_group *group) +{ + struct configfs_subsystem *subsys = group->cg_subsys; + struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + spin_lock(&configfs_dirent_lock); + configfs_detach_prep(dentry, NULL); + spin_unlock(&configfs_dirent_lock); + + configfs_detach_group(&group->cg_item); + d_inode(dentry)->i_flags |= S_DEAD; + dont_mount(dentry); + d_delete(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); + + dput(dentry); + + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); +} +EXPORT_SYMBOL(configfs_unregister_group); + +/** + * configfs_register_default_group() - allocates and registers a child group + * @parent_group: parent group + * @name: child group name + * @item_type: child item type description + * + * boilerplate to allocate and register a child group with its parent. We need + * kzalloc'ed memory because child's default_group is initially empty. + * + * Return: allocated config group or ERR_PTR() on error + */ +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type) +{ + int ret; + struct config_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + config_group_init_type_name(group, name, item_type); + + ret = configfs_register_group(parent_group, group); + if (ret) { + kfree(group); + return ERR_PTR(ret); + } + return group; +} +EXPORT_SYMBOL(configfs_register_default_group); + +/** + * configfs_unregister_default_group() - unregisters and frees a child group + * @group: the group to act on + */ +void configfs_unregister_default_group(struct config_group *group) +{ + configfs_unregister_group(group); + kfree(group); +} +EXPORT_SYMBOL(configfs_unregister_default_group); + int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 403269ffcdf3..d39099ea7df7 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -65,7 +65,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; int ret = 0; ssize_t count; @@ -74,7 +73,8 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - count = ops->show_attribute(item,attr,buffer->page); + count = attr->show(item, buffer->page); + buffer->needs_read_fill = 0; BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); if (count >= 0) @@ -171,9 +171,8 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; - return ops->store_attribute(item,attr,buffer->page,count); + return attr->store(item, buffer->page, count); } @@ -237,8 +236,7 @@ static int check_perm(struct inode * inode, struct file * file) * and we must have a store method. */ if (file->f_mode & FMODE_WRITE) { - - if (!(inode->i_mode & S_IWUGO) || !ops->store_attribute) + if (!(inode->i_mode & S_IWUGO) || !attr->store) goto Eaccess; } @@ -248,7 +246,7 @@ static int check_perm(struct inode * inode, struct file * file) * must be a show method for it. */ if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || !ops->show_attribute) + if (!(inode->i_mode & S_IRUGO) || !attr->show) goto Eaccess; } @@ -29,6 +29,11 @@ #include <linux/uio.h> #include <linux/vmstat.h> +/* + * dax_clear_blocks() is called from within transaction context from XFS, + * and hence this means the stack from this point must follow GFP_NOFS + * semantics for all operations. + */ int dax_clear_blocks(struct inode *inode, sector_t block, long size) { struct block_device *bdev = inode->i_sb->s_bdev; @@ -169,8 +174,10 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, else len = iov_iter_zero(max - pos, iter); - if (!len) + if (!len) { + retval = -EFAULT; break; + } pos += len; addr += len; @@ -534,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, unsigned long pfn; int result = 0; + /* dax pmd mappings are broken wrt gup and fork */ + if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) + return VM_FAULT_FALLBACK; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) return VM_FAULT_FALLBACK; @@ -622,6 +633,13 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) goto fallback; + /* + * TODO: teach vmf_insert_pfn_pmd() to support + * 'pte_special' for pmds + */ + if (pfn_valid(pfn)) + goto fallback; + if (buffer_unwritten(&bh) || buffer_new(&bh)) { int i; for (i = 0; i < PTRS_PER_PMD; i++) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 5d8f35f1382a..b7fcc0de0b2f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 18e7554cf94c..cb5337d8c273 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -109,6 +109,8 @@ struct dio_submit { struct dio { int flags; /* doesn't change */ int rw; + blk_qc_t bio_cookie; + struct block_device *bio_bdev; struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ @@ -397,11 +399,14 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); - if (sdio->submit_io) + dio->bio_bdev = bio->bi_bdev; + + if (sdio->submit_io) { sdio->submit_io(dio->rw, bio, dio->inode, sdio->logical_offset_in_bio); - else - submit_bio(dio->rw, bio); + dio->bio_cookie = BLK_QC_T_NONE; + } else + dio->bio_cookie = submit_bio(dio->rw, bio); sdio->bio = NULL; sdio->boundary = 0; @@ -440,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); + if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; diff --git a/fs/dlm/config.c b/fs/dlm/config.c index d521bddf876d..8e294fbbac39 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -61,35 +61,8 @@ static struct config_item *make_node(struct config_group *, const char *); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); -static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_cluster(struct config_item *i, - struct configfs_attribute *a, - const char *buf, size_t len); -static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len); -static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len); - -static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf); -static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_local_read(struct dlm_comm *cm, char *buf); -static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf); -static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); -static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, - size_t len); -static ssize_t node_weight_read(struct dlm_node *nd, char *buf); -static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, - size_t len); +static struct configfs_attribute *comm_attrs[]; +static struct configfs_attribute *node_attrs[]; struct dlm_cluster { struct config_group group; @@ -108,6 +81,12 @@ struct dlm_cluster { char cl_cluster_name[DLM_LOCKSPACE_LEN]; }; +static struct dlm_cluster *config_item_to_cluster(struct config_item *i) +{ + return i ? container_of(to_config_group(i), struct dlm_cluster, group) : + NULL; +} + enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, @@ -124,33 +103,24 @@ enum { CLUSTER_ATTR_CLUSTER_NAME, }; -struct cluster_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_cluster *, char *); - ssize_t (*store)(struct dlm_cluster *, const char *, size_t); -}; - -static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) +static ssize_t cluster_cluster_name_show(struct config_item *item, char *buf) { + struct dlm_cluster *cl = config_item_to_cluster(item); return sprintf(buf, "%s\n", cl->cl_cluster_name); } -static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, +static ssize_t cluster_cluster_name_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_cluster *cl = config_item_to_cluster(item); + strlcpy(dlm_config.ci_cluster_name, buf, sizeof(dlm_config.ci_cluster_name)); strlcpy(cl->cl_cluster_name, buf, sizeof(cl->cl_cluster_name)); return len; } -static struct cluster_attribute cluster_attr_cluster_name = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "cluster_name", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = cluster_cluster_name_read, - .store = cluster_cluster_name_write, -}; +CONFIGFS_ATTR(cluster_, cluster_name); static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, int *info_field, int check_zero, @@ -175,17 +145,19 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, } #define CLUSTER_ATTR(name, check_zero) \ -static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \ +static ssize_t cluster_##name##_store(struct config_item *item, \ + const char *buf, size_t len) \ { \ + struct dlm_cluster *cl = config_item_to_cluster(item); \ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ check_zero, buf, len); \ } \ -static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \ +static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \ { \ + struct dlm_cluster *cl = config_item_to_cluster(item); \ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ } \ -static struct cluster_attribute cluster_attr_##name = \ -__CONFIGFS_ATTR(name, 0644, name##_read, name##_write) +CONFIGFS_ATTR(cluster_, name); CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); @@ -201,19 +173,19 @@ CLUSTER_ATTR(new_rsb_count, 0); CLUSTER_ATTR(recover_callbacks, 0); static struct configfs_attribute *cluster_attrs[] = { - [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, - [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, - [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, - [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, - [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, - [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, - [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, - [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, - [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, - [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, - [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, - [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, - [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, + [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port, + [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size, + [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size, + [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer, + [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs, + [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs, + [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug, + [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol, + [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs, + [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us, + [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count, + [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks, + [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name, NULL, }; @@ -224,83 +196,11 @@ enum { COMM_ATTR_ADDR_LIST, }; -struct comm_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_comm *, char *); - ssize_t (*store)(struct dlm_comm *, const char *, size_t); -}; - -static struct comm_attribute comm_attr_nodeid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "nodeid", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = comm_nodeid_read, - .store = comm_nodeid_write, -}; - -static struct comm_attribute comm_attr_local = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "local", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = comm_local_read, - .store = comm_local_write, -}; - -static struct comm_attribute comm_attr_addr = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr", - .ca_mode = S_IWUSR }, - .store = comm_addr_write, -}; - -static struct comm_attribute comm_attr_addr_list = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr_list", - .ca_mode = S_IRUGO }, - .show = comm_addr_list_read, -}; - -static struct configfs_attribute *comm_attrs[] = { - [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, - [COMM_ATTR_LOCAL] = &comm_attr_local.attr, - [COMM_ATTR_ADDR] = &comm_attr_addr.attr, - [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr, - NULL, -}; - enum { NODE_ATTR_NODEID = 0, NODE_ATTR_WEIGHT, }; -struct node_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_node *, char *); - ssize_t (*store)(struct dlm_node *, const char *, size_t); -}; - -static struct node_attribute node_attr_nodeid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "nodeid", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = node_nodeid_read, - .store = node_nodeid_write, -}; - -static struct node_attribute node_attr_weight = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "weight", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = node_weight_read, - .store = node_weight_write, -}; - -static struct configfs_attribute *node_attrs[] = { - [NODE_ATTR_NODEID] = &node_attr_nodeid.attr, - [NODE_ATTR_WEIGHT] = &node_attr_weight.attr, - NULL, -}; - struct dlm_clusters { struct configfs_subsystem subsys; }; @@ -349,8 +249,6 @@ static struct configfs_group_operations clusters_ops = { static struct configfs_item_operations cluster_ops = { .release = release_cluster, - .show_attribute = show_cluster, - .store_attribute = store_cluster, }; static struct configfs_group_operations spaces_ops = { @@ -369,8 +267,6 @@ static struct configfs_group_operations comms_ops = { static struct configfs_item_operations comm_ops = { .release = release_comm, - .show_attribute = show_comm, - .store_attribute = store_comm, }; static struct configfs_group_operations nodes_ops = { @@ -380,8 +276,6 @@ static struct configfs_group_operations nodes_ops = { static struct configfs_item_operations node_ops = { .release = release_node, - .show_attribute = show_node, - .store_attribute = store_node, }; static struct config_item_type clusters_type = { @@ -427,12 +321,6 @@ static struct config_item_type node_type = { .ct_owner = THIS_MODULE, }; -static struct dlm_cluster *config_item_to_cluster(struct config_item *i) -{ - return i ? container_of(to_config_group(i), struct dlm_cluster, group) : - NULL; -} - static struct dlm_space *config_item_to_space(struct config_item *i) { return i ? container_of(to_config_group(i), struct dlm_space, group) : @@ -687,66 +575,30 @@ void dlm_config_exit(void) * Functions for user space to read/write attributes */ -static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_cluster *cl = config_item_to_cluster(i); - struct cluster_attribute *cla = - container_of(a, struct cluster_attribute, attr); - return cla->show ? cla->show(cl, buf) : 0; -} - -static ssize_t store_cluster(struct config_item *i, - struct configfs_attribute *a, - const char *buf, size_t len) +static ssize_t comm_nodeid_show(struct config_item *item, char *buf) { - struct dlm_cluster *cl = config_item_to_cluster(i); - struct cluster_attribute *cla = - container_of(a, struct cluster_attribute, attr); - return cla->store ? cla->store(cl, buf, len) : -EINVAL; -} - -static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_comm *cm = config_item_to_comm(i); - struct comm_attribute *cma = - container_of(a, struct comm_attribute, attr); - return cma->show ? cma->show(cm, buf) : 0; -} - -static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len) -{ - struct dlm_comm *cm = config_item_to_comm(i); - struct comm_attribute *cma = - container_of(a, struct comm_attribute, attr); - return cma->store ? cma->store(cm, buf, len) : -EINVAL; + return sprintf(buf, "%d\n", config_item_to_comm(item)->nodeid); } -static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) -{ - return sprintf(buf, "%d\n", cm->nodeid); -} - -static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, +static ssize_t comm_nodeid_store(struct config_item *item, const char *buf, size_t len) { - int rc = kstrtoint(buf, 0, &cm->nodeid); + int rc = kstrtoint(buf, 0, &config_item_to_comm(item)->nodeid); if (rc) return rc; return len; } -static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) +static ssize_t comm_local_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", cm->local); + return sprintf(buf, "%d\n", config_item_to_comm(item)->local); } -static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, +static ssize_t comm_local_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_comm *cm = config_item_to_comm(item); int rc = kstrtoint(buf, 0, &cm->local); if (rc) @@ -756,8 +608,10 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, return len; } -static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) +static ssize_t comm_addr_store(struct config_item *item, const char *buf, + size_t len) { + struct dlm_comm *cm = config_item_to_comm(item); struct sockaddr_storage *addr; int rv; @@ -783,8 +637,9 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) +static ssize_t comm_addr_list_show(struct config_item *item, char *buf) { + struct dlm_comm *cm = config_item_to_comm(item); ssize_t s; ssize_t allowance; int i; @@ -827,32 +682,28 @@ static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) return 4096 - allowance; } -static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_node *nd = config_item_to_node(i); - struct node_attribute *nda = - container_of(a, struct node_attribute, attr); - return nda->show ? nda->show(nd, buf) : 0; -} +CONFIGFS_ATTR(comm_, nodeid); +CONFIGFS_ATTR(comm_, local); +CONFIGFS_ATTR_WO(comm_, addr); +CONFIGFS_ATTR_RO(comm_, addr_list); -static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len) -{ - struct dlm_node *nd = config_item_to_node(i); - struct node_attribute *nda = - container_of(a, struct node_attribute, attr); - return nda->store ? nda->store(nd, buf, len) : -EINVAL; -} +static struct configfs_attribute *comm_attrs[] = { + [COMM_ATTR_NODEID] = &comm_attr_nodeid, + [COMM_ATTR_LOCAL] = &comm_attr_local, + [COMM_ATTR_ADDR] = &comm_attr_addr, + [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list, + NULL, +}; -static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) +static ssize_t node_nodeid_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", nd->nodeid); + return sprintf(buf, "%d\n", config_item_to_node(item)->nodeid); } -static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, +static ssize_t node_nodeid_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_node *nd = config_item_to_node(item); uint32_t seq = 0; int rc = kstrtoint(buf, 0, &nd->nodeid); @@ -863,21 +714,30 @@ static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, return len; } -static ssize_t node_weight_read(struct dlm_node *nd, char *buf) +static ssize_t node_weight_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", nd->weight); + return sprintf(buf, "%d\n", config_item_to_node(item)->weight); } -static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, +static ssize_t node_weight_store(struct config_item *item, const char *buf, size_t len) { - int rc = kstrtoint(buf, 0, &nd->weight); + int rc = kstrtoint(buf, 0, &config_item_to_node(item)->weight); if (rc) return rc; return len; } +CONFIGFS_ATTR(node_, nodeid); +CONFIGFS_ATTR(node_, weight); + +static struct configfs_attribute *node_attrs[] = { + [NODE_ATTR_NODEID] = &node_attr_nodeid, + [NODE_ATTR_WEIGHT] = &node_attr_weight, + NULL, +}; + /* * Functions for the dlm to get the info that's been configured */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a71cea68420..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb) /* Fall through */ case Opt_dax: #ifdef CONFIG_FS_DAX + ext2_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(sbi->s_mount_opt, DAX); #else ext2_msg(sb, KERN_INFO, "dax option not supported"); diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0b6bfd3a398b..fa70848afa8f 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -293,10 +293,9 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", ext2_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + size_t size = handler->list(handler, dentry, buffer, + rest, entry->e_name, + entry->e_name_len); if (buffer) { if (size > rest) { error = -ERANGE; diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 702fc6840246..dfb08750370d 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -8,8 +8,9 @@ #include "xattr.h" static size_t -ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -23,8 +24,9 @@ ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -33,8 +35,9 @@ ext2_xattr_security_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 42b6e9874bcc..3150dd3a7859 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -9,8 +9,9 @@ #include "xattr.h" static size_t -ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -27,8 +28,9 @@ ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -37,8 +39,9 @@ ext2_xattr_trusted_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index ecdc4605192c..339a49bbb8ef 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -11,8 +11,9 @@ #include "xattr.h" static size_t -ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -29,8 +30,9 @@ ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -41,8 +43,9 @@ ext2_xattr_user_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7d1aad1d9313..ea433a7f4bca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) !ext4_should_journal_data(inode) && !ext4_nonda_switch(inode->i_sb)) { do { - ret = __block_page_mkwrite(vma, vmf, + ret = block_page_mkwrite(vma, vmf, ext4_da_get_block_prep); } while (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)); @@ -5330,7 +5330,7 @@ retry_alloc: ret = VM_FAULT_SIGBUS; goto out; } - ret = __block_page_mkwrite(vma, vmf, get_block); + ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 753f4e68b820..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 984448c6f5f0..6b6b3e751f8c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -405,10 +405,9 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, ext4_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + size_t size = handler->list(handler, dentry, buffer, + rest, entry->e_name, + entry->e_name_len); if (buffer) { if (size > rest) return -ERANGE; diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 95d90e0560f0..36f4c1a84c21 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; const size_t total_len = prefix_len + name_len + 1; @@ -28,8 +29,9 @@ ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -38,8 +40,9 @@ ext4_xattr_security_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 891ee2ddfbd6..488089053342 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -13,8 +13,9 @@ #include "xattr.h" static size_t -ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -31,8 +32,9 @@ ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) +ext4_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -41,8 +43,9 @@ ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, } static int -ext4_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 6ed932b3c043..d2dec3364062 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -30,8 +31,9 @@ ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -42,8 +44,9 @@ ext4_xattr_user_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4de2286c0e4d..862368a32e53 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -25,49 +25,45 @@ #include "f2fs.h" #include "xattr.h" -static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t len, int type) +static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t len) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - int total_len, prefix_len = 0; - const char *prefix = NULL; + int total_len, prefix_len; - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; - prefix = XATTR_USER_PREFIX; - prefix_len = XATTR_USER_PREFIX_LEN; break; case F2FS_XATTR_INDEX_TRUSTED: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - prefix = XATTR_TRUSTED_PREFIX; - prefix_len = XATTR_TRUSTED_PREFIX_LEN; break; case F2FS_XATTR_INDEX_SECURITY: - prefix = XATTR_SECURITY_PREFIX; - prefix_len = XATTR_SECURITY_PREFIX_LEN; break; default: return -EINVAL; } + prefix_len = strlen(handler->prefix); total_len = prefix_len + len + 1; if (list && total_len <= list_size) { - memcpy(list, prefix, prefix_len); + memcpy(list, handler->prefix, prefix_len); memcpy(list + prefix_len, name, len); list[prefix_len + len] = '\0'; } return total_len; } -static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int f2fs_xattr_generic_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; @@ -83,15 +79,17 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(d_inode(dentry), type, name, buffer, size, NULL); + return f2fs_getxattr(d_inode(dentry), handler->flags, name, + buffer, size, NULL); } -static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int f2fs_xattr_generic_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; @@ -108,27 +106,26 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(d_inode(dentry), type, name, + return f2fs_setxattr(d_inode(dentry), handler->flags, name, value, size, NULL, flags); } -static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t len, int type) +static size_t f2fs_xattr_advise_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t len) { const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; size_t size; - if (type != F2FS_XATTR_INDEX_ADVISE) - return 0; - size = strlen(xname) + 1; if (list && size <= list_size) memcpy(list, xname, size); return size; } -static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int f2fs_xattr_advise_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { struct inode *inode = d_inode(dentry); @@ -140,8 +137,9 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, return sizeof(char); } -static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int f2fs_xattr_advise_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { struct inode *inode = d_inode(dentry); @@ -462,8 +460,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (!handler) continue; - size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len, handler->flags); + size = handler->list(handler, dentry, buffer, rest, + entry->e_name, entry->e_name_len); if (buffer && size > rest) { error = -ERANGE; goto cleanup; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4afc4d9d2e41..8b2127ffb226 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -610,9 +610,9 @@ parse_record: int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); if (status < 0) { - ctx->pos = cpos; + bh = NULL; ret = status; - goto out; + goto end_of_dir; } else if (status == PARSE_INVALID) goto record_end; else if (status == PARSE_NOT_LONGNAME) @@ -654,8 +654,9 @@ parse_record: fill_len = short_len; start_filldir: - if (!fake_offset) - ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + if (fake_offset && ctx->pos < 2) + ctx->pos = 2; if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { if (!dir_emit_dot(file, ctx)) @@ -681,14 +682,19 @@ record_end: fake_offset = 0; ctx->pos = cpos; goto get_new; + end_of_dir: - ctx->pos = cpos; + if (fake_offset && cpos < 2) + ctx->pos = 2; + else + ctx->pos = cpos; fill_failed: brelse(bh); if (unicode) __putname(unicode); out: mutex_unlock(&sbi->s_lock); + return ret; } diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 6d941f56faf4..9b28649df3a1 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -22,6 +22,7 @@ static LIST_HEAD(fscache_netfs_list); int __fscache_register_netfs(struct fscache_netfs *netfs) { struct fscache_netfs *ptr; + struct fscache_cookie *cookie; int ret; _enter("{%s}", netfs->name); @@ -29,29 +30,25 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) INIT_LIST_HEAD(&netfs->link); /* allocate a cookie for the primary index */ - netfs->primary_index = - kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); - if (!netfs->primary_index) { + if (!cookie) { _leave(" = -ENOMEM"); return -ENOMEM; } /* initialise the primary index cookie */ - atomic_set(&netfs->primary_index->usage, 1); - atomic_set(&netfs->primary_index->n_children, 0); - atomic_set(&netfs->primary_index->n_active, 1); + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->n_children, 0); + atomic_set(&cookie->n_active, 1); - netfs->primary_index->def = &fscache_fsdef_netfs_def; - netfs->primary_index->parent = &fscache_fsdef_index; - netfs->primary_index->netfs_data = netfs; - netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED; + cookie->def = &fscache_fsdef_netfs_def; + cookie->parent = &fscache_fsdef_index; + cookie->netfs_data = netfs; + cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; - atomic_inc(&netfs->primary_index->parent->usage); - atomic_inc(&netfs->primary_index->parent->n_children); - - spin_lock_init(&netfs->primary_index->lock); - INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + spin_lock_init(&cookie->lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ down_write(&fscache_addremove_sem); @@ -62,6 +59,10 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) goto already_registered; } + atomic_inc(&cookie->parent->usage); + atomic_inc(&cookie->parent->n_children); + + netfs->primary_index = cookie; list_add(&netfs->link, &fscache_netfs_list); ret = 0; @@ -70,11 +71,8 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) already_registered: up_write(&fscache_addremove_sem); - if (ret < 0) { - netfs->primary_index->parent = NULL; - __fscache_cookie_put(netfs->primary_index); - netfs->primary_index = NULL; - } + if (ret < 0) + kmem_cache_free(fscache_cookie_jar, cookie); _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 79483b3d8c6f..6b35fc4860a0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -816,7 +816,7 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index > op->store_limit) { + if (page->index >= op->store_limit) { fscache_stat(&fscache_n_store_pages_over_limit); goto superseded; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 4c096fa9e2a1..53ce76a374fe 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -583,11 +583,13 @@ out: * * Returns: actual size of data on success, -errno on error */ -static int gfs2_xattr_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int gfs2_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); struct gfs2_ea_location el; + int type = handler->flags; int error; if (!ip->i_eattr) @@ -1227,11 +1229,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, return error; } -static int gfs2_xattr_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int gfs2_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { return __gfs2_xattr_set(d_inode(dentry), name, value, - size, flags, type); + size, flags, handler->flags); } diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 416b1dbafe51..e41a010cd89c 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -849,8 +849,9 @@ end_removexattr: return err; } -static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_osx_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -871,8 +872,9 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, return __hfsplus_getxattr(d_inode(dentry), name, buffer, size); } -static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_osx_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -893,19 +895,8 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, return __hfsplus_setxattr(d_inode(dentry), name, buffer, size, flags); } -static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_osx_handler = { .prefix = XATTR_MAC_OSX_PREFIX, - .list = hfsplus_osx_listxattr, .get = hfsplus_osx_getxattr, .set = hfsplus_osx_setxattr, }; diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index aacff00a9ff9..72a68a3a0c99 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -13,32 +13,24 @@ #include "xattr.h" #include "acl.h" -static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_security_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } -static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_security_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } -static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - static int hfsplus_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) @@ -92,7 +84,6 @@ int hfsplus_init_inode_security(struct inode *inode, const struct xattr_handler hfsplus_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = hfsplus_security_listxattr, .get = hfsplus_security_getxattr, .set = hfsplus_security_setxattr, }; diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index bcf65089b7f7..95a7704c7abb 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -11,34 +11,25 @@ #include "hfsplus_fs.h" #include "xattr.h" -static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } -static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_trusted_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } -static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = hfsplus_trusted_listxattr, .get = hfsplus_trusted_getxattr, .set = hfsplus_trusted_setxattr, }; diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index 5aa0e6dc4a1e..6fc269baf959 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -11,34 +11,25 @@ #include "hfsplus_fs.h" #include "xattr.h" -static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_user_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } -static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_user_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } -static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = hfsplus_user_listxattr, .get = hfsplus_user_getxattr, .set = hfsplus_user_setxattr, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316adb968b65..de4bdfac0cec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page) * truncation is indicated by end of range being LLONG_MAX * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv - * maps and global counts. + * maps and global counts. Page faults can not race with truncation + * in this routine. hugetlb_no_page() prevents page faults in the + * truncated range. It checks i_size before allocation, and again after + * with the page table lock for the page held. The same lock must be + * acquired to unmap a page. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map * deleted. The region/reserv map for ranges without associated - * pages are not modified. + * pages are not modified. Page faults can race with hole punch. + * This is indicated if we find a mapped page. * Note: If the passed end of range value is beyond the end of file, but * not LLONG_MAX this routine still performs a hole punch operation. */ @@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, next = start; while (next < end) { /* - * Make sure to never grab more pages that we - * might possibly need. + * Don't grab more pages than the number left in the range. */ if (end - next < lookup_nr) lookup_nr = end - next; /* - * This pagevec_lookup() may return pages past 'end', - * so we must check for page->index > end. + * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) { - if (next == start) - break; - next = start; - continue; - } + if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) + break; for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; u32 hash; + /* + * The page (index) could be beyond end. This is + * only possible in the punch hole case as end is + * max page offset in the truncate case. + */ + next = page->index; + if (next >= end) + break; + hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, mapping, next, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); lock_page(page); - if (page->index >= end) { - unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); - next = end; /* we are done */ - break; - } - - /* - * If page is mapped, it was faulted in after being - * unmapped. Do nothing in this race case. In the - * normal case page is not mapped. - */ - if (!page_mapped(page)) { + if (likely(!page_mapped(page))) { bool rsv_on_error = !PagePrivate(page); /* * We must free the huge page and remove @@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, hugetlb_fix_reserve_counts( inode, rsv_on_error); } + } else { + /* + * If page is mapped, it was faulted in after + * being unmapped. It indicates a race between + * hole punch and page fault. Do nothing in + * this case. Getting here in a truncate + * operation is a bug. + */ + BUG_ON(truncate_op); } - if (page->index > next) - next = page->index; - - ++next; unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); } + ++next; huge_pagevec_release(&pvec); + cond_resched(); } if (truncate_op) @@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); inode->i_ctime = CURRENT_TIME; - spin_lock(&inode->i_lock); - inode->i_private = NULL; - spin_unlock(&inode->i_lock); out: mutex_unlock(&inode->i_mutex); return error; diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index d4b43fb7adb1..bf12fe5f83d7 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -48,8 +48,9 @@ int jffs2_init_security(struct inode *inode, struct inode *dir, } /* ---- XATTR Handler for "security.*" ----------------- */ -static int jffs2_security_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_security_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -58,8 +59,9 @@ static int jffs2_security_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_security_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_security_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -68,8 +70,10 @@ static int jffs2_security_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_security_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_security_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index f092fee5be50..4c2c03663533 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1001,11 +1001,12 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) if (!xhandle) continue; if (buffer) { - rc = xhandle->list(dentry, buffer+len, size-len, - xd->xname, xd->name_len, xd->flags); + rc = xhandle->list(xhandle, dentry, buffer + len, + size - len, xd->xname, + xd->name_len); } else { - rc = xhandle->list(dentry, NULL, 0, xd->xname, - xd->name_len, xd->flags); + rc = xhandle->list(xhandle, dentry, NULL, 0, + xd->xname, xd->name_len); } if (rc < 0) goto out; diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index ceaf9c693225..a562da0d6a26 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -16,8 +16,9 @@ #include <linux/mtd/mtd.h> #include "nodelist.h" -static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_trusted_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -25,8 +26,9 @@ static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_trusted_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -34,11 +36,16 @@ static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_trusted_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; + if (!capable(CAP_SYS_ADMIN)) + return 0; + if (list && retlen<=list_size) { strcpy(list, XATTR_TRUSTED_PREFIX); strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name); diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index a71391eba514..cbc0472e59a8 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -16,8 +16,9 @@ #include <linux/mtd/mtd.h> #include "nodelist.h" -static int jffs2_user_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_user_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -25,8 +26,9 @@ static int jffs2_user_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_user_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_user_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -34,8 +36,10 @@ static int jffs2_user_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_user_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_user_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 969d589c848d..d716c9993a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, atomic_inc(&nsm->sm_count); else { host = NULL; - nsm = nsm_get_handle(ni->sap, ni->salen, + nsm = nsm_get_handle(ni->net, ni->sap, ni->salen, ni->hostname, ni->hostname_len); if (unlikely(nsm == NULL)) { dprintk("lockd: %s failed; no nsm handle\n", @@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; + strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; @@ -534,17 +535,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, /** * nlm_host_rebooted - Release all resources held by rebooted host + * @net: network namespace * @info: pointer to decoded results of NLM_SM_NOTIFY call * * We were notified that the specified host has rebooted. Release * all resources held by that peer. */ -void nlm_host_rebooted(const struct nlm_reboot *info) +void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info) { struct nsm_handle *nsm; struct nlm_host *host; - nsm = nsm_reboot_lookup(info); + nsm = nsm_reboot_lookup(net, info); if (unlikely(nsm == NULL)) return; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 47a32b6d9b90..19166d4a8d31 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -42,7 +42,7 @@ struct nsm_args { u32 proc; char *mon_name; - char *nodename; + const char *nodename; }; struct nsm_res { @@ -51,7 +51,6 @@ struct nsm_res { }; static const struct rpc_program nsm_program; -static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); /* @@ -87,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) return rpc_create(&args); } -static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, - struct rpc_clnt *clnt) -{ - spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users == 0) { - if (clnt == NULL) - goto out; - ln->nsm_clnt = clnt; - } - clnt = ln->nsm_clnt; - ln->nsm_users++; -out: - spin_unlock(&ln->nsm_clnt_lock); - return clnt; -} - -static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) -{ - struct rpc_clnt *clnt, *new; - struct lockd_net *ln = net_generic(net, lockd_net_id); - - clnt = nsm_client_set(ln, NULL); - if (clnt != NULL) - goto out; - - clnt = new = nsm_create(net, nodename); - if (IS_ERR(clnt)) - goto out; - - clnt = nsm_client_set(ln, new); - if (clnt != new) - rpc_shutdown_client(new); -out: - return clnt; -} - -static void nsm_client_put(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct rpc_clnt *clnt = NULL; - - spin_lock(&ln->nsm_clnt_lock); - ln->nsm_users--; - if (ln->nsm_users == 0) { - clnt = ln->nsm_clnt; - ln->nsm_clnt = NULL; - } - spin_unlock(&ln->nsm_clnt_lock); - if (clnt != NULL) - rpc_shutdown_client(clnt); -} - static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, - struct rpc_clnt *clnt) + const struct nlm_host *host) { int status; + struct rpc_clnt *clnt; struct nsm_args args = { .priv = &nsm->sm_priv, .prog = NLM_PROGRAM, .vers = 3, .proc = NLMPROC_NSM_NOTIFY, .mon_name = nsm->sm_mon_name, - .nodename = clnt->cl_nodename, + .nodename = host->nodename, }; struct rpc_message msg = { .rpc_argp = &args, @@ -158,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, memset(res, 0, sizeof(*res)); + clnt = nsm_create(host->net, host->nodename); + if (IS_ERR(clnt)) { + dprintk("lockd: failed to create NSM upcall transport, " + "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + return PTR_ERR(clnt); + } + msg.rpc_proc = &clnt->cl_procinfo[proc]; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); if (status == -ECONNREFUSED) { @@ -171,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, status); else status = 0; + + rpc_shutdown_client(clnt); return status; } @@ -190,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host) struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; int status; - struct rpc_clnt *clnt; - const char *nodename = NULL; dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); if (nsm->sm_monitored) return 0; - if (host->h_rpcclnt) - nodename = host->h_rpcclnt->cl_nodename; - /* * Choose whether to record the caller_name or IP address of * this peer in the local rpc.statd's database. */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - clnt = nsm_client_get(host->net, nodename); - if (IS_ERR(clnt)) { - status = PTR_ERR(clnt); - dprintk("lockd: failed to create NSM upcall transport, " - "status=%d, net=%p\n", status, host->net); - return status; - } - - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -247,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host) if (atomic_read(&nsm->sm_count) == 1 && nsm->sm_monitored && !nsm->sm_sticky) { - struct lockd_net *ln = net_generic(host->net, lockd_net_id); - dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host); if (res.status != 0) status = -EIO; if (status < 0) @@ -259,38 +201,38 @@ void nsm_unmonitor(const struct nlm_host *host) nsm->sm_name); else nsm->sm_monitored = 0; - - nsm_client_put(host->net); } } -static struct nsm_handle *nsm_lookup_hostname(const char *hostname, - const size_t len) +static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles, + const char *hostname, const size_t len) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (strlen(nsm->sm_name) == len && memcmp(nsm->sm_name, hostname, len) == 0) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) +static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles, + const struct sockaddr *sap) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv) +static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles, + const struct nsm_private *priv) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (memcmp(nsm->sm_priv.data, priv->data, sizeof(priv->data)) == 0) return nsm; @@ -353,6 +295,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, /** * nsm_get_handle - Find or create a cached nsm_handle + * @net: network namespace * @sap: pointer to socket address of handle to find * @salen: length of socket address * @hostname: pointer to C string containing hostname to find @@ -365,11 +308,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, * @hostname cannot be found in the handle cache. Returns NULL if * an error occurs. */ -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len) { struct nsm_handle *cached, *new = NULL; + struct lockd_net *ln = net_generic(net, lockd_net_id); if (hostname && memchr(hostname, '/', hostname_len) != NULL) { if (printk_ratelimit()) { @@ -384,9 +329,10 @@ retry: spin_lock(&nsm_lock); if (nsm_use_hostnames && hostname != NULL) - cached = nsm_lookup_hostname(hostname, hostname_len); + cached = nsm_lookup_hostname(&ln->nsm_handles, + hostname, hostname_len); else - cached = nsm_lookup_addr(sap); + cached = nsm_lookup_addr(&ln->nsm_handles, sap); if (cached != NULL) { atomic_inc(&cached->sm_count); @@ -400,7 +346,7 @@ retry: } if (new != NULL) { - list_add(&new->sm_link, &nsm_handles); + list_add(&new->sm_link, &ln->nsm_handles); spin_unlock(&nsm_lock); dprintk("lockd: created nsm_handle for %s (%s)\n", new->sm_name, new->sm_addrbuf); @@ -417,19 +363,22 @@ retry: /** * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle + * @net: network namespace * @info: pointer to NLMPROC_SM_NOTIFY arguments * * Returns a matching nsm_handle if found in the nsm cache. The returned * nsm_handle's reference count is bumped. Otherwise returns NULL if some * error occurred. */ -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info) { struct nsm_handle *cached; + struct lockd_net *ln = net_generic(net, lockd_net_id); spin_lock(&nsm_lock); - cached = nsm_lookup_priv(&info->priv); + cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv); if (unlikely(cached == NULL)) { spin_unlock(&nsm_lock); dprintk("lockd: never saw rebooted peer '%.*s' before\n", diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 097bfa3adb1c..5426189406c1 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -12,9 +12,7 @@ struct lockd_net { struct delayed_work grace_period_end; struct lock_manager lockd_manager; - spinlock_t nsm_clnt_lock; - unsigned int nsm_users; - struct rpc_clnt *nsm_clnt; + struct list_head nsm_handles; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index d678bcc3cbcb..5f31ebd96c06 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -592,7 +592,7 @@ static int lockd_init_net(struct net *net) INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; - spin_lock_init(&ln->nsm_clnt_lock); + INIT_LIST_HEAD(&ln->nsm_handles); return 0; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b147d1ae71fd..09c576f26c7b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 21171f0c6477..fb26b9f522e7 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 79b113048eac..0a3f9b594602 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg switch (rqdata.cmd) { case NCP_LOCK_EX: case NCP_LOCK_SH: + if (rqdata.timeout < 0) + return -EINVAL; if (rqdata.timeout == 0) rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ff5bddc49a2a..765a03559363 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6248,9 +6248,10 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" -static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, +static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, const void *buf, size_t buflen, - int flags, int type) + int flags) { if (strcmp(key, "") != 0) return -EINVAL; @@ -6258,8 +6259,9 @@ static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, return nfs4_proc_set_acl(d_inode(dentry), buf, buflen); } -static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + void *buf, size_t buflen) { if (strcmp(key, "") != 0) return -EINVAL; @@ -6267,9 +6269,10 @@ static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, return nfs4_proc_get_acl(d_inode(dentry), buf, buflen); } -static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, +static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len, int type) + size_t name_len) { size_t len = sizeof(XATTR_NAME_NFSV4_ACL); @@ -6287,9 +6290,10 @@ static inline int nfs4_server_supports_labels(struct nfs_server *server) return server->caps & NFS_CAP_SECURITY_LABEL; } -static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags, int type) +static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + const void *buf, size_t buflen, + int flags) { if (security_ismaclabel(key)) return nfs4_set_security_label(dentry, buf, buflen); @@ -6297,17 +6301,19 @@ static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, return -EOPNOTSUPP; } -static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + void *buf, size_t buflen) { if (security_ismaclabel(key)) return nfs4_get_security_label(d_inode(dentry), buf, buflen); return -EOPNOTSUPP; } -static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int type) +static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_len, const char *name, + size_t name_len) { size_t len = 0; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f6e7cbabac5a..00575d776d91 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp) err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version; if (err) { - fhp->fh_post_saved = 0; + fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else - fhp->fh_post_saved = 1; + fhp->fh_post_saved = true; } /* diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ebf90e487c75..9ffef06b30d5 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -201,6 +201,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, INIT_LIST_HEAD(&ls->ls_perfile); spin_lock_init(&ls->ls_lock); INIT_LIST_HEAD(&ls->ls_layouts); + mutex_init(&ls->ls_mutex); ls->ls_layout_type = layout_type; nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, NFSPROC4_CLNT_CB_LAYOUT); @@ -262,19 +263,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, status = nfserr_jukebox; if (!ls) goto out; + mutex_lock(&ls->ls_mutex); } else { ls = container_of(stid, struct nfs4_layout_stateid, ls_stid); status = nfserr_bad_stateid; + mutex_lock(&ls->ls_mutex); if (stateid->si_generation > stid->sc_stateid.si_generation) - goto out_put_stid; + goto out_unlock_stid; if (layout_type != ls->ls_layout_type) - goto out_put_stid; + goto out_unlock_stid; } *lsp = ls; return 0; +out_unlock_stid: + mutex_unlock(&ls->ls_mutex); out_put_stid: nfs4_put_stid(stid); out: @@ -296,8 +301,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); atomic_inc(&ls->ls_stid.sc_count); - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -406,8 +409,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid); spin_unlock(&ls->ls_lock); out: spin_unlock(&fp->fi_lock); @@ -481,11 +483,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } } if (!list_empty(&ls->ls_layouts)) { - if (found) { - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid, - sizeof(stateid_t)); - } + if (found) + nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); @@ -494,6 +493,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } spin_unlock(&ls->ls_lock); + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); nfsd4_free_layouts(&reaplist); return nfs_ok; @@ -608,6 +608,16 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) } } +static void +nfsd4_cb_layout_prepare(struct nfsd4_callback *cb) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + + mutex_lock(&ls->ls_mutex); + nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); +} + static int nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -649,12 +659,14 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) trace_layout_recall_release(&ls->ls_stid.sc_stateid); + mutex_unlock(&ls->ls_mutex); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); nfs4_put_stid(&ls->ls_stid); } static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { + .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4ce6b97b31ad..a9f096c7e99f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1309,6 +1309,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_insert_layout(lgp, ls); out_put_stid: + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); out: return nfserr; @@ -1362,6 +1363,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, goto out; } + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + if (new_size > i_size_read(inode)) { lcp->lc_size_chg = 1; lcp->lc_newsize = new_size; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b795..6b800b5b8fed 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ atomic_set(&stid->sc_count, 1); + spin_lock_init(&stid->sc_lock); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s) put_nfs4_file(fp); } +void +nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) +{ + stateid_t *src = &stid->sc_stateid; + + spin_lock(&stid->sc_lock); + if (unlikely(++src->si_generation == 0)) + src->si_generation = 1; + memcpy(dst, src, sizeof(*dst)); + spin_unlock(&stid->sc_lock); +} + static void nfs4_put_deleg_lease(struct nfs4_file *fp) { struct file *filp = NULL; @@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -2256,15 +2321,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } +static bool client_has_openowners(struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) { + if (!list_empty(&oo->oo_owner.so_stateids)) + return true; + } + return false; +} + static bool client_has_state(struct nfs4_client *clp) { - /* - * Note clp->cl_openowners check isn't quite right: there's no - * need to count owners without stateid's. - * - * Also note we should probably be using this in 4.0 case too. - */ - return !list_empty(&clp->cl_openowners) + return client_has_openowners(clp) #ifdef CONFIG_NFSD_PNFS || !list_empty(&clp->cl_lo_states) #endif @@ -3049,7 +3119,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); - if (conf) { + if (conf && client_has_state(conf)) { /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) @@ -3136,6 +3206,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } else { /* case 3: normal case; new or rebooted client */ old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { + status = nfserr_clid_inuse; + if (client_has_state(old) + && !same_creds(&unconf->cl_cred, + &old->cl_cred)) + goto out; status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -3317,6 +3392,27 @@ static const struct nfs4_stateowner_operations openowner_ops = { .so_free = nfs4_free_openowner, }; +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *local, *ret = NULL; + struct nfs4_openowner *oo = open->op_openowner; + + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } + } + return ret; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3348,9 +3444,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, return ret; } -static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { +static struct nfs4_ol_stateid * +init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, + struct nfsd4_open *open) +{ + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_ol_stateid *retstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&fp->fi_lock); + + retstp = nfsd4_find_existing_open(fp, open); + if (retstp) + goto out_unlock; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3360,12 +3467,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - spin_lock(&oo->oo_owner.so_client->cl_lock); + init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); + +out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + return retstp; } /* @@ -3776,27 +3885,6 @@ out: return nfs_ok; } -static struct nfs4_ol_stateid * -nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) -{ - struct nfs4_ol_stateid *local, *ret = NULL; - struct nfs4_openowner *oo = open->op_openowner; - - spin_lock(&fp->fi_lock); - list_for_each_entry(local, &fp->fi_stateids, st_perfile) { - /* ignore lock owners */ - if (local->st_stateowner->so_is_open_owner == 0) - continue; - if (local->st_stateowner == &oo->oo_owner) { - ret = local; - atomic_inc(&ret->st_stid.sc_count); - break; - } - } - spin_unlock(&fp->fi_lock); - return ret; -} - static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; @@ -3945,6 +4033,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3976,16 +4076,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4005,6 +4108,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4023,9 +4135,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4160,6 +4270,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; + struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4173,7 +4284,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + spin_lock(&fp->fi_lock); stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4187,15 +4300,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; - init_open_stateid(stp, fp, open); + swapstp = init_open_stateid(stp, fp, open); + if (swapstp) { + nfs4_put_stid(&stp->st_stid); + stp = swapstp; + down_read(&stp->st_rwsem); + status = nfs4_upgrade_open(rqstp, fp, current_fh, + stp, open); + if (status) { + up_read(&stp->st_rwsem); + goto out; + } + goto upgrade_out; + } + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4205,8 +4335,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); +upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4819,10 +4950,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +5003,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +5034,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4975,13 +5112,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, stp); - - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5033,8 +5168,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5395,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5564,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5572,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5512,9 +5651,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stid.sc_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, - sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; break; case (EAGAIN): /* conflock holds conflicting lock */ @@ -5540,6 +5677,8 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5704,11 +5843,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 46ec934f5dee..54cde9a5864e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -63,7 +63,6 @@ static unsigned int longest_chain; static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -static void cache_cleaner_func(struct work_struct *unused); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, @@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { }; /* - * locking for the reply cache: - * A cache entry is "single use" if c_state == RC_INPROG - * Otherwise, it when accessing _prev or _next, the lock must be held. - */ -static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); - -/* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. * @@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void) unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); - cancel_delayed_work_sync(&cache_cleaner); for (i = 0; i < drc_hashsize; i++) { struct list_head *head = &drc_hashtbl[i].lru_head; @@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void) drc_hashtbl = NULL; drc_hashsize = 0; - if (drc_slab) { - kmem_cache_destroy(drc_slab); - drc_slab = NULL; - } + kmem_cache_destroy(drc_slab); + drc_slab = NULL; } /* @@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &b->lru_head); - schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } static long @@ -266,7 +254,6 @@ prune_cache_entries(void) { unsigned int i; long freed = 0; - bool cancel = true; for (i = 0; i < drc_hashsize; i++) { struct nfsd_drc_bucket *b = &drc_hashtbl[i]; @@ -275,26 +262,11 @@ prune_cache_entries(void) continue; spin_lock(&b->cache_lock); freed += prune_bucket(b); - if (!list_empty(&b->lru_head)) - cancel = false; spin_unlock(&b->cache_lock); } - - /* - * Conditionally rearm the job to run in RC_EXPIRE since we just - * ran the pruner. - */ - if (!cancel) - mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } -static void -cache_cleaner_func(struct work_struct *unused) -{ - prune_cache_entries(); -} - static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 350041a40fe5..c1681ce894c5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp) fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); -#ifdef CONFIG_NFSD_V3 - fhp->fh_pre_saved = 0; - fhp->fh_post_saved = 0; -#endif + fh_clear_wcc(fhp); } fh_drop_write(fhp); if (exp) { diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 1e90dad4926b..2087bae17582 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -26,16 +26,16 @@ static inline ino_t u32_to_ino_t(__u32 uino) */ typedef struct svc_fh { struct knfsd_fh fh_handle; /* FH data */ + int fh_maxsize; /* max size for fh_handle */ struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ + bool fh_locked; /* inode locked by us */ + bool fh_want_write; /* remount protection taken */ #ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ + bool fh_post_saved; /* post-op attrs saved */ + bool fh_pre_saved; /* pre-op attrs saved */ /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ @@ -213,8 +213,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) static inline void fh_clear_wcc(struct svc_fh *fhp) { - fhp->fh_post_saved = 0; - fhp->fh_pre_saved = 0; + fhp->fh_post_saved = false; + fhp->fh_pre_saved = false; } /* @@ -231,7 +231,7 @@ fill_pre_wcc(struct svc_fh *fhp) fhp->fh_pre_ctime = inode->i_ctime; fhp->fh_pre_size = inode->i_size; fhp->fh_pre_change = inode->i_version; - fhp->fh_pre_saved = 1; + fhp->fh_pre_saved = true; } } @@ -267,7 +267,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) inode = d_inode(dentry); mutex_lock_nested(&inode->i_mutex, subclass); fill_pre_wcc(fhp); - fhp->fh_locked = 1; + fhp->fh_locked = true; } static inline void @@ -285,7 +285,7 @@ fh_unlock(struct svc_fh *fhp) if (fhp->fh_locked) { fill_post_wcc(fhp); mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex); - fhp->fh_locked = 0; + fhp->fh_locked = false; } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13cae2..77fdf4de91ba 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -84,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -94,11 +94,12 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 - unsigned char sc_type; - stateid_t sc_stateid; - struct nfs4_client *sc_client; - struct nfs4_file *sc_file; - void (*sc_free)(struct nfs4_stid *); + unsigned char sc_type; + stateid_t sc_stateid; + spinlock_t sc_lock; + struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; /* @@ -364,15 +365,6 @@ struct nfs4_client_reclaim { char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; -static inline void -update_stateid(stateid_t *stateid) -{ - stateid->si_generation++; - /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ - if (stateid->si_generation == 0) - stateid->si_generation = 1; -} - /* A reasonable value for REPLAY_ISIZE was estimated as follows: * The OPEN response, typically the largest, requires * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + @@ -534,15 +526,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) @@ -561,6 +554,7 @@ struct nfs4_layout_stateid { struct nfsd4_callback ls_recall; stateid_t ls_recall_sid; bool ls_recalled; + struct mutex ls_mutex; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) @@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); +void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 82f89070594c..90967466a1e5 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,5 +1,3 @@ -#include "state.h" - #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c668520c344b..0befe762762b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,8 @@ #include <linux/tracepoint.h> +#include "state.h" + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 45c04979e7b3..994d66fbb446 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1631,7 +1631,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 1; + ffhp->fh_locked = tfhp->fh_locked = true; fill_pre_wcc(ffhp); fill_pre_wcc(tfhp); @@ -1681,7 +1681,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 0; + ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fee2451ae248..fcfc48cbe136 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -112,14 +112,14 @@ static inline int fh_want_write(struct svc_fh *fh) int ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) - fh->fh_want_write = 1; + fh->fh_want_write = true; return ret; } static inline void fh_drop_write(struct svc_fh *fh) { if (fh->fh_want_write) { - fh->fh_want_write = 0; + fh->fh_want_write = false; mnt_drop_write(fh->fh_export->ex_path.mnt); } } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 9f991007a578..ce7362c88b48 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -632,7 +632,7 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = fhp->fh_post_saved; + cinfo->atomic = (u32)fhp->fh_post_saved; cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 54575e3cc1a2..088ba001c6ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; file_update_time(vma->vm_file); - ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ddddef0021a0..709fbbd44c65 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1480,16 +1480,17 @@ static int o2hb_read_block_input(struct o2hb_region *reg, return 0; } -static ssize_t o2hb_region_block_bytes_read(struct o2hb_region *reg, +static ssize_t o2hb_region_block_bytes_show(struct config_item *item, char *page) { - return sprintf(page, "%u\n", reg->hr_block_bytes); + return sprintf(page, "%u\n", to_o2hb_region(item)->hr_block_bytes); } -static ssize_t o2hb_region_block_bytes_write(struct o2hb_region *reg, +static ssize_t o2hb_region_block_bytes_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); int status; unsigned long block_bytes; unsigned int block_bits; @@ -1508,16 +1509,17 @@ static ssize_t o2hb_region_block_bytes_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_start_block_read(struct o2hb_region *reg, +static ssize_t o2hb_region_start_block_show(struct config_item *item, char *page) { - return sprintf(page, "%llu\n", reg->hr_start_block); + return sprintf(page, "%llu\n", to_o2hb_region(item)->hr_start_block); } -static ssize_t o2hb_region_start_block_write(struct o2hb_region *reg, +static ssize_t o2hb_region_start_block_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); unsigned long long tmp; char *p = (char *)page; @@ -1533,16 +1535,16 @@ static ssize_t o2hb_region_start_block_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_blocks_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_blocks_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", reg->hr_blocks); + return sprintf(page, "%d\n", to_o2hb_region(item)->hr_blocks); } -static ssize_t o2hb_region_blocks_write(struct o2hb_region *reg, +static ssize_t o2hb_region_blocks_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); unsigned long tmp; char *p = (char *)page; @@ -1561,13 +1563,12 @@ static ssize_t o2hb_region_blocks_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_dev_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_dev_show(struct config_item *item, char *page) { unsigned int ret = 0; - if (reg->hr_bdev) - ret = sprintf(page, "%s\n", reg->hr_dev_name); + if (to_o2hb_region(item)->hr_bdev) + ret = sprintf(page, "%s\n", to_o2hb_region(item)->hr_dev_name); return ret; } @@ -1677,10 +1678,11 @@ out: } /* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */ -static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, +static ssize_t o2hb_region_dev_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); struct task_struct *hb_task; long fd; int sectsize; @@ -1841,9 +1843,9 @@ out: return ret; } -static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_pid_show(struct config_item *item, char *page) { + struct o2hb_region *reg = to_o2hb_region(item); pid_t pid = 0; spin_lock(&o2hb_live_lock); @@ -1857,92 +1859,23 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, return sprintf(page, "%u\n", pid); } -struct o2hb_region_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2hb_region *, char *); - ssize_t (*store)(struct o2hb_region *, const char *, size_t); -}; - -static struct o2hb_region_attribute o2hb_region_attr_block_bytes = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "block_bytes", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_block_bytes_read, - .store = o2hb_region_block_bytes_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_start_block = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "start_block", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_start_block_read, - .store = o2hb_region_start_block_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_blocks = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "blocks", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_blocks_read, - .store = o2hb_region_blocks_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_dev = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "dev", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_dev_read, - .store = o2hb_region_dev_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_pid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "pid", - .ca_mode = S_IRUGO | S_IRUSR }, - .show = o2hb_region_pid_read, -}; +CONFIGFS_ATTR(o2hb_region_, block_bytes); +CONFIGFS_ATTR(o2hb_region_, start_block); +CONFIGFS_ATTR(o2hb_region_, blocks); +CONFIGFS_ATTR(o2hb_region_, dev); +CONFIGFS_ATTR_RO(o2hb_region_, pid); static struct configfs_attribute *o2hb_region_attrs[] = { - &o2hb_region_attr_block_bytes.attr, - &o2hb_region_attr_start_block.attr, - &o2hb_region_attr_blocks.attr, - &o2hb_region_attr_dev.attr, - &o2hb_region_attr_pid.attr, + &o2hb_region_attr_block_bytes, + &o2hb_region_attr_start_block, + &o2hb_region_attr_blocks, + &o2hb_region_attr_dev, + &o2hb_region_attr_pid, NULL, }; -static ssize_t o2hb_region_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2hb_region *reg = to_o2hb_region(item); - struct o2hb_region_attribute *o2hb_region_attr = - container_of(attr, struct o2hb_region_attribute, attr); - ssize_t ret = 0; - - if (o2hb_region_attr->show) - ret = o2hb_region_attr->show(reg, page); - return ret; -} - -static ssize_t o2hb_region_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2hb_region *reg = to_o2hb_region(item); - struct o2hb_region_attribute *o2hb_region_attr = - container_of(attr, struct o2hb_region_attribute, attr); - ssize_t ret = -EINVAL; - - if (o2hb_region_attr->store) - ret = o2hb_region_attr->store(reg, page, count); - return ret; -} - static struct configfs_item_operations o2hb_region_item_ops = { .release = o2hb_region_release, - .show_attribute = o2hb_region_show, - .store_attribute = o2hb_region_store, }; static struct config_item_type o2hb_region_type = { @@ -2137,49 +2070,14 @@ unlock: spin_unlock(&o2hb_live_lock); } -struct o2hb_heartbeat_group_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2hb_heartbeat_group *, char *); - ssize_t (*store)(struct o2hb_heartbeat_group *, const char *, size_t); -}; - -static ssize_t o2hb_heartbeat_group_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2hb_heartbeat_group *reg = to_o2hb_heartbeat_group(to_config_group(item)); - struct o2hb_heartbeat_group_attribute *o2hb_heartbeat_group_attr = - container_of(attr, struct o2hb_heartbeat_group_attribute, attr); - ssize_t ret = 0; - - if (o2hb_heartbeat_group_attr->show) - ret = o2hb_heartbeat_group_attr->show(reg, page); - return ret; -} - -static ssize_t o2hb_heartbeat_group_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2hb_heartbeat_group *reg = to_o2hb_heartbeat_group(to_config_group(item)); - struct o2hb_heartbeat_group_attribute *o2hb_heartbeat_group_attr = - container_of(attr, struct o2hb_heartbeat_group_attribute, attr); - ssize_t ret = -EINVAL; - - if (o2hb_heartbeat_group_attr->store) - ret = o2hb_heartbeat_group_attr->store(reg, page, count); - return ret; -} - -static ssize_t o2hb_heartbeat_group_threshold_show(struct o2hb_heartbeat_group *group, - char *page) +static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item, + char *page) { return sprintf(page, "%u\n", o2hb_dead_threshold); } -static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group *group, - const char *page, - size_t count) +static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item, + const char *page, size_t count) { unsigned long tmp; char *p = (char *)page; @@ -2194,17 +2092,15 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group return count; } -static -ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, - char *page) +static ssize_t o2hb_heartbeat_group_mode_show(struct config_item *item, + char *page) { return sprintf(page, "%s\n", o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); } -static -ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, - const char *page, size_t count) +static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item, + const char *page, size_t count) { unsigned int i; int ret; @@ -2229,33 +2125,15 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, } -static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "dead_threshold", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_heartbeat_group_threshold_show, - .store = o2hb_heartbeat_group_threshold_store, -}; - -static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "mode", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_heartbeat_group_mode_show, - .store = o2hb_heartbeat_group_mode_store, -}; +CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold); +CONFIGFS_ATTR(o2hb_heartbeat_group_, mode); static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { - &o2hb_heartbeat_group_attr_threshold.attr, - &o2hb_heartbeat_group_attr_mode.attr, + &o2hb_heartbeat_group_attr_threshold, + &o2hb_heartbeat_group_attr_mode, NULL, }; -static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { - .show_attribute = o2hb_heartbeat_group_show, - .store_attribute = o2hb_heartbeat_group_store, -}; - static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { .make_item = o2hb_heartbeat_group_make_item, .drop_item = o2hb_heartbeat_group_drop_item, @@ -2263,7 +2141,6 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { static struct config_item_type o2hb_heartbeat_group_type = { .ct_group_ops = &o2hb_heartbeat_group_group_ops, - .ct_item_ops = &o2hb_heartbeat_group_item_ops, .ct_attrs = o2hb_heartbeat_group_attrs, .ct_owner = THIS_MODULE, }; diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 441c84e169e6..72afdca3cea7 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -172,9 +172,9 @@ static void o2nm_node_release(struct config_item *item) kfree(node); } -static ssize_t o2nm_node_num_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_num_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", node->nd_num); + return sprintf(page, "%d\n", to_o2nm_node(item)->nd_num); } static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) @@ -188,15 +188,16 @@ enum { O2NM_NODE_ATTR_NUM = 0, O2NM_NODE_ATTR_PORT, O2NM_NODE_ATTR_ADDRESS, - O2NM_NODE_ATTR_LOCAL, }; -static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page, +static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); unsigned long tmp; char *p = (char *)page; + int ret = 0; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) @@ -215,26 +216,30 @@ static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page, write_lock(&cluster->cl_nodes_lock); if (cluster->cl_nodes[tmp]) - p = NULL; + ret = -EEXIST; + else if (test_and_set_bit(O2NM_NODE_ATTR_NUM, + &node->nd_set_attributes)) + ret = -EBUSY; else { cluster->cl_nodes[tmp] = node; node->nd_num = tmp; set_bit(tmp, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); - if (p == NULL) - return -EEXIST; + if (ret) + return ret; return count; } -static ssize_t o2nm_node_ipv4_port_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_ipv4_port_show(struct config_item *item, char *page) { - return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port)); + return sprintf(page, "%u\n", ntohs(to_o2nm_node(item)->nd_ipv4_port)); } -static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node, +static ssize_t o2nm_node_ipv4_port_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); unsigned long tmp; char *p = (char *)page; @@ -247,20 +252,23 @@ static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node, if (tmp >= (u16)-1) return -ERANGE; + if (test_and_set_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) + return -EBUSY; node->nd_ipv4_port = htons(tmp); return count; } -static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_ipv4_address_show(struct config_item *item, char *page) { - return sprintf(page, "%pI4\n", &node->nd_ipv4_address); + return sprintf(page, "%pI4\n", &to_o2nm_node(item)->nd_ipv4_address); } -static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, +static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); int ret, i; struct rb_node **p, *parent; @@ -282,6 +290,9 @@ static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, write_lock(&cluster->cl_nodes_lock); if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) ret = -EEXIST; + else if (test_and_set_bit(O2NM_NODE_ATTR_ADDRESS, + &node->nd_set_attributes)) + ret = -EBUSY; else { rb_link_node(&node->nd_ip_node, parent, p); rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); @@ -295,14 +306,15 @@ static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, return count; } -static ssize_t o2nm_node_local_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_local_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", node->nd_local); + return sprintf(page, "%d\n", to_o2nm_node(item)->nd_local); } -static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page, +static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); unsigned long tmp; char *p = (char *)page; @@ -349,108 +361,21 @@ static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page, return count; } -struct o2nm_node_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2nm_node *, char *); - ssize_t (*store)(struct o2nm_node *, const char *, size_t); -}; - -static struct o2nm_node_attribute o2nm_node_attr_num = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "num", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_num_read, - .store = o2nm_node_num_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_ipv4_port = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "ipv4_port", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_ipv4_port_read, - .store = o2nm_node_ipv4_port_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_ipv4_address = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "ipv4_address", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_ipv4_address_read, - .store = o2nm_node_ipv4_address_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_local = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "local", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_local_read, - .store = o2nm_node_local_write, -}; +CONFIGFS_ATTR(o2nm_node_, num); +CONFIGFS_ATTR(o2nm_node_, ipv4_port); +CONFIGFS_ATTR(o2nm_node_, ipv4_address); +CONFIGFS_ATTR(o2nm_node_, local); static struct configfs_attribute *o2nm_node_attrs[] = { - [O2NM_NODE_ATTR_NUM] = &o2nm_node_attr_num.attr, - [O2NM_NODE_ATTR_PORT] = &o2nm_node_attr_ipv4_port.attr, - [O2NM_NODE_ATTR_ADDRESS] = &o2nm_node_attr_ipv4_address.attr, - [O2NM_NODE_ATTR_LOCAL] = &o2nm_node_attr_local.attr, + &o2nm_node_attr_num, + &o2nm_node_attr_ipv4_port, + &o2nm_node_attr_ipv4_address, + &o2nm_node_attr_local, NULL, }; -static int o2nm_attr_index(struct configfs_attribute *attr) -{ - int i; - for (i = 0; i < ARRAY_SIZE(o2nm_node_attrs); i++) { - if (attr == o2nm_node_attrs[i]) - return i; - } - BUG(); - return 0; -} - -static ssize_t o2nm_node_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_node_attribute *o2nm_node_attr = - container_of(attr, struct o2nm_node_attribute, attr); - ssize_t ret = 0; - - if (o2nm_node_attr->show) - ret = o2nm_node_attr->show(node, page); - return ret; -} - -static ssize_t o2nm_node_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_node_attribute *o2nm_node_attr = - container_of(attr, struct o2nm_node_attribute, attr); - ssize_t ret; - int attr_index = o2nm_attr_index(attr); - - if (o2nm_node_attr->store == NULL) { - ret = -EINVAL; - goto out; - } - - if (test_bit(attr_index, &node->nd_set_attributes)) - return -EBUSY; - - ret = o2nm_node_attr->store(node, page, count); - if (ret < count) - goto out; - - set_bit(attr_index, &node->nd_set_attributes); -out: - return ret; -} - static struct configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, - .show_attribute = o2nm_node_show, - .store_attribute = o2nm_node_store, }; static struct config_item_type o2nm_node_type = { @@ -475,12 +400,6 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) } #endif -struct o2nm_cluster_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2nm_cluster *, char *); - ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); -}; - static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, unsigned int *val) { @@ -501,15 +420,16 @@ static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, return count; } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_idle_timeout_ms_show(struct config_item *item, + char *page) { - return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); + return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_idle_timeout_ms); } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_idle_timeout_ms_store(struct config_item *item, + const char *page, size_t count) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; @@ -536,15 +456,17 @@ static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( return ret; } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_keepalive_delay_ms_show( + struct config_item *item, char *page) { - return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); + return sprintf(page, "%u\n", + to_o2nm_cluster(item)->cl_keepalive_delay_ms); } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_keepalive_delay_ms_store( + struct config_item *item, const char *page, size_t count) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; @@ -571,22 +493,24 @@ static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( return ret; } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_reconnect_delay_ms_show( + struct config_item *item, char *page) { - return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); + return sprintf(page, "%u\n", + to_o2nm_cluster(item)->cl_reconnect_delay_ms); } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_reconnect_delay_ms_store( + struct config_item *item, const char *page, size_t count) { return o2nm_cluster_attr_write(page, count, - &cluster->cl_reconnect_delay_ms); + &to_o2nm_cluster(item)->cl_reconnect_delay_ms); } -static ssize_t o2nm_cluster_attr_fence_method_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_fence_method_show( + struct config_item *item, char *page) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret = 0; if (cluster) @@ -595,8 +519,8 @@ static ssize_t o2nm_cluster_attr_fence_method_read( return ret; } -static ssize_t o2nm_cluster_attr_fence_method_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_fence_method_store( + struct config_item *item, const char *page, size_t count) { unsigned int i; @@ -608,10 +532,10 @@ static ssize_t o2nm_cluster_attr_fence_method_write( continue; if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) continue; - if (cluster->cl_fence_method != i) { + if (to_o2nm_cluster(item)->cl_fence_method != i) { printk(KERN_INFO "ocfs2: Changing fence method to %s\n", o2nm_fence_method_desc[i]); - cluster->cl_fence_method = i; + to_o2nm_cluster(item)->cl_fence_method = i; } return count; } @@ -620,79 +544,18 @@ bail: return -EINVAL; } -static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "idle_timeout_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_idle_timeout_ms_read, - .store = o2nm_cluster_attr_idle_timeout_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "keepalive_delay_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_keepalive_delay_ms_read, - .store = o2nm_cluster_attr_keepalive_delay_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "reconnect_delay_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_reconnect_delay_ms_read, - .store = o2nm_cluster_attr_reconnect_delay_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "fence_method", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_fence_method_read, - .store = o2nm_cluster_attr_fence_method_write, -}; +CONFIGFS_ATTR(o2nm_cluster_, idle_timeout_ms); +CONFIGFS_ATTR(o2nm_cluster_, keepalive_delay_ms); +CONFIGFS_ATTR(o2nm_cluster_, reconnect_delay_ms); +CONFIGFS_ATTR(o2nm_cluster_, fence_method); static struct configfs_attribute *o2nm_cluster_attrs[] = { - &o2nm_cluster_attr_idle_timeout_ms.attr, - &o2nm_cluster_attr_keepalive_delay_ms.attr, - &o2nm_cluster_attr_reconnect_delay_ms.attr, - &o2nm_cluster_attr_fence_method.attr, + &o2nm_cluster_attr_idle_timeout_ms, + &o2nm_cluster_attr_keepalive_delay_ms, + &o2nm_cluster_attr_reconnect_delay_ms, + &o2nm_cluster_attr_fence_method, NULL, }; -static ssize_t o2nm_cluster_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2nm_cluster *cluster = to_o2nm_cluster(item); - struct o2nm_cluster_attribute *o2nm_cluster_attr = - container_of(attr, struct o2nm_cluster_attribute, attr); - ssize_t ret = 0; - - if (o2nm_cluster_attr->show) - ret = o2nm_cluster_attr->show(cluster, page); - return ret; -} - -static ssize_t o2nm_cluster_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2nm_cluster *cluster = to_o2nm_cluster(item); - struct o2nm_cluster_attribute *o2nm_cluster_attr = - container_of(attr, struct o2nm_cluster_attribute, attr); - ssize_t ret; - - if (o2nm_cluster_attr->store == NULL) { - ret = -EINVAL; - goto out; - } - - ret = o2nm_cluster_attr->store(cluster, page, count); - if (ret < count) - goto out; -out: - return ret; -} static struct config_item *o2nm_node_group_make_item(struct config_group *group, const char *name) @@ -773,8 +636,6 @@ static void o2nm_cluster_release(struct config_item *item) static struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, - .show_attribute = o2nm_cluster_show, - .store_attribute = o2nm_cluster_store, }; static struct config_item_type o2nm_cluster_type = { diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b48ac25d8a7..a03f6f433075 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ebfdea78659b..e9164f09841b 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7229,9 +7229,10 @@ leave: /* * 'security' attributes support */ -static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7244,8 +7245,9 @@ static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7253,8 +7255,9 @@ static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, name, buffer, size); } -static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7319,9 +7322,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = { /* * 'trusted' attributes support */ -static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7337,8 +7341,9 @@ static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7346,8 +7351,9 @@ static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, name, buffer, size); } -static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7366,9 +7372,10 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = { /* * 'user' attributes support */ -static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7385,8 +7392,9 @@ static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); @@ -7398,8 +7406,9 @@ static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, buffer, size); } -static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); diff --git a/fs/pipe.c b/fs/pipe.c index 8865f7963700..42cf8ddf0e55 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -366,18 +366,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - int error = ops->confirm(pipe, buf); - if (error) + ret = ops->confirm(pipe, buf); + if (ret) goto out; ret = copy_page_from_iter(buf->page, offset, chars, from); if (unlikely(ret < chars)) { - error = -EFAULT; + ret = -EFAULT; goto out; } do_wakeup = 1; - buf->len += chars; - ret = chars; + buf->len += ret; if (!iov_iter_count(from)) goto out; } @@ -693,17 +692,20 @@ int create_pipe_files(struct file **res, int flags) d_instantiate(path.dentry, inode); - err = -ENFILE; f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); - if (IS_ERR(f)) + if (IS_ERR(f)) { + err = PTR_ERR(f); goto err_dentry; + } f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); - if (IS_ERR(res[0])) + if (IS_ERR(res[0])) { + err = PTR_ERR(res[0]); goto err_file; + } path_get(&path); res[0]->private_data = inode->i_pipe; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4fb17ded7d47..4adde1e2cbec 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -762,18 +762,21 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, EXPORT_SYMBOL (posix_acl_to_xattr); static int -posix_acl_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int type) +posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *value, size_t size) { struct posix_acl *acl; int error; + if (strcmp(name, "") != 0) + return -EINVAL; if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; - acl = get_acl(d_backing_inode(dentry), type); + acl = get_acl(d_backing_inode(dentry), handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -786,19 +789,22 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name, } static int -posix_acl_xattr_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct inode *inode = d_backing_inode(dentry); struct posix_acl *acl = NULL; int ret; + if (strcmp(name, "") != 0) + return -EINVAL; if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return value ? -EACCES : 0; if (!inode_owner_or_capable(inode)) return -EPERM; @@ -815,28 +821,22 @@ posix_acl_xattr_set(struct dentry *dentry, const char *name, } } - ret = inode->i_op->set_acl(inode, acl, type); + ret = inode->i_op->set_acl(inode, acl, handler->flags); out: posix_acl_release(acl); return ret; } static size_t -posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +posix_acl_xattr_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { - const char *xname; + const char *xname = handler->prefix; size_t size; if (!IS_POSIXACL(d_backing_inode(dentry))) - return -EOPNOTSUPP; - if (d_is_symlink(dentry)) - return -EOPNOTSUPP; - - if (type == ACL_TYPE_ACCESS) - xname = POSIX_ACL_XATTR_ACCESS; - else - xname = POSIX_ACL_XATTR_DEFAULT; + return 0; size = strlen(xname) + 1; if (list && size <= list_size) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e87f9b52bf06..66b26fdfff8d 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -778,7 +778,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->get(dentry, name, buffer, size, handler->flags); + return handler->get(handler, dentry, name, buffer, size); } /* @@ -797,7 +797,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(dentry, name, value, size, flags, handler->flags); + return handler->set(handler, dentry, name, value, size, flags); } /* @@ -814,7 +814,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name) if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags); + return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); } struct listxattr_buf { @@ -842,14 +842,14 @@ static int listxattr_filler(struct dir_context *ctx, const char *name, if (!handler) /* Unsupported xattr name */ return 0; if (b->buf) { - size = handler->list(b->dentry, b->buf + b->pos, - b->size, name, namelen, - handler->flags); + size = handler->list(handler, b->dentry, + b->buf + b->pos, b->size, name, + namelen); if (size > b->size) return -ERANGE; } else { - size = handler->list(b->dentry, NULL, 0, name, - namelen, handler->flags); + size = handler->list(handler, b->dentry, + NULL, 0, name, namelen); } b->pos += size; diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 9a3b0616f283..ac659af431ae 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -9,8 +9,8 @@ #include <linux/uaccess.h> static int -security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +security_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; @@ -22,8 +22,8 @@ security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -security_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +security_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; @@ -34,8 +34,9 @@ security_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t security_list(struct dentry *dentry, char *list, size_t list_len, - const char *name, size_t namelen, int handler_flags) +static size_t security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t namelen) { const size_t len = namelen + 1; diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index e4f1343714e0..a338adf1b8b4 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -8,8 +8,8 @@ #include <linux/uaccess.h> static int -trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +trusted_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; @@ -21,8 +21,8 @@ trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -trusted_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +trusted_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; @@ -33,8 +33,9 @@ trusted_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) +static size_t trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t len = name_len + 1; diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index d0b08d3e5689..39c9667191c5 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -7,8 +7,8 @@ #include <linux/uaccess.h> static int -user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +user_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) @@ -19,8 +19,8 @@ user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -user_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +user_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; @@ -30,8 +30,9 @@ user_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) +static size_t user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t len = name_len + 1; diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index e5e0ddf5b143..6a4cc344085c 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c @@ -68,8 +68,8 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer, name_size = le16_to_cpu(entry.size); handler = squashfs_xattr_handler(le16_to_cpu(entry.type)); if (handler) - prefix_size = handler->list(d, buffer, rest, NULL, - name_size, handler->flags); + prefix_size = handler->list(handler, d, buffer, rest, + NULL, name_size); if (prefix_size) { if (buffer) { if (prefix_size + name_size + 1 > rest) { @@ -212,88 +212,68 @@ failed: } -/* - * User namespace support - */ -static size_t squashfs_user_list(struct dentry *d, char *list, size_t list_size, - const char *name, size_t name_len, int type) +static size_t squashfs_xattr_handler_list(const struct xattr_handler *handler, + struct dentry *d, char *list, + size_t list_size, const char *name, + size_t name_len) { - if (list && XATTR_USER_PREFIX_LEN <= list_size) - memcpy(list, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); - return XATTR_USER_PREFIX_LEN; + int len = strlen(handler->prefix); + + if (list && len <= list_size) + memcpy(list, handler->prefix, len); + return len; } -static int squashfs_user_get(struct dentry *d, const char *name, void *buffer, - size_t size, int type) +static int squashfs_xattr_handler_get(const struct xattr_handler *handler, + struct dentry *d, const char *name, + void *buffer, size_t size) { if (name[0] == '\0') return -EINVAL; - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_USER, name, + return squashfs_xattr_get(d_inode(d), handler->flags, name, buffer, size); } +/* + * User namespace support + */ static const struct xattr_handler squashfs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = squashfs_user_list, - .get = squashfs_user_get + .flags = SQUASHFS_XATTR_USER, + .list = squashfs_xattr_handler_list, + .get = squashfs_xattr_handler_get }; /* * Trusted namespace support */ -static size_t squashfs_trusted_list(struct dentry *d, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t squashfs_trusted_xattr_handler_list(const struct xattr_handler *handler, + struct dentry *d, char *list, + size_t list_size, const char *name, + size_t name_len) { if (!capable(CAP_SYS_ADMIN)) return 0; - - if (list && XATTR_TRUSTED_PREFIX_LEN <= list_size) - memcpy(list, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); - return XATTR_TRUSTED_PREFIX_LEN; -} - -static int squashfs_trusted_get(struct dentry *d, const char *name, - void *buffer, size_t size, int type) -{ - if (name[0] == '\0') - return -EINVAL; - - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_TRUSTED, name, - buffer, size); + return squashfs_xattr_handler_list(handler, d, list, list_size, name, + name_len); } static const struct xattr_handler squashfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = squashfs_trusted_list, - .get = squashfs_trusted_get + .flags = SQUASHFS_XATTR_TRUSTED, + .list = squashfs_trusted_xattr_handler_list, + .get = squashfs_xattr_handler_get }; /* * Security namespace support */ -static size_t squashfs_security_list(struct dentry *d, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - if (list && XATTR_SECURITY_PREFIX_LEN <= list_size) - memcpy(list, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); - return XATTR_SECURITY_PREFIX_LEN; -} - -static int squashfs_security_get(struct dentry *d, const char *name, - void *buffer, size_t size, int type) -{ - if (name[0] == '\0') - return -EINVAL; - - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_SECURITY, name, - buffer, size); -} - static const struct xattr_handler squashfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = squashfs_security_list, - .get = squashfs_security_get + .flags = SQUASHFS_XATTR_SECURITY, + .list = squashfs_xattr_handler_list, + .get = squashfs_xattr_handler_get }; static const struct xattr_handler *squashfs_xattr_handler(int type) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index e1236594fffe..dc1358b5ec95 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -73,13 +73,26 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, } if (grp->bin_attrs) { - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + umode_t mode = (*bin_attr)->attr.mode; + if (update) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (!mode) + continue; + } + + WARN(mode & ~(SYSFS_PREALLOC | 0664), + "Attribute %s: Invalid permissions 0%o\n", + (*bin_attr)->attr.name, mode); + + mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - (*bin_attr)->attr.mode, NULL); + mode, NULL); if (error) break; } diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index ba66d508006a..7ff7712f284e 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -35,3 +35,18 @@ config UBIFS_FS_ZLIB default y help Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. + +config UBIFS_ATIME_SUPPORT + bool "Access time support" if UBIFS_FS + depends on UBIFS_FS + default n + help + Originally UBIFS did not support atime, because it looked like a bad idea due + increased flash wear. This option adds atime support and it is disabled by default + to preserve the old behavior. If you enable this option, UBIFS starts updating atime, + which means that file-system read operations will cause writes (inode atime + updates). This may affect file-system performance and increase flash device wear, + so be careful. How often atime is updated depends on the selected strategy: + strictatime is the "heavy", relatime is "lighter", etc. + + If unsure, say 'N' diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4c46a9865fa7..595ca0debe11 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2573,7 +2573,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, { int err, failing; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; failing = power_cut_emulated(c, lnum, 1); @@ -2595,7 +2595,7 @@ int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 1)) return -EROFS; @@ -2611,7 +2611,7 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; @@ -2627,7 +2627,7 @@ int dbg_leb_map(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c27c66c224a..e49bd2808bf3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -449,13 +449,14 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } out: + kfree(file->private_data); + file->private_data = NULL; + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; } - kfree(file->private_data); - file->private_data = NULL; /* 2 is a special value indicating that there are no more direntries */ ctx->pos = 2; return 0; @@ -787,9 +788,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); - if (!new_valid_dev(rdev)) - return -EINVAL; - if (S_ISBLK(mode) || S_ISCHR(mode)) { dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); if (!dev) @@ -1188,6 +1186,9 @@ const struct inode_operations ubifs_dir_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_dir_operations = { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a3dfe2ae79f2..0edc12856147 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1354,6 +1354,47 @@ static inline int mctime_update_needed(const struct inode *inode, return 0; } +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +/** + * ubifs_update_time - update time of inode. + * @inode: inode to update + * + * This function updates time of the inode. + */ +int ubifs_update_time(struct inode *inode, struct timespec *time, + int flags) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; + int iflags = I_DIRTY_TIME; + int err, release; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&ui->ui_mutex); + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + + if (!(inode->i_sb->s_flags & MS_LAZYTIME)) + iflags |= I_DIRTY_SYNC; + + release = ui->dirty; + __mark_inode_dirty(inode, iflags); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_budget(c, &req); + return 0; +} +#endif + /** * update_ctime - update mtime and ctime of an inode. * @inode: inode to update @@ -1537,6 +1578,9 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) if (err) return err; vma->vm_ops = &ubifs_file_vm_ops; +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + file_accessed(file); +#endif return 0; } @@ -1557,6 +1601,9 @@ const struct inode_operations ubifs_file_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct inode_operations ubifs_symlink_inode_operations = { @@ -1568,6 +1615,9 @@ const struct inode_operations ubifs_symlink_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_file_operations = { diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index dc9f27e9d61b..9a517109da0f 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1498,11 +1498,10 @@ static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, } /* nnode is being committed, so copy it */ - n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + n = kmemdup(nnode, sizeof(struct ubifs_nnode), GFP_NOFS); if (unlikely(!n)) return ERR_PTR(-ENOMEM); - memcpy(n, nnode, sizeof(struct ubifs_nnode)); n->cnext = NULL; __set_bit(DIRTY_CNODE, &n->flags); __clear_bit(COW_CNODE, &n->flags); @@ -1549,11 +1548,10 @@ static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, } /* pnode is being committed, so copy it */ - p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + p = kmemdup(pnode, sizeof(struct ubifs_pnode), GFP_NOFS); if (unlikely(!p)) return ERR_PTR(-ENOMEM); - memcpy(p, pnode, sizeof(struct ubifs_pnode)); p->cnext = NULL; __set_bit(DIRTY_CNODE, &p->flags); __clear_bit(COW_CNODE, &p->flags); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index ee7cb5ebb6e8..8ece6ca58c0b 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -155,13 +155,8 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) */ static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) { - if (new_valid_dev(rdev)) { - dev->new = cpu_to_le32(new_encode_dev(rdev)); - return sizeof(dev->new); - } else { - dev->huge = cpu_to_le64(huge_encode_dev(rdev)); - return sizeof(dev->huge); - } + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); } /** diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 695fc71d5244..586d59347fff 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -789,7 +789,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ ubifs_err(c, "corruption %d", ret); - ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + ubifs_scan_a_node(c, buf, len, lnum, offs, 0); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; @@ -1331,8 +1331,7 @@ void ubifs_destroy_size_tree(struct ubifs_info *c) struct size_entry *e, *n; rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { - if (e->inode) - iput(e->inode); + iput(e->inode); kfree(e); } @@ -1533,8 +1532,7 @@ int ubifs_recover_size(struct ubifs_info *c) err = fix_size_in_place(c, e); if (err) return err; - if (e->inode) - iput(e->inode); + iput(e->inode); } } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 9547a27868ad..1fd90c079537 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -128,7 +128,10 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) if (err) goto out_ino; - inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_flags |= S_NOCMTIME; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + inode->i_flags |= S_NOATIME; +#endif set_nlink(inode, le32_to_cpu(ino->nlink)); i_uid_write(inode, le32_to_cpu(ino->uid)); i_gid_write(inode, le32_to_cpu(ino->gid)); @@ -2037,7 +2040,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; - sb->s_xattr = ubifs_xattr_handlers; mutex_lock(&c->umount_mutex); err = mount_ubifs(c); @@ -2139,7 +2141,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, if (err) goto out_deact; /* We do not support atime */ - sb->s_flags |= MS_ACTIVE | MS_NOATIME; + sb->s_flags |= MS_ACTIVE; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + sb->s_flags |= MS_NOATIME; +#else + ubifs_msg(c, "full atime support is enabled."); +#endif } /* 'fill_super()' opens ubi again so we must close it here */ diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 957f5757f374..fa9a20cc60d6 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -198,11 +198,10 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, { struct ubifs_znode *zn; - zn = kmalloc(c->max_znode_sz, GFP_NOFS); + zn = kmemdup(znode, c->max_znode_sz, GFP_NOFS); if (unlikely(!zn)) return ERR_PTR(-ENOMEM); - memcpy(zn, znode, c->max_znode_sz); zn->cnext = NULL; __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index de759022f3d6..a5697de763f5 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -858,9 +858,9 @@ struct ubifs_compressor { * @mod_dent: non-zero if the operation removes or modifies an existing * directory entry * @new_ino: non-zero if the operation adds a new inode - * @new_ino_d: now much data newly created inode contains + * @new_ino_d: how much data newly created inode contains * @dirtied_ino: how many inodes the operation makes dirty - * @dirtied_ino_d: now much data dirtied inode contains + * @dirtied_ino_d: how much data dirtied inode contains * @idx_growth: how much the index will supposedly grow * @data_growth: how much new data the operation will supposedly add * @dd_growth: how much data that makes other data dirty the operation will @@ -1470,7 +1470,6 @@ extern spinlock_t ubifs_infos_lock; extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; extern const struct super_operations ubifs_super_operations; -extern const struct xattr_handler *ubifs_xattr_handlers[]; extern const struct address_space_operations ubifs_file_address_operations; extern const struct file_operations ubifs_file_operations; extern const struct inode_operations ubifs_file_inode_operations; @@ -1746,6 +1745,9 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); +#endif /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index fd65b3f1923c..e8b01b721e99 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -200,6 +200,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, int err; struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); + void *buf = NULL; struct ubifs_budget_req req = { .dirtied_ino = 2, .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; @@ -208,14 +209,17 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, if (err) return err; - kfree(ui->data); - ui->data = kmemdup(value, size, GFP_NOFS); - if (!ui->data) { + buf = kmemdup(value, size, GFP_NOFS); + if (!buf) { err = -ENOMEM; goto out_free; } + mutex_lock(&ui->ui_mutex); + kfree(ui->data); + ui->data = buf; inode->i_size = ui->ui_size = size; ui->data_len = size; + mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); host->i_ctime = ubifs_current_time(host); @@ -409,6 +413,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, ubifs_assert(inode->i_size == ui->data_len); ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); + mutex_lock(&ui->ui_mutex); if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { @@ -423,6 +428,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, err = ui->data_len; out_iput: + mutex_unlock(&ui->ui_mutex); iput(inode); out_unlock: kfree(xent); @@ -582,46 +588,6 @@ out_free: return err; } -static size_t security_listxattr(struct dentry *d, char *list, size_t list_size, - const char *name, size_t name_len, int flags) -{ - const int prefix_len = XATTR_SECURITY_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - - return total_len; -} - -static int security_getxattr(struct dentry *d, const char *name, void *buffer, - size_t size, int flags) -{ - return ubifs_getxattr(d, name, buffer, size); -} - -static int security_setxattr(struct dentry *d, const char *name, - const void *value, size_t size, int flags, - int handler_flags) -{ - return ubifs_setxattr(d, name, value, size, flags); -} - -static const struct xattr_handler ubifs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = security_listxattr, - .get = security_getxattr, - .set = security_setxattr, -}; - -const struct xattr_handler *ubifs_xattr_handlers[] = { - &ubifs_xattr_security_handler, - NULL, -}; - static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { diff --git a/fs/xattr.c b/fs/xattr.c index 072fee1258dd..9b932b95d74e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -720,7 +720,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->get(dentry, name, buffer, size, handler->flags); + return handler->get(handler, dentry, name, buffer, size); } /* @@ -735,15 +735,15 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (!buffer) { for_each_xattr_handler(handlers, handler) { - size += handler->list(dentry, NULL, 0, NULL, 0, - handler->flags); + size += handler->list(handler, dentry, NULL, 0, + NULL, 0); } } else { char *buf = buffer; for_each_xattr_handler(handlers, handler) { - size = handler->list(dentry, buf, buffer_size, - NULL, 0, handler->flags); + size = handler->list(handler, dentry, buf, buffer_size, + NULL, 0); if (size > buffer_size) return -ERANGE; buf += size; @@ -767,7 +767,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(dentry, name, value, size, flags, handler->flags); + return handler->set(handler, dentry, name, value, size, flags); } /* @@ -782,8 +782,7 @@ generic_removexattr(struct dentry *dentry, const char *name) handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(dentry, name, NULL, 0, - XATTR_REPLACE, handler->flags); + return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); } EXPORT_SYMBOL(generic_getxattr); @@ -791,6 +790,30 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); +/** + * xattr_full_name - Compute full attribute name from suffix + * + * @handler: handler of the xattr_handler operation + * @name: name passed to the xattr_handler operation + * + * The get and set xattr handler operations are called with the remainder of + * the attribute name after skipping the handler's prefix: for example, "foo" + * is passed to the get operation of a handler with prefix "user." to get + * attribute "user.foo". The full name is still "there" in the name though. + * + * Note: the list xattr handler operation when called from the vfs is passed a + * NULL name; some file systems use this operation internally, with varying + * semantics. + */ +const char *xattr_full_name(const struct xattr_handler *handler, + const char *name) +{ + size_t prefix_len = strlen(handler->prefix); + + return name - prefix_len; +} +EXPORT_SYMBOL(xattr_full_name); + /* * Allocate new xattr and copy in the value; but leave the name to callers. */ diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a096841bd06c..f64639176670 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_stats.o \ xfs_super.o \ xfs_symlink.o \ xfs_sysfs.o \ @@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += xfs_stats.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a7a3a63bb360..686ba6fb20dd 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", + current->comm, current->pid, + (unsigned int)size, __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, + __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ffad7f20342f..3479294c1d58 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -482,7 +482,9 @@ xfs_agfl_verify( be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) return false; } - return true; + + return xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); } static void @@ -651,8 +653,8 @@ xfs_alloc_ag_vextent( -((long)(args->len))); } - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + XFS_STATS_INC(args->mp, xs_allocx); + XFS_STATS_ADD(args->mp, xs_allocb, args->len); return error; } @@ -1808,8 +1810,8 @@ xfs_free_ag_extent( if (!isfl) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); + XFS_STATS_INC(mp, xs_freex); + XFS_STATS_ADD(mp, xs_freeb, len); trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); @@ -2259,9 +2261,13 @@ xfs_agf_verify( { struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) + return false; + } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && @@ -2503,7 +2509,7 @@ xfs_alloc_vextent( * Try near allocation first, then anywhere-in-ag after * the first a.g. fails. */ - if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && + if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) && (mp->m_flags & XFS_MOUNT_32BITINODES)) { args->fsbno = XFS_AGB_TO_FSB(mp, ((mp->m_agfrotor / rotorstep) % @@ -2634,6 +2640,14 @@ xfs_alloc_vextent( XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len); #endif + + /* Zero the extent if we were asked to do so */ + if (args->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(args->ip, args->fsbno, args->len); + if (error) + goto error0; + } + } xfs_perag_put(args->pag); return 0; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index ca1c8168373a..0ecde4d5cac8 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg { struct xfs_mount *mp; /* file system mount point */ struct xfs_buf *agbp; /* buffer for a.g. freelist header */ struct xfs_perag *pag; /* per-ag struct for this agno */ + struct xfs_inode *ip; /* for userdata zeroing method */ xfs_fsblock_t fsbno; /* file system block number */ xfs_agnumber_t agno; /* allocation group number */ xfs_agblock_t agbno; /* allocation group-relative block # */ @@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg { char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ char isfl; /* set if is freelist blocks - !acctg */ - char userdata; /* set if this is user data */ + char userdata; /* mask defining userdata treatment */ xfs_fsblock_t firstblock; /* io first block allocated */ } xfs_alloc_arg_t; /* * Defines for userdata */ -#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ -#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ +#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ +#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ +#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag, xfs_extlen_t need); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index ff065578969f..f949818fa1c7 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -125,7 +125,7 @@ xfs_attr_get( uint lock_mode; int error; - XFS_STATS_INC(xs_attr_get); + XFS_STATS_INC(ip->i_mount, xs_attr_get); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -209,7 +209,7 @@ xfs_attr_set( int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, committed, local; - XFS_STATS_INC(xs_attr_set); + XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; @@ -412,7 +412,7 @@ xfs_attr_remove( xfs_fsblock_t firstblock; int error; - XFS_STATS_INC(xs_attr_remove); + XFS_STATS_INC(mp, xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 33df52d97ec7..aa187f7ba2dd 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -41,6 +41,7 @@ #include "xfs_buf_item.h" #include "xfs_cksum.h" #include "xfs_dir2.h" +#include "xfs_log.h" /* @@ -266,6 +267,8 @@ xfs_attr3_leaf_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index f38f9bd81557..5ab95ffa4ae9 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -107,7 +107,7 @@ xfs_attr3_rmt_verify( if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) return false; if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) return false; if (rmt->rm_owner == 0) return false; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8e2010d53b07..119c2422aac7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -948,14 +948,16 @@ xfs_bmap_local_to_extents( bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); /* - * Initialise the block and copy the data + * Initialize the block, copy the data and log the remote buffer. * - * Note: init_fn must set the buffer log item type correctly! + * The callout is responsible for logging because the remote format + * might differ from the local format and thus we don't know how much to + * log here. Note that init_fn must also set the buffer log item type + * correctly. */ init_fn(tp, bp, ip, ifp); - /* account for the change in fork size and log everything */ - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); xfs_bmap_local_to_extents_empty(ip, whichfork); flags |= XFS_ILOG_CORE; @@ -1435,7 +1437,7 @@ xfs_bmap_search_extents( xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - XFS_STATS_INC(xs_look_exlist); + XFS_STATS_INC(ip->i_mount, xs_look_exlist); ifp = XFS_IFORK_PTR(ip, fork); ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); @@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real( ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real( ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); ASSERT(!isnullstartblock(new->br_startblock)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real( ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); state = 0; if (whichfork == XFS_ATTR_FORK) @@ -3800,8 +3802,13 @@ xfs_bmap_btalloc( args.wasdel = ap->wasdel; args.isfl = 0; args.userdata = ap->userdata; - if ((error = xfs_alloc_vextent(&args))) + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) + args.ip = ap->ip; + + error = xfs_alloc_vextent(&args); + if (error) return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { /* * Exact allocation failed. Now try with alignment @@ -4036,7 +4043,7 @@ xfs_bmapi_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapr); + XFS_STATS_INC(mp, xs_blk_mapr); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -4221,7 +4228,7 @@ xfs_bmapi_delay( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); @@ -4300,11 +4307,14 @@ xfs_bmapi_allocate( /* * Indicate if this is the first user data in the file, or just any - * user data. + * user data. And if it is userdata, indicate whether it needs to + * be initialised to zero during allocation. */ if (!(bma->flags & XFS_BMAPI_METADATA)) { bma->userdata = (bma->offset == 0) ? XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + if (bma->flags & XFS_BMAPI_ZERO) + bma->userdata |= XFS_ALLOC_USERDATA_ZERO; } bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; @@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + /* + * Before insertion into the bmbt, zero the range being converted + * if required. + */ + if (flags & XFS_BMAPI_ZERO) { + error = xfs_zero_extent(bma->ip, mval->br_startblock, + mval->br_blockcount); + if (error) + return error; + } + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, &bma->cur, mval, bma->firstblock, bma->flist, &tmp_logflags); @@ -4512,6 +4533,18 @@ xfs_bmapi_write( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + /* zeroing is for currently only for data extents, not metadata */ + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); + /* + * we can allocate unwritten extents or pre-zero allocated blocks, + * but it makes no sense to do both at once. This would result in + * zeroing the unwritten extent twice, but it still being an + * unwritten extent.... + */ + ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); + if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), @@ -4525,7 +4558,7 @@ xfs_bmapi_write( ifp = XFS_IFORK_PTR(ip, whichfork); - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (*firstblock == NULLFSBLOCK) { if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) @@ -4718,12 +4751,12 @@ xfs_bmap_del_extent( xfs_filblks_t temp2; /* for indirect length calculations */ int state = 0; - XFS_STATS_INC(xs_del_exlist); + mp = ip->i_mount; + XFS_STATS_INC(mp, xs_del_exlist); if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; - mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); @@ -5070,7 +5103,7 @@ xfs_bunmapi( *done = 1; return 0; } - XFS_STATS_INC(xs_blk_unmap); + XFS_STATS_INC(mp, xs_blk_unmap); isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); start = bno; bno = start + len - 1; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 6aaa0c1c7200..a160f8a5a3fc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -52,9 +52,9 @@ struct xfs_bmalloca { xfs_extlen_t minleft; /* amount must be left after alloc */ bool eof; /* set if allocating past last extent */ bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ bool aeof; /* allocated space at eof */ bool conv; /* overwriting unwritten extents */ + char userdata;/* userdata mask */ int flags; }; @@ -109,6 +109,14 @@ typedef struct xfs_bmap_free */ #define XFS_BMAPI_CONVERT 0x040 +/* + * allocate zeroed extents - this requires all newly allocated user data extents + * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set. + * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found + * during the allocation range to zeroed written extents. + */ +#define XFS_BMAPI_ZERO 0x080 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -116,7 +124,8 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" } + { XFS_BMAPI_CONVERT, "CONVERT" }, \ + { XFS_BMAPI_ZERO, "ZERO" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index f7d7ee7a2607..af1bbee5586e 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -32,6 +32,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_alloc.h" +#include "xfs_log.h" /* * Cursor allocation zone. @@ -222,7 +223,7 @@ xfs_btree_check_ptr( * long-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -243,8 +244,14 @@ bool xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + } return true; } @@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc( * short-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -275,8 +282,14 @@ bool xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + } return true; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 8f18bab73ea5..992dec0638f3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -84,31 +84,38 @@ union xfs_btree_rec { /* * Generic stats interface */ -#define __XFS_BTREE_STATS_INC(type, stat) \ - XFS_STATS_INC(xs_ ## type ## _2_ ## stat) -#define XFS_BTREE_STATS_INC(cur, stat) \ +#define __XFS_BTREE_STATS_INC(mp, type, stat) \ + XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) +#define XFS_BTREE_STATS_INC(cur, stat) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) -#define __XFS_BTREE_STATS_ADD(type, stat, val) \ - XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) +#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ + XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) #define XFS_BTREE_STATS_ADD(cur, stat, val) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + case XFS_BTNUM_BNO: \ + __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ + case XFS_BTNUM_CNT: \ + __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ + case XFS_BTNUM_BMAP: \ + __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ + case XFS_BTNUM_INO: \ + __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ + case XFS_BTNUM_FINO: \ + __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index be43248a5822..e89a0f8f827c 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -39,6 +39,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* * xfs_da_btree.c @@ -150,6 +151,8 @@ xfs_da3_node_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) return false; @@ -322,6 +325,7 @@ xfs_da3_node_create( if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr)); ichdr.magic = XFS_DA3_NODE_MAGIC; hdr3->info.blkno = cpu_to_be64(bp->b_bn); hdr3->info.owner = cpu_to_be64(args->dp->i_ino); diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 9de401d297e5..2fb53a5c0a74 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -271,7 +271,7 @@ xfs_dir_createname( rval = xfs_dir_ino_validate(tp->t_mountp, inum); if (rval) return rval; - XFS_STATS_INC(xs_dir_create); + XFS_STATS_INC(dp->i_mount, xs_dir_create); } args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); @@ -365,7 +365,7 @@ xfs_dir_lookup( int lock_mode; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_lookup); + XFS_STATS_INC(dp->i_mount, xs_dir_lookup); /* * We need to use KM_NOFS here so that lockdep will not throw false @@ -444,7 +444,7 @@ xfs_dir_removename( int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_remove); + XFS_STATS_INC(dp->i_mount, xs_dir_remove); args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 4778d1dd511a..9c10e2b8cfcb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -34,6 +34,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function prototypes. @@ -71,6 +72,8 @@ xfs_dir3_block_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 824131e71bc5..af71a84f343c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -31,6 +31,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Check the consistency of the data block. @@ -224,6 +225,8 @@ xfs_dir3_data_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index f300240ebb8d..3923e1f94697 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function declarations. @@ -164,6 +165,8 @@ xfs_dir3_leaf_verify( return false; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) + return false; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index cc28e924545b..70b0cb2fd556 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Function declarations. @@ -97,6 +98,8 @@ xfs_dir3_free_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9590a069e556..8774498ce0ff 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -60,6 +60,14 @@ struct xfs_ifork; #define XFS_SB_VERSION_MOREBITSBIT 0x8000 /* + * The size of a single extended attribute on disk is limited by + * the size of index values within the attribute entries themselves. + * These are be16 fields, so we can only support attribute data + * sizes up to 2^16 bytes in length. + */ +#define XFS_XATTR_SIZE_MAX (1 << 16) + +/* * Supported feature bit list is just all bits in the versionnum field because * we've used them all up and understand them all. Except, of course, for the * shared superblock bit, which nobody knows what it does and so is unsupported. @@ -1483,13 +1491,17 @@ struct xfs_acl { */ #define XFS_ACL_MAX_ENTRIES(mp) \ (xfs_sb_version_hascrc(&mp->m_sb) \ - ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ + ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ sizeof(struct xfs_acl_entry) \ : 25) -#define XFS_ACL_MAX_SIZE(mp) \ +#define XFS_ACL_SIZE(cnt) \ (sizeof(struct xfs_acl) + \ - sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) + sizeof(struct xfs_acl_entry) * cnt) + +#define XFS_ACL_MAX_SIZE(mp) \ + XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp))) + /* On-disk XFS extended attribute names */ #define SGI_ACL_FILE "SGI_ACL_FILE" diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 89689c6a43e2..b2b73a998d42 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -490,6 +490,16 @@ typedef struct xfs_swapext #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ /* + * ioctl limits + */ +#ifdef XATTR_LIST_MAX +# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX +#else +# define XFS_XATTR_LIST_MAX 65536 +#endif + + +/* * ioctl commands that are used by Linux filesystems */ #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 54deb2d12ac6..70c1db99f6a7 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -38,6 +38,7 @@ #include "xfs_icreate_item.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_log.h" /* @@ -2500,9 +2501,14 @@ xfs_agi_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) return false; + } + /* * Validate the magic number of the agi block. */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 47425140f343..a0b071d881a0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -35,6 +35,7 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_log.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -163,6 +164,15 @@ xfs_mount_validate_sb( "Filesystem can not be safely mounted by this kernel."); return -EINVAL; } + } else if (xfs_sb_version_hascrc(sbp)) { + /* + * We can't read verify the sb LSN because the read verifier is + * called before the log is allocated and processed. We know the + * log is set up before write verifier (!check_version) calls, + * so just check it here. + */ + if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) + return -EFSCORRUPTED; } if (xfs_sb_version_has_pquotino(sbp)) { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 8f8af05b3f13..cb6fd20a4d3d 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -31,6 +31,7 @@ #include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* @@ -60,6 +61,7 @@ xfs_symlink_hdr_set( if (!xfs_sb_version_hascrc(&mp->m_sb)) return 0; + memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr)); dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); dsl->sl_offset = cpu_to_be32(offset); dsl->sl_bytes = cpu_to_be32(size); @@ -116,6 +118,8 @@ xfs_symlink_verify( return false; if (dsl->sl_owner == 0) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) + return false; return true; } @@ -183,6 +187,7 @@ xfs_symlink_local_to_remote( if (!xfs_sb_version_hascrc(&mp->m_sb)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); return; } @@ -198,4 +203,6 @@ xfs_symlink_local_to_remote( buf = bp->b_addr; buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + + ifp->if_bytes - 1); } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4b641676f258..6bb470fbb8e8 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -37,16 +37,19 @@ STATIC struct posix_acl * xfs_acl_from_disk( - struct xfs_acl *aclp, - int max_entries) + const struct xfs_acl *aclp, + int len, + int max_entries) { struct posix_acl_entry *acl_e; struct posix_acl *acl; - struct xfs_acl_entry *ace; + const struct xfs_acl_entry *ace; unsigned int count, i; + if (len < sizeof(*aclp)) + return ERR_PTR(-EFSCORRUPTED); count = be32_to_cpu(aclp->acl_cnt); - if (count > max_entries) + if (count > max_entries || XFS_ACL_SIZE(count) != len) return ERR_PTR(-EFSCORRUPTED); acl = posix_acl_alloc(count, GFP_KERNEL); @@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type) */ if (error == -ENOATTR) goto out_update_cache; + acl = ERR_PTR(error); goto out; } - acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); + acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); if (IS_ERR(acl)) goto out; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 3841b07f27bf..52f8255d6bdf 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -20,7 +20,6 @@ struct inode; struct posix_acl; -struct xfs_inode; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); @@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ + +extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags); + #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 50ab2879b9da..29e7e5dd5178 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -172,6 +172,12 @@ xfs_setfilesize_ioend( current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); + /* we abort the update if there was an IO error */ + if (ioend->io_error) { + xfs_trans_cancel(tp); + return ioend->io_error; + } + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } @@ -212,14 +218,17 @@ xfs_end_io( ioend->io_error = -EIO; goto done; } - if (ioend->io_error) - goto done; /* * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. + * Detecting and handling completion IO errors is done individually + * for each case as different cleanup operations need to be performed + * on error. */ if (ioend->io_type == XFS_IO_UNWRITTEN) { + if (ioend->io_error) + goto done; error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); } else if (ioend->io_append_trans) { @@ -1250,13 +1259,28 @@ xfs_vm_releasepage( * the DIO. There is only going to be one reference to the ioend and its life * cycle is constrained by the DIO completion code. hence we don't need * reference counting here. + * + * Note that for DIO, an IO to the highest supported file block offset (i.e. + * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 + * bit variable. Hence if we see this overflow, we have to assume that the IO is + * extending the file size. We won't know for sure until IO completion is run + * and the actual max write offset is communicated to the IO completion + * routine. + * + * For DAX page faults, we are preparing to never see unwritten extents here, + * nor should we ever extend the inode size. Hence we will soon have nothing to + * do here for this case, ensuring we don't have to provide an IO completion + * callback to free an ioend that we don't actually need for a fault into the + * page at offset (2^63 - 1FSB) bytes. */ + static void xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset) + xfs_off_t offset, + bool dax_fault) { struct xfs_ioend *ioend; xfs_off_t size = bh_result->b_size; @@ -1269,6 +1293,13 @@ xfs_map_direct( trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); + if (dax_fault) { + ASSERT(type == XFS_IO_OVERWRITE); + trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, + imap); + return; + } + if (bh_result->b_private) { ioend = bh_result->b_private; ASSERT(ioend->io_size > 0); @@ -1283,7 +1314,8 @@ xfs_map_direct( ioend->io_size, ioend->io_type, imap); } else if (type == XFS_IO_UNWRITTEN || - offset + size > i_size_read(inode)) { + offset + size > i_size_read(inode) || + offset + size < 0) { ioend = xfs_alloc_ioend(inode, type); ioend->io_offset = offset; ioend->io_size = size; @@ -1345,7 +1377,8 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - bool direct) + bool direct, + bool dax_fault) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1393,18 +1426,20 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { + imap.br_startblock == DELAYSTARTBLOCK) || + (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { if (direct || xfs_get_extsz_hint(ip)) { /* - * Drop the ilock in preparation for starting the block - * allocation transaction. It will be retaken - * exclusively inside xfs_iomap_write_direct for the - * actual allocation. + * xfs_iomap_write_direct() expects the shared lock. It + * is unlocked on return. */ - xfs_iunlock(ip, lockmode); + if (lockmode == XFS_ILOCK_EXCL) + xfs_ilock_demote(ip, lockmode); + error = xfs_iomap_write_direct(ip, offset, size, &imap, nimaps); if (error) @@ -1441,6 +1476,12 @@ __xfs_get_blocks( goto out_unlock; } + if (IS_DAX(inode) && create) { + ASSERT(!ISUNWRITTEN(&imap)); + /* zeroing is not needed at a higher layer */ + new = 0; + } + /* trim mapping down to size requested */ if (direct || size > (1 << inode->i_blkbits)) xfs_map_trim_size(inode, iblock, bh_result, @@ -1458,7 +1499,8 @@ __xfs_get_blocks( set_buffer_unwritten(bh_result); /* direct IO needs special help */ if (create && direct) - xfs_map_direct(inode, bh_result, &imap, offset); + xfs_map_direct(inode, bh_result, &imap, offset, + dax_fault); } /* @@ -1505,7 +1547,7 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, false); + return __xfs_get_blocks(inode, iblock, bh_result, create, false, false); } int @@ -1515,7 +1557,17 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, true); + return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); +} + +int +xfs_get_blocks_dax_fault( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); } static void @@ -1614,45 +1666,6 @@ xfs_end_io_direct_write( __xfs_end_io_direct_write(inode, ioend, offset, size); } -/* - * For DAX we need a mapping buffer callback for unwritten extent conversion - * when page faults allocate blocks and then zero them. Note that in this - * case the mapping indicated by the ioend may extend beyond EOF. We most - * definitely do not want to extend EOF here, so we trim back the ioend size to - * EOF. - */ -#ifdef CONFIG_FS_DAX -void -xfs_end_io_dax_write( - struct buffer_head *bh, - int uptodate) -{ - struct xfs_ioend *ioend = bh->b_private; - struct inode *inode = ioend->io_inode; - ssize_t size = ioend->io_size; - - ASSERT(IS_DAX(ioend->io_inode)); - - /* if there was an error zeroing, then don't convert it */ - if (!uptodate) - ioend->io_error = -EIO; - - /* - * Trim update to EOF, so we don't extend EOF during unwritten extent - * conversion of partial EOF blocks. - */ - spin_lock(&XFS_I(inode)->i_flags_lock); - if (ioend->io_offset + size > i_size_read(inode)) - size = i_size_read(inode) - ioend->io_offset; - spin_unlock(&XFS_I(inode)->i_flags_lock); - - __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); - -} -#else -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } -#endif - static inline ssize_t xfs_vm_do_dio( struct inode *inode, diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 86afd1ac7895..f6ffc9ae5ceb 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); int xfs_get_blocks_direct(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); +int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, + struct buffer_head *map_bh, int create); extern void xfs_count_page_state(struct page *, int *, int *); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 65fb37a18e92..0ef7c2ed3f8a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -511,7 +511,7 @@ xfs_attr_list_int( xfs_inode_t *dp = context->dp; uint lock_mode; - XFS_STATS_INC(xs_attr_list); + XFS_STATS_INC(dp->i_mount, xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3bf4ad0d19e4..dbae6490a79a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) } /* + * Routine to zero an extent on disk allocated to the specific inode. + * + * The VFS functions take a linearised filesystem block offset, so we have to + * convert the sparse xfs fsb to the right format first. + * VFS types are real funky, too. + */ +int +xfs_zero_extent( + struct xfs_inode *ip, + xfs_fsblock_t start_fsb, + xfs_off_t count_fsb) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); + sector_t block = XFS_BB_TO_FSBT(mp, sector); + ssize_t size = XFS_FSB_TO_B(mp, count_fsb); + + if (IS_DAX(VFS_I(ip))) + return dax_clear_blocks(VFS_I(ip), block, size); + + /* + * let the block layer decide on the fastest method of + * implementing the zeroing. + */ + return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); + +} + +/* * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi * caller. Frees all the extents that need freeing, which must be done * last due to locking considerations. We never free any extents in @@ -229,6 +258,13 @@ xfs_bmap_rtalloc( xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); + + /* Zero the extent if we were asked to do so */ + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); + if (error) + return error; + } } else { ap->length = 0; } @@ -1027,7 +1063,7 @@ xfs_alloc_file_space( xfs_bmap_init(&free_list, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, alloc_type, &firstfsb, - 0, imapp, &nimaps, &free_list); + resblks, imapp, &nimaps, &free_list); if (error) { goto error0; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8ecffb35935b..3243cdf97f33 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -201,7 +201,7 @@ _xfs_buf_alloc( atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); - XFS_STATS_INC(xb_create); + XFS_STATS_INC(target->bt_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); return bp; @@ -354,15 +354,16 @@ retry: */ if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, __func__, gfp_mask); - XFS_STATS_INC(xb_page_retries); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - XFS_STATS_INC(xb_page_found); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; @@ -516,7 +517,7 @@ _xfs_buf_find( new_bp->b_pag = pag; spin_unlock(&pag->pag_buf_lock); } else { - XFS_STATS_INC(xb_miss_locked); + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); } @@ -529,11 +530,11 @@ found: if (!xfs_buf_trylock(bp)) { if (flags & XBF_TRYLOCK) { xfs_buf_rele(bp); - XFS_STATS_INC(xb_busy_locked); + XFS_STATS_INC(btp->bt_mount, xb_busy_locked); return NULL; } xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); + XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); } /* @@ -549,7 +550,7 @@ found: } trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(xb_get_locked); + XFS_STATS_INC(btp->bt_mount, xb_get_locked); return bp; } @@ -603,7 +604,7 @@ found: } } - XFS_STATS_INC(xb_get); + XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; } @@ -643,7 +644,7 @@ xfs_buf_read_map( trace_xfs_buf_read(bp, flags, _RET_IP_); if (!XFS_BUF_ISDONE(bp)) { - XFS_STATS_INC(xb_get_read); + XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index a989a9c7edb7..642d55d10075 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -666,7 +666,7 @@ xfs_readdir( return -EIO; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_getdents); + XFS_STATS_INC(dp->i_mount, xs_dir_getdents); args.dp = dp; args.geo = dp->i_mount->m_dir_geo; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 30cb3afb67f0..7ac6c5c586cb 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -75,9 +75,9 @@ xfs_qm_dqdestroy( ASSERT(list_empty(&dqp->q_lru)); mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_qm_dqzone, dqp); - XFS_STATS_DEC(xs_qm_dquot); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); + kmem_zone_free(xfs_qm_dqzone, dqp); } /* @@ -605,7 +605,7 @@ xfs_qm_dqread( break; } - XFS_STATS_INC(xs_qm_dquot); + XFS_STATS_INC(mp, xs_qm_dquot); trace_xfs_dqread(dqp); @@ -747,12 +747,12 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_hit(dqp); - XFS_STATS_INC(xs_qm_dqcachehits); + XFS_STATS_INC(mp, xs_qm_dqcachehits); *O_dqpp = dqp; return 0; } mutex_unlock(&qi->qi_tree_lock); - XFS_STATS_INC(xs_qm_dqcachemisses); + XFS_STATS_INC(mp, xs_qm_dqcachemisses); /* * Dquot cache miss. We don't want to keep the inode lock across @@ -806,7 +806,7 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_dup(dqp); xfs_qm_dqdestroy(dqp); - XFS_STATS_INC(xs_qm_dquot_dups); + XFS_STATS_INC(mp, xs_qm_dquot_dups); goto restart; } @@ -846,7 +846,7 @@ xfs_qm_dqput( trace_xfs_dqput_free(dqp); if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) - XFS_STATS_INC(xs_qm_dquot_unused); + XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); } xfs_dqunlock(dqp); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e78feb400e22..f5392ab2def1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -242,19 +242,30 @@ xfs_file_fsync( } /* - * All metadata updates are logged, which means that we just have - * to flush the log up to the latest LSN that touched the inode. + * All metadata updates are logged, which means that we just have to + * flush the log up to the latest LSN that touched the inode. If we have + * concurrent fsync/fdatasync() calls, we need them to all block on the + * log force before we clear the ili_fsync_fields field. This ensures + * that we don't get a racing sync operation that does not wait for the + * metadata to hit the journal before returning. If we race with + * clearing the ili_fsync_fields, then all that will happen is the log + * force will do nothing as the lsn will already be on disk. We can't + * race with setting ili_fsync_fields because that is done under + * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared + * until after the ili_fsync_fields is cleared. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || - (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (lsn) + if (lsn) { error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); + ip->i_itemp->ili_fsync_fields = 0; + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); /* * If we only have a single device, and the log force about was @@ -287,7 +298,7 @@ xfs_file_read_iter( xfs_fsize_t n; loff_t pos = iocb->ki_pos; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(mp, xs_read_calls); if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; @@ -365,7 +376,7 @@ xfs_file_read_iter( ret = generic_file_read_iter(iocb, to); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(mp, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -383,7 +394,7 @@ xfs_file_splice_read( int ioflags = 0; ssize_t ret; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(ip->i_mount, xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -401,7 +412,7 @@ xfs_file_splice_read( else ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -482,6 +493,8 @@ xfs_zero_eof( ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(offset > isize); + trace_xfs_zero_eof(ip, isize, offset - isize); + /* * First handle zeroing the block on which isize resides. * @@ -574,6 +587,7 @@ xfs_file_aio_write_checks( struct xfs_inode *ip = XFS_I(inode); ssize_t error = 0; size_t count = iov_iter_count(from); + bool drained_dio = false; restart: error = generic_write_checks(iocb, from); @@ -611,12 +625,13 @@ restart: bool zero = false; spin_unlock(&ip->i_flags_lock); - if (*iolock == XFS_IOLOCK_SHARED) { - xfs_rw_iunlock(ip, *iolock); - *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, *iolock); - iov_iter_reexpand(from, count); - + if (!drained_dio) { + if (*iolock == XFS_IOLOCK_SHARED) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); + } /* * We now have an IO submission barrier in place, but * AIO can do EOF updates during IO completion and hence @@ -626,6 +641,7 @@ restart: * no-op. */ inode_dio_wait(inode); + drained_dio = true; goto restart; } error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); @@ -867,7 +883,7 @@ xfs_file_write_iter( ssize_t ret; size_t ocount = iov_iter_count(from); - XFS_STATS_INC(xs_write_calls); + XFS_STATS_INC(ip->i_mount, xs_write_calls); if (ocount == 0) return 0; @@ -883,7 +899,7 @@ xfs_file_write_iter( if (ret > 0) { ssize_t err; - XFS_STATS_ADD(xs_write_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); /* Handle various SYNC-type writes */ err = generic_write_sync(file, iocb->ki_pos - ret, ret); @@ -1477,7 +1493,7 @@ xfs_file_llseek( * * mmap_sem (MM) * sb_start_pagefault(vfs, freeze) - * i_mmap_lock (XFS - truncate serialisation) + * i_mmaplock (XFS - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ @@ -1503,10 +1519,9 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else { - ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = block_page_mkwrite_return(ret); } @@ -1538,7 +1553,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); + ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); @@ -1546,6 +1561,13 @@ xfs_filemap_fault( return ret; } +/* + * Similar to xfs_filemap_fault(), the DAX fault path can call into here on + * both read and write faults. Hence we need to handle both cases. There is no + * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * handle both cases here. @flags carries the information on the type of fault + * occuring. + */ STATIC int xfs_filemap_pmd_fault( struct vm_area_struct *vma, @@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault( trace_xfs_filemap_pmd_fault(ip); - sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, + NULL); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - sb_end_pagefault(inode->i_sb); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + + return ret; +} + +/* + * pfn_mkwrite was originally inteneded to ensure we capture time stamp + * updates on write faults. In reality, it's need to serialise against + * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() + * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault + * barrier in place. + */ +static int +xfs_filemap_pfn_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + + struct inode *inode = file_inode(vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + trace_xfs_filemap_pfn_mkwrite(ip); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + + /* check if the faulting page hasn't raced with truncate */ + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + sb_end_pagefault(inode->i_sb); return ret; + } static const struct vm_operations_struct xfs_file_vm_ops = { @@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = { .pmd_fault = xfs_filemap_pmd_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, + .pfn_mkwrite = xfs_filemap_pfn_mkwrite, }; STATIC int diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0a326bd64d4e..d7a490f24ead 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -63,7 +63,7 @@ xfs_inode_alloc( return NULL; } - XFS_STATS_INC(vn_active); + XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); @@ -129,7 +129,7 @@ xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); - XFS_STATS_DEC(vn_active); + XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } @@ -159,7 +159,7 @@ xfs_iget_cache_hit( spin_lock(&ip->i_flags_lock); if (ip->i_ino != ino) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -177,7 +177,7 @@ xfs_iget_cache_hit( */ if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -259,7 +259,7 @@ xfs_iget_cache_hit( xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); - XFS_STATS_INC(xs_ig_found); + XFS_STATS_INC(mp, xs_ig_found); return 0; @@ -342,7 +342,7 @@ xfs_iget_cache_miss( error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { WARN_ON(error != -EEXIST); - XFS_STATS_INC(xs_ig_dup); + XFS_STATS_INC(mp, xs_ig_dup); error = -EAGAIN; goto out_preload_end; } @@ -412,7 +412,7 @@ xfs_iget( if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return -EINVAL; - XFS_STATS_INC(xs_ig_attempts); + XFS_STATS_INC(mp, xs_ig_attempts); /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); @@ -429,7 +429,7 @@ again: goto out_error_or_again; } else { rcu_read_unlock(); - XFS_STATS_INC(xs_ig_missed); + XFS_STATS_INC(mp, xs_ig_missed); error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, flags, lock_flags); @@ -965,7 +965,7 @@ reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); - XFS_STATS_INC(xs_ig_reclaims); + XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); /* * Remove the inode from the per-AG radix tree. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dc40a6d5ae0d..8ee393996b7d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2365,6 +2365,7 @@ retry: iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); @@ -3271,8 +3272,8 @@ xfs_iflush_cluster( } if (clcount) { - XFS_STATS_INC(xs_icluster_flushcnt); - XFS_STATS_ADD(xs_icluster_flushinode, clcount); + XFS_STATS_INC(mp, xs_icluster_flushcnt); + XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); } out_free: @@ -3345,7 +3346,7 @@ xfs_iflush( struct xfs_dinode *dip; int error; - XFS_STATS_INC(xs_iflush_count); + XFS_STATS_INC(mp, xs_iflush_count); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3560,6 +3561,7 @@ xfs_iflush_int( */ iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 62bd80f4edd9..d14b12b8cfef 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -719,6 +719,7 @@ xfs_iflush_abort( * attempted. */ iip->ili_fields = 0; + iip->ili_fsync_fields = 0; } /* * Release the inode's flush lock since we're done with it. diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 488d81254e28..4c7722e325b3 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ + unsigned int ili_fsync_fields; /* logged since last fsync */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ea7d85af5310..d42738deec6d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -40,6 +40,7 @@ #include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_pnfs.h" +#include "xfs_acl.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -411,7 +412,7 @@ xfs_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* @@ -455,7 +456,7 @@ xfs_attrmulti_attr_get( unsigned char *kbuf; int error = -EFAULT; - if (*len > XATTR_SIZE_MAX) + if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = kmem_zalloc_large(*len, KM_SLEEP); if (!kbuf) @@ -482,17 +483,22 @@ xfs_attrmulti_attr_set( __uint32_t flags) { unsigned char *kbuf; + int error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XATTR_SIZE_MAX) + if (len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = memdup_user(ubuf, len); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + kfree(kbuf); + return error; } int @@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove( unsigned char *name, __uint32_t flags) { + int error; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - return xfs_attr_remove(XFS_I(inode), name, flags); + error = xfs_attr_remove(XFS_I(inode), name, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + return error; } STATIC int @@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags( xfs_diflags_to_linux(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); return 0; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index b88bdc85dd3d..1a05d8ae327d 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle( sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1f86033171c8..f4f5b43cf647 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -131,20 +131,30 @@ xfs_iomap_write_direct( uint qblocks, resblks, resrtextents; int committed; int error; - - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; + int lockmode; + int bmapi_flags = XFS_BMAPI_PREALLOC; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); + lockmode = XFS_ILOCK_SHARED; /* locked by caller */ + + ASSERT(xfs_isilocked(ip, lockmode)); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > XFS_ISIZE(ip)) { + /* + * Assert that the in-core extent list is present since this can + * call xfs_iread_extents() and we only have the ilock shared. + * This should be safe because the lock was held around a bmapi + * call in the caller and we only need it to access the in-core + * list. + */ + ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & + XFS_IFEXTENTS); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return error; + goto out_unlock; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -174,9 +184,35 @@ xfs_iomap_write_direct( } /* + * Drop the shared lock acquired by the caller, attach the dquot if + * necessary and move on to transaction setup. + */ + xfs_iunlock(ip, lockmode); + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + + /* * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + + /* + * For DAX, we do not allocate unwritten extents, but instead we zero + * the block before we commit the transaction. Ideally we'd like to do + * this outside the transaction context, but if we commit and then crash + * we may not have zeroed the blocks and this will be exposed on + * recovery of the allocation. Hence we must zero before commit. + * Further, if we are mapping unwritten extents here, we need to zero + * and convert them to written so that we don't need an unwritten extent + * callback for DAX. This also means that we need to be able to dip into + * the reserve block pool if there is no space left but we need to do + * unwritten extent conversion. + */ + if (IS_DAX(VFS_I(ip))) { + bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; + tp->t_flags |= XFS_TRANS_RESERVE; + } error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); /* @@ -187,7 +223,8 @@ xfs_iomap_write_direct( return error; } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -202,8 +239,8 @@ xfs_iomap_write_direct( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_PREALLOC, &firstfsb, 0, - imap, &nimaps, &free_list); + bmapi_flags, &firstfsb, resblks, imap, + &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -213,6 +250,7 @@ xfs_iomap_write_direct( error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto out_bmap_cancel; + error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -229,7 +267,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; out_bmap_cancel: @@ -670,7 +708,7 @@ xfs_iomap_write_allocate( count_fsb = imap->br_blockcount; map_start_fsb = imap->br_startoff; - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); + XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* @@ -750,9 +788,9 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, 0, - &first_block, 1, - imap, &nimaps, &free_list); + count_fsb, 0, &first_block, + nres, imap, &nimaps, + &free_list); if (error) goto trans_cancel; @@ -777,7 +815,7 @@ xfs_iomap_write_allocate( if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + imap->br_blockcount))) { - XFS_STATS_INC(xs_xstrat_quick); + XFS_STATS_INC(mp, xs_xstrat_quick); return 0; } @@ -866,8 +904,8 @@ xfs_iomap_write_unwritten( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list); + XFS_BMAPI_CONVERT, &firstfsb, resblks, + &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8294132e6a3c..245268a0cdf0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -695,7 +695,7 @@ xfs_setattr_nonsize( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); @@ -922,7 +922,7 @@ xfs_setattr_size( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 85f883dd6207..ec0e239a0fa9 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -171,6 +171,13 @@ struct xfs_kobj { struct completion complete; }; +struct xstats { + struct xfsstats __percpu *xs_stats; + struct xfs_kobj xs_kobj; +}; + +extern struct xstats xfsstats; + /* Kernel uid/gid conversion. These are used to convert to/from the on disk * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. * The conversion here is type only, the value will remain the same since we diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index aaadee0969c9..f52c72a1a06f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -268,7 +268,7 @@ xlog_grant_head_wait( __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&head->lock); - XFS_STATS_INC(xs_sleep_logspace); + XFS_STATS_INC(log->l_mp, xs_sleep_logspace); trace_xfs_log_grant_sleep(log, tic); schedule(); @@ -379,7 +379,7 @@ xfs_log_regrant( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); /* * This is a new transaction on the ticket, so we need to change the @@ -448,7 +448,7 @@ xfs_log_reserve( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); ASSERT(*ticp == NULL); tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, @@ -1768,7 +1768,7 @@ xlog_sync( int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); int size; - XFS_STATS_INC(xs_log_writes); + XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ @@ -1805,7 +1805,7 @@ xlog_sync( bp = iclog->ic_bp; XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); + XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { @@ -2422,11 +2422,20 @@ xlog_write( &partial_copy_len); xlog_verify_dest_ptr(log, ptr); - /* copy region */ + /* + * Copy region. + * + * Unmount records just log an opheader, so can have + * empty payloads with no data region to copy. Hence we + * only copy the payload if the vector says it has data + * to copy. + */ ASSERT(copy_len >= 0); - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); - + if (copy_len > 0) { + memcpy(ptr, reg->i_addr + copy_off, copy_len); + xlog_write_adv_cnt(&ptr, &len, &log_offset, + copy_len); + } copy_len += start_rec_copy + sizeof(xlog_op_header_t); record_cnt++; data_cnt += contwr ? copy_len : 0; @@ -2913,7 +2922,7 @@ restart: iclog = log->l_iclog; if (iclog->ic_state != XLOG_STATE_ACTIVE) { - XFS_STATS_INC(xs_log_noiclogs); + XFS_STATS_INC(log->l_mp, xs_log_noiclogs); /* Wait for log writes to have flushed */ xlog_wait(&log->l_flush_wait, &log->l_icloglock); @@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs( } if (log->l_curr_block >= log->l_logBBsize) { + /* + * Rewind the current block before the cycle is bumped to make + * sure that the combined LSN never transiently moves forward + * when the log wraps to the next cycle. This is to support the + * unlocked sample of these fields from xlog_valid_lsn(). Most + * other cases should acquire l_icloglock. + */ + log->l_curr_block -= log->l_logBBsize; + ASSERT(log->l_curr_block >= 0); + smp_wmb(); log->l_curr_cycle++; if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) log->l_curr_cycle++; - log->l_curr_block -= log->l_logBBsize; - ASSERT(log->l_curr_block >= 0); } ASSERT(iclog == log->l_iclog); log->l_iclog = iclog->ic_next; @@ -3212,7 +3229,7 @@ _xfs_log_force( struct xlog_in_core *iclog; xfs_lsn_t lsn; - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); xlog_cil_force(log); @@ -3297,7 +3314,7 @@ maybe_sleep: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -3362,7 +3379,7 @@ _xfs_log_force_lsn( ASSERT(lsn != 0); - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) @@ -3411,7 +3428,7 @@ try_again: (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); @@ -3441,7 +3458,7 @@ try_again: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -4023,3 +4040,45 @@ xlog_iclogs_empty( return 1; } +/* + * Verify that an LSN stamped into a piece of metadata is valid. This is + * intended for use in read verifiers on v5 superblocks. + */ +bool +xfs_log_check_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn) +{ + struct xlog *log = mp->m_log; + bool valid; + + /* + * norecovery mode skips mount-time log processing and unconditionally + * resets the in-core LSN. We can't validate in this mode, but + * modifications are not allowed anyways so just return true. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return true; + + /* + * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is + * handled by recovery and thus safe to ignore here. + */ + if (lsn == NULLCOMMITLSN) + return true; + + valid = xlog_valid_lsn(mp->m_log, lsn); + + /* warn the user about what's gone wrong before verifier failure */ + if (!valid) { + spin_lock(&log->l_icloglock); + xfs_warn(mp, +"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). " +"Please unmount and run xfs_repair (>= v4.3) to resolve.", + CYCLE_LSN(lsn), BLOCK_LSN(lsn), + log->l_curr_cycle, log->l_curr_block); + spin_unlock(&log->l_icloglock); + } + + return valid; +} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 09d91d3166cd..aa533a7d50f2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); void xfs_log_worker(struct work_struct *work); void xfs_log_quiesce(struct xfs_mount *mp); +bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 950f3f94720c..8daba7491b13 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) remove_wait_queue(wq, &wait); } +/* + * The LSN is valid so long as it is behind the current LSN. If it isn't, this + * means that the next log record that includes this metadata could have a + * smaller LSN. In turn, this means that the modification in the log would not + * replay. + */ +static inline bool +xlog_valid_lsn( + struct xlog *log, + xfs_lsn_t lsn) +{ + int cur_cycle; + int cur_block; + bool valid = true; + + /* + * First, sample the current lsn without locking to avoid added + * contention from metadata I/O. The current cycle and block are updated + * (in xlog_state_switch_iclogs()) and read here in a particular order + * to avoid false negatives (e.g., thinking the metadata LSN is valid + * when it is not). + * + * The current block is always rewound before the cycle is bumped in + * xlog_state_switch_iclogs() to ensure the current LSN is never seen in + * a transiently forward state. Instead, we can see the LSN in a + * transiently behind state if we happen to race with a cycle wrap. + */ + cur_cycle = ACCESS_ONCE(log->l_curr_cycle); + smp_rmb(); + cur_block = ACCESS_ONCE(log->l_curr_block); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { + /* + * If the metadata LSN appears invalid, it's possible the check + * above raced with a wrap to the next log cycle. Grab the lock + * to check for sure. + */ + spin_lock(&log->l_icloglock); + cur_cycle = log->l_curr_cycle; + cur_block = log->l_curr_block; + spin_unlock(&log->l_icloglock); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) + valid = false; + } + + return valid; +} + #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 512a0945d52a..c5ecaacdd218 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans( * previous record. Copy the rest of the header. */ if (list_empty(&trans->r_itemq)) { - ASSERT(len < sizeof(struct xfs_trans_header)); + ASSERT(len <= sizeof(struct xfs_trans_header)); if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); return -EIO; @@ -4609,9 +4609,19 @@ xlog_recover( int error; /* find the tail of the log */ - if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) + error = xlog_find_tail(log, &head_blk, &tail_blk); + if (error) return error; + /* + * The superblock was read before the log was available and thus the LSN + * could not be verified. Check the superblock LSN against the current + * LSN now that it's known. + */ + if (xfs_sb_version_hascrc(&log->l_mp->m_sb) && + !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) + return -EINVAL; + if (tail_blk != head_blk) { /* There used to be a comment here: * diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index d8b67547ab34..11792d888e4e 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,6 +17,7 @@ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_error.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ + int level; \ \ va_start(args, fmt); \ \ @@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ \ __xfs_printk(kern_level, mp, &vaf); \ va_end(args); \ + \ + if (!kstrtoint(kern_level, 0, &level) && \ + level <= LOGLEVEL_ERR && \ + xfs_error_level >= XFS_ERRLEVEL_HIGH) \ + xfs_stack_trace(); \ } \ define_xfs_printk_level(xfs_emerg, KERN_EMERG); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bf92e0c037c7..bb753b359bee 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; +void +xfs_uuid_table_free(void) +{ + if (xfs_uuid_table_size == 0) + return; + kmem_free(xfs_uuid_table); + xfs_uuid_table = NULL; + xfs_uuid_table_size = 0; +} + /* * See if the UUID is unique among mounted XFS filesystems. * Mount fails if UUID is nil or a FS with the same UUID is already mounted. @@ -693,10 +703,15 @@ xfs_mountfs( if (error) goto out; - error = xfs_uuid_mount(mp); + error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype, + &mp->m_kobj, "stats"); if (error) goto out_remove_sysfs; + error = xfs_uuid_mount(mp); + if (error) + goto out_del_stats; + /* * Set the minimum read and write sizes */ @@ -971,6 +986,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_del_stats: + xfs_sysfs_del(&mp->m_stats.xs_kobj); out_remove_sysfs: xfs_sysfs_del(&mp->m_kobj); out: @@ -1047,6 +1064,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); + xfs_log_unmount(mp); xfs_da_unmount(mp); xfs_uuid_unmount(mp); @@ -1056,6 +1074,7 @@ xfs_unmountfs( #endif xfs_free_perag(mp); + xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_kobj); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7999e91cd49a..b57098481c10 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -127,6 +127,7 @@ typedef struct xfs_mount { int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; + struct xstats m_stats; /* per-fs stats */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_data_workqueue; @@ -312,6 +313,7 @@ typedef struct xfs_perag { int pagb_count; /* pagb slots in use */ } xfs_perag_t; +extern void xfs_uuid_table_free(void); extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); @@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); +int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, + xfs_off_t count_fsb); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index ab4a6066f7ca..dc6221942b85 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -181,6 +181,11 @@ xfs_fs_map_blocks( ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { + /* + * xfs_iomap_write_direct() expects to take ownership of + * the shared ilock. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 587174fd4f2c..532ab79d38fe 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -184,7 +184,7 @@ xfs_qm_dqpurge( */ ASSERT(!list_empty(&dqp->q_lru)); list_lru_del(&qi->qi_lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(mp, xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); return 0; @@ -448,11 +448,11 @@ xfs_qm_dquot_isolate( */ if (dqp->q_nrefs) { xfs_dqunlock(dqp); - XFS_STATS_INC(xs_qm_dqwants); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants); trace_xfs_dqreclaim_want(dqp); list_lru_isolate(lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); return LRU_REMOVED; } @@ -496,19 +496,19 @@ xfs_qm_dquot_isolate( ASSERT(dqp->q_nrefs == 0); list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); return LRU_REMOVED; out_miss_busy: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); return LRU_SKIP; out_unlock_dirty: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); xfs_dqunlock(dqp); spin_lock(lru_lock); return LRU_RETRY; diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index f2240383d4bb..8686df6c7609 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -18,20 +18,21 @@ #include "xfs.h" #include <linux/proc_fs.h> -DEFINE_PER_CPU(struct xfsstats, xfsstats); +struct xstats xfsstats; -static int counter_val(int idx) +static int counter_val(struct xfsstats __percpu *stats, int idx) { int val = 0, cpu; for_each_possible_cpu(cpu) - val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); + val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx)); return val; } -static int xfs_stat_proc_show(struct seq_file *m, void *v) +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { int i, j; + int len = 0; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) }; /* Loop over all stats groups */ + for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { - seq_printf(m, "%s", xstats[i].desc); + len += snprintf(buf + len, PATH_MAX - len, "%s", + xstats[i].desc); /* inner loop does each group */ for (; j < xstats[i].endpoint; j++) - seq_printf(m, " %u", counter_val(j)); - seq_putc(m, '\n'); + len += snprintf(buf + len, PATH_MAX - len, " %u", + counter_val(stats, j)); + len += snprintf(buf + len, PATH_MAX - len, "\n"); } /* extra precision counters */ for_each_possible_cpu(i) { - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes; + xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes; + xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes; } - seq_printf(m, "xpc %Lu %Lu %Lu\n", + len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - seq_printf(m, "debug %u\n", + len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif - return 0; + + return len; } -static int xfs_stat_proc_open(struct inode *inode, struct file *file) +void xfs_stats_clearall(struct xfsstats __percpu *stats) { - return single_open(file, xfs_stat_proc_show, NULL); + int c; + __uint32_t vn_active; + + xfs_notice(NULL, "Clearing xfsstats"); + for_each_possible_cpu(c) { + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu_ptr(stats, c)->vn_active; + memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); + per_cpu_ptr(stats, c)->vn_active = vn_active; + preempt_enable(); + } } -static const struct file_operations xfs_stat_proc_fops = { - .owner = THIS_MODULE, - .open = xfs_stat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* legacy quota interfaces */ #ifdef CONFIG_XFS_QUOTA static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - 0, - counter_val(XFSSTAT_END_XQMSTAT), - 0, - counter_val(XFSSTAT_END_XQMSTAT + 1)); + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT), + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1)); return 0; } @@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) seq_printf(m, "qm"); for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) - seq_printf(m, " %u", counter_val(j)); + seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j)); seq_putc(m, '\n'); return 0; } @@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = { }; #endif /* CONFIG_XFS_QUOTA */ +#ifdef CONFIG_PROC_FS int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) + return -ENOMEM; + + if (!proc_symlink("fs/xfs/stat", NULL, + "/sys/fs/xfs/stats/stats")) goto out; - if (!proc_create("fs/xfs/stat", 0, NULL, - &xfs_stat_proc_fops)) - goto out_remove_xfs_dir; #ifdef CONFIG_XFS_QUOTA if (!proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops)) - goto out_remove_stat_file; + goto out; if (!proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops)) - goto out_remove_xqmstat_file; + goto out; #endif return 0; -#ifdef CONFIG_XFS_QUOTA - out_remove_xqmstat_file: - remove_proc_entry("fs/xfs/xqmstat", NULL); - out_remove_stat_file: - remove_proc_entry("fs/xfs/stat", NULL); -#endif - out_remove_xfs_dir: - remove_proc_entry("fs/xfs", NULL); - out: +out: + remove_proc_subtree("fs/xfs", NULL); return -ENOMEM; } void xfs_cleanup_procfs(void) { -#ifdef CONFIG_XFS_QUOTA - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -#endif - remove_proc_entry("fs/xfs/stat", NULL); - remove_proc_entry("fs/xfs", NULL); + remove_proc_subtree("fs/xfs", NULL); } +#endif /* CONFIG_PROC_FS */ diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c8f238b8299a..483b0eff1988 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -19,8 +19,6 @@ #define __XFS_STATS_H__ -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - #include <linux/percpu.h> /* @@ -215,15 +213,29 @@ struct xfsstats { __uint64_t xs_read_bytes; }; -DECLARE_PER_CPU(struct xfsstats, xfsstats); +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); +void xfs_stats_clearall(struct xfsstats __percpu *stats); +extern struct xstats xfsstats; -/* - * We don't disable preempt, not too worried about poking the - * wrong CPU's stat for now (also aggregated before reporting). - */ -#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) -#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) -#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) +#define XFS_STATS_INC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \ +} while (0) + +#define XFS_STATS_DEC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \ +} while (0) + +#define XFS_STATS_ADD(mp, v, inc) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \ +} while (0) + +#if defined(CONFIG_PROC_FS) extern int xfs_init_procfs(void); extern void xfs_cleanup_procfs(void); @@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void); #else /* !CONFIG_PROC_FS */ -# define XFS_STATS_INC(count) -# define XFS_STATS_DEC(count) -# define XFS_STATS_ADD(count, inc) - static inline int xfs_init_procfs(void) { return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 904f637cfa5f..36bd8825bfb0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -838,17 +838,18 @@ xfs_init_mount_workqueues( goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, + mp->m_fsname); if (!mp->m_log_workqueue) goto out_destroy_reclaim; mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) goto out_destroy_log; @@ -922,7 +923,7 @@ xfs_fs_destroy_inode( trace_xfs_destroy_inode(ip); - XFS_STATS_INC(vn_reclaim); + XFS_STATS_INC(ip->i_mount, vn_reclaim); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -983,8 +984,8 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); + XFS_STATS_INC(ip->i_mount, vn_rele); + XFS_STATS_INC(ip->i_mount, vn_remove); xfs_inactive(ip); } @@ -1474,9 +1475,16 @@ xfs_fs_fill_super( if (error) goto out_destroy_workqueues; + /* Allocate stats memory before we do operations that might use it */ + mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); + if (!mp->m_stats.xs_stats) { + error = -ENOMEM; + goto out_destroy_counters; + } + error = xfs_readsb(mp, flags); if (error) - goto out_destroy_counters; + goto out_free_stats; error = xfs_finish_flags(mp); if (error) @@ -1545,9 +1553,11 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); + out_free_stats: + free_percpu(mp->m_stats.xs_stats); out_destroy_counters: xfs_destroy_percpu_counters(mp); -out_destroy_workqueues: + out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); @@ -1574,6 +1584,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + free_percpu(mp->m_stats.xs_stats); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); @@ -1838,19 +1849,32 @@ init_xfs_fs(void) xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); if (!xfs_kset) { error = -ENOMEM; - goto out_sysctl_unregister;; + goto out_sysctl_unregister; } + xfsstats.xs_kobj.kobject.kset = xfs_kset; + + xfsstats.xs_stats = alloc_percpu(struct xfsstats); + if (!xfsstats.xs_stats) { + error = -ENOMEM; + goto out_kset_unregister; + } + + error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, + "stats"); + if (error) + goto out_free_stats; + #ifdef DEBUG xfs_dbg_kobj.kobject.kset = xfs_kset; error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) - goto out_kset_unregister; + goto out_remove_stats_kobj; #endif error = xfs_qm_init(); if (error) - goto out_remove_kobj; + goto out_remove_dbg_kobj; error = register_filesystem(&xfs_fs_type); if (error) @@ -1859,11 +1883,15 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); - out_remove_kobj: + out_remove_dbg_kobj: #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); - out_kset_unregister: + out_remove_stats_kobj: #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + out_free_stats: + free_percpu(xfsstats.xs_stats); + out_kset_unregister: kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); @@ -1889,6 +1917,8 @@ exit_xfs_fs(void) #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + free_percpu(xfsstats.xs_stats); kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); @@ -1896,6 +1926,7 @@ exit_xfs_fs(void) xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_zones(); + xfs_uuid_table_free(); } module_init(init_xfs_fs); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index a0c8067cea6f..aed74d3f8da9 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -19,6 +19,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include "xfs_error.h" +#include "xfs_stats.h" static struct ctl_table_header *xfs_table_header; @@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler( size_t *lenp, loff_t *ppos) { - int c, ret, *valp = ctl->data; - __uint32_t vn_active; + int ret, *valp = ctl->data; ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { - xfs_notice(NULL, "Clearing xfsstats"); - for_each_possible_cpu(c) { - preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu(xfsstats, c).vn_active; - memset(&per_cpu(xfsstats, c), 0, - sizeof(struct xfsstats)); - per_cpu(xfsstats, c).vn_active = vn_active; - preempt_enable(); - } + xfs_stats_clearall(xfsstats.xs_stats); xfs_stats_clear = 0; } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index aa03670851d8..ee70f5dec9dc 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -21,11 +21,13 @@ #include "xfs_log_format.h" #include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_stats.h" struct xfs_sysfs_attr { struct attribute attr; - ssize_t (*show)(char *buf, void *data); - ssize_t (*store)(const char *buf, size_t count, void *data); + ssize_t (*show)(struct kobject *kobject, char *buf); + ssize_t (*store)(struct kobject *kobject, const char *buf, + size_t count); }; static inline struct xfs_sysfs_attr * @@ -38,6 +40,8 @@ to_attr(struct attribute *attr) static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) #define XFS_SYSFS_ATTR_RO(name) \ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) +#define XFS_SYSFS_ATTR_WO(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name) #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr @@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = { .release = xfs_sysfs_release, }; +STATIC ssize_t +xfs_sysfs_object_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0; +} + +STATIC ssize_t +xfs_sysfs_object_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0; +} + +static const struct sysfs_ops xfs_sysfs_ops = { + .show = xfs_sysfs_object_show, + .store = xfs_sysfs_object_store, +}; + #ifdef DEBUG /* debug */ STATIC ssize_t log_recovery_delay_store( + struct kobject *kobject, const char *buf, - size_t count, - void *data) + size_t count) { int ret; int val; @@ -77,8 +109,8 @@ log_recovery_delay_store( STATIC ssize_t log_recovery_delay_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); } @@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = { NULL, }; +struct kobj_type xfs_dbg_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_dbg_attrs, +}; + +#endif /* DEBUG */ + +/* stats */ + +static inline struct xstats * +to_xstats(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xstats, xs_kobj); +} + STATIC ssize_t -xfs_dbg_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) +stats_show( + struct kobject *kobject, + char *buf) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + struct xstats *stats = to_xstats(kobject); - return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; + return xfs_stats_format(stats->xs_stats, buf); } +XFS_SYSFS_ATTR_RO(stats); STATIC ssize_t -xfs_dbg_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) +stats_clear_store( + struct kobject *kobject, + const char *buf, + size_t count) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + int ret; + int val; + struct xstats *stats = to_xstats(kobject); + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; - return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; + if (val != 1) + return -EINVAL; + + xfs_stats_clearall(stats->xs_stats); + return count; } +XFS_SYSFS_ATTR_WO(stats_clear); -static struct sysfs_ops xfs_dbg_ops = { - .show = xfs_dbg_show, - .store = xfs_dbg_store, +static struct attribute *xfs_stats_attrs[] = { + ATTR_LIST(stats), + ATTR_LIST(stats_clear), + NULL, }; -struct kobj_type xfs_dbg_ktype = { +struct kobj_type xfs_stats_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_dbg_ops, - .default_attrs = xfs_dbg_attrs, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_stats_attrs, }; -#endif /* DEBUG */ - /* xlog */ +static inline struct xlog * +to_xlog(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xlog, l_kobj); +} + STATIC ssize_t log_head_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); spin_lock(&log->l_icloglock); cycle = log->l_curr_cycle; @@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn); STATIC ssize_t log_tail_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); @@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn); STATIC ssize_t reserve_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) + { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head); STATIC ssize_t write_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = { NULL, }; -static inline struct xlog * -to_xlog(struct kobject *kobject) -{ - struct xfs_kobj *kobj = to_kobj(kobject); - return container_of(kobj, struct xlog, l_kobj); -} - -STATIC ssize_t -xfs_log_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->show ? xfs_attr->show(buf, log) : 0; -} - -STATIC ssize_t -xfs_log_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; -} - -static struct sysfs_ops xfs_log_ops = { - .show = xfs_log_show, - .store = xfs_log_store, -}; - struct kobj_type xfs_log_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_log_ops, + .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_log_attrs, }; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 240eee35f342..be692e59938d 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -22,6 +22,7 @@ extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ extern struct kobj_type xfs_dbg_ktype; /* debug */ extern struct kobj_type xfs_log_ktype; /* xlog */ +extern struct kobj_type xfs_stats_ktype; /* stats */ static inline struct xfs_kobj * to_kobj(struct kobject *kobject) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5ed36b1e04c1..877079eb0f8f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); +DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), @@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); +DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a0ab1dae9c31..748b16aff45a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -930,9 +930,9 @@ __xfs_trans_commit( */ if (sync) { error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); - XFS_STATS_INC(xs_trans_sync); + XFS_STATS_INC(mp, xs_trans_sync); } else { - XFS_STATS_INC(xs_trans_async); + XFS_STATS_INC(mp, xs_trans_async); } return error; @@ -955,7 +955,7 @@ out_unreserve: xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); - XFS_STATS_INC(xs_trans_empty); + XFS_STATS_INC(mp, xs_trans_empty); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1098cf490189..aa67339b9537 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -349,7 +349,7 @@ xfsaild_push( xfs_ail_min_lsn(ailp))) { ailp->xa_log_flush = 0; - XFS_STATS_INC(xs_push_ail_flush); + XFS_STATS_INC(mp, xs_push_ail_flush); xfs_log_force(mp, XFS_LOG_SYNC); } @@ -371,7 +371,7 @@ xfsaild_push( goto out_done; } - XFS_STATS_INC(xs_push_ail); + XFS_STATS_INC(mp, xs_push_ail); lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { @@ -385,7 +385,7 @@ xfsaild_push( lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); switch (lock_result) { case XFS_ITEM_SUCCESS: - XFS_STATS_INC(xs_push_ail_success); + XFS_STATS_INC(mp, xs_push_ail_success); trace_xfs_ail_push(lip); ailp->xa_last_pushed_lsn = lsn; @@ -403,7 +403,7 @@ xfsaild_push( * re-try the flushing relatively soon if most of the * AIL is beeing flushed. */ - XFS_STATS_INC(xs_push_ail_flushing); + XFS_STATS_INC(mp, xs_push_ail_flushing); trace_xfs_ail_flushing(lip); flushing++; @@ -411,14 +411,14 @@ xfsaild_push( break; case XFS_ITEM_PINNED: - XFS_STATS_INC(xs_push_ail_pinned); + XFS_STATS_INC(mp, xs_push_ail_pinned); trace_xfs_ail_pinned(lip); stuck++; ailp->xa_log_flush++; break; case XFS_ITEM_LOCKED: - XFS_STATS_INC(xs_push_ail_locked); + XFS_STATS_INC(mp, xs_push_ail_locked); trace_xfs_ail_locked(lip); stuck++; @@ -497,6 +497,7 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; + set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 17280cd71934..b97f1df910ab 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -108,6 +108,15 @@ xfs_trans_log_inode( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* + * Record the specific change for fdatasync optimisation. This + * allows fdatasync to skip log forces for inodes that are only + * timestamp dirty. We do this before the change count so that + * the core being logged in this case does not impact on fdatasync + * behaviour. + */ + ip->i_itemp->ili_fsync_fields |= flags; + + /* * First time we log the inode in a transaction, bump the inode change * counter if it is configured for this to occur. We don't use * inode_inc_version() because there is no need for extra locking around diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index c036815183cb..839b35ca21c6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -32,9 +32,10 @@ static int -xfs_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int xflags) +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *value, size_t size) { + int xflags = handler->flags; struct xfs_inode *ip = XFS_I(d_inode(dentry)); int error, asize = size; @@ -53,11 +54,35 @@ xfs_xattr_get(struct dentry *dentry, const char *name, return asize; } +void +xfs_forget_acl( + struct inode *inode, + const char *name, + int xflags) +{ + /* + * Invalidate any cached ACLs if the user has bypassed the ACL + * interface. We don't validate the content whatsoever so it is caller + * responsibility to provide data in valid format and ensure i_mode is + * consistent. + */ + if (xflags & ATTR_ROOT) { +#ifdef CONFIG_XFS_POSIX_ACL + if (!strcmp(name, SGI_ACL_FILE)) + forget_cached_acl(inode, ACL_TYPE_ACCESS); + else if (!strcmp(name, SGI_ACL_DEFAULT)) + forget_cached_acl(inode, ACL_TYPE_DEFAULT); +#endif + } +} + static int -xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int xflags) +xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) { - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int xflags = handler->flags; + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int error; if (strcmp(name, "") == 0) return -EINVAL; @@ -70,8 +95,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, if (!value) return xfs_attr_remove(ip, (unsigned char *)name, xflags); - return xfs_attr_set(ip, (unsigned char *)name, + error = xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); + if (!error) + xfs_forget_acl(d_inode(dentry), name, xflags); + + return error; } static const struct xattr_handler xfs_xattr_user_handler = { |