diff options
Diffstat (limited to 'fs')
105 files changed, 2097 insertions, 1801 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 664991afe0c0..a6bb530b1ec5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -165,6 +165,7 @@ config HUGETLB_PAGE def_bool HUGETLBFS source "fs/configfs/Kconfig" +source "fs/efivarfs/Kconfig" endmenu @@ -209,7 +210,6 @@ source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" source "fs/f2fs/Kconfig" -source "fs/efivarfs/Kconfig" endif # MISC_FILESYSTEMS @@ -1140,6 +1140,13 @@ static long aio_read_events_ring(struct kioctx *ctx, long ret = 0; int copy_ret; + /* + * The mutex can block and wake us up and that will cause + * wait_event_interruptible_hrtimeout() to schedule without sleeping + * and repeat. This should be rare enough that it doesn't cause + * peformance issues. See the comment in read_events() for more detail. + */ + sched_annotate_sleep(); mutex_lock(&ctx->ring_lock); /* Access to ->ring_pages here is protected by ctx->ring_lock. */ diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index a66768ebc8d1..80e9c18ea64f 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -8,6 +8,7 @@ config BTRFS_FS select LZO_DECOMPRESS select RAID6_PQ select XOR_BLOCKS + select SRCU help Btrfs is a general purpose copy-on-write filesystem with extents, diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2d3e32ebfd15..8729cf68d2fe 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1552,7 +1552,6 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, { int ret; int type; - struct btrfs_tree_block_info *info; struct btrfs_extent_inline_ref *eiref; if (*ptr == (unsigned long)-1) @@ -1573,9 +1572,17 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, } /* we can treat both ref types equally here */ - info = (struct btrfs_tree_block_info *)(ei + 1); *out_root = btrfs_extent_inline_ref_offset(eb, eiref); - *out_level = btrfs_tree_block_level(eb, info); + + if (key->type == BTRFS_EXTENT_ITEM_KEY) { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + *out_level = btrfs_tree_block_level(eb, info); + } else { + ASSERT(key->type == BTRFS_METADATA_ITEM_KEY); + *out_level = (u8)key->offset; + } if (ret == 1) *ptr = (unsigned long)-1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7e607416755a..0b180708bf79 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1171,6 +1171,7 @@ struct btrfs_space_info { struct percpu_counter total_bytes_pinned; struct list_head list; + /* Protected by the spinlock 'lock'. */ struct list_head ro_bgs; struct rw_semaphore groups_sem; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 054577bddaf2..de4e70fb3cbb 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1857,6 +1857,14 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode) { struct btrfs_delayed_node *delayed_node; + /* + * we don't do delayed inode updates during log recovery because it + * leads to enospc problems. This means we also can't do + * delayed inode refs + */ + if (BTRFS_I(inode)->root->fs_info->log_root_recovering) + return -EAGAIN; + delayed_node = btrfs_get_or_create_delayed_node(inode); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a80b97100d90..a684086c3c81 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3139,9 +3139,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); - if (ret < 0) + if (ret) { + if (ret > 0) + ret = -ENOENT; goto fail; - BUG_ON(ret); /* Corruption */ + } leaf = path->nodes[0]; bi = btrfs_item_ptr_offset(leaf, path->slots[0]); @@ -3149,11 +3151,9 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); fail: - if (ret) { + if (ret) btrfs_abort_transaction(trans, root, ret); - return ret; - } - return 0; + return ret; } @@ -9422,7 +9422,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * are still on the list after taking the semaphore */ list_del_init(&block_group->list); - list_del_init(&block_group->ro_list); if (list_empty(&block_group->space_info->block_groups[index])) { kobj = block_group->space_info->block_group_kobjs[index]; block_group->space_info->block_group_kobjs[index] = NULL; @@ -9464,6 +9463,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, btrfs_remove_free_space_cache(block_group); spin_lock(&block_group->space_info->lock); + list_del_init(&block_group->ro_list); block_group->space_info->total_bytes -= block_group->key.offset; block_group->space_info->bytes_readonly -= block_group->key.offset; block_group->space_info->disk_total -= block_group->key.offset * factor; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4ebabd237153..790dbae3343c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2190,7 +2190,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) next = next_state(state); - failrec = (struct io_failure_record *)state->private; + failrec = (struct io_failure_record *)(unsigned long)state->private; free_extent_state(state); kfree(failrec); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e687bb0dc73a..8bf326affb94 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6255,8 +6255,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: btrfs_end_transaction(trans, root); - if (drop_on_err) + if (drop_on_err) { + inode_dec_link_count(inode); iput(inode); + } btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f2bb13a23f86..e427cb7ee12c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2607,9 +2607,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, flags, gen, mirror_num, have_csum ? csum : NULL); -skip: if (ret) return ret; +skip: len -= l; logical += l; physical += l; @@ -3053,7 +3053,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ppath = btrfs_alloc_path(); if (!ppath) { - btrfs_free_path(ppath); + btrfs_free_path(path); return -ENOMEM; } @@ -3065,6 +3065,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, path->search_commit_root = 1; path->skip_locking = 1; + ppath->search_commit_root = 1; + ppath->skip_locking = 1; /* * trigger the readahead for extent tree csum tree and wait for * completion. During readahead, the scrub is officially paused diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 60f7cbe815e9..6f49b2872a64 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1000,10 +1000,20 @@ int btrfs_sync_fs(struct super_block *sb, int wait) */ if (fs_info->pending_changes == 0) return 0; + /* + * A non-blocking test if the fs is frozen. We must not + * start a new transaction here otherwise a deadlock + * happens. The pending operations are delayed to the + * next commit after thawing. + */ + if (__sb_start_write(sb, SB_FREEZE_WRITE, false)) + __sb_end_write(sb, SB_FREEZE_WRITE); + else + return 0; trans = btrfs_start_transaction(root, 0); - } else { - return PTR_ERR(trans); } + if (IS_ERR(trans)) + return PTR_ERR(trans); } return btrfs_commit_transaction(trans, root); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a605d4e2f2bc..e88b59d13439 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2118,7 +2118,7 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) unsigned long prev; unsigned long bit; - prev = cmpxchg(&fs_info->pending_changes, 0, 0); + prev = xchg(&fs_info->pending_changes, 0); if (!prev) return; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9a02da16f2be..1a9585d4380a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2591,6 +2591,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { + blk_finish_plug(&plug); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f5013d92a7e6..c81c0e004588 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1416,7 +1416,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } - dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", inode, ceph_vinop(inode), len, locked_page); if (len > 0) { diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index c35c5c614e38..06ea5cd05cd9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) return err; } -/** - * Must be called with lock_flocks() already held. Fills in the passed - * counter variables, so you can prepare pagelist metadata before calling - * ceph_encode_locks. +/* + * Fills in the passed counter variables, so you can prepare pagelist metadata + * before calling ceph_encode_locks. */ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) { - struct file_lock *lock; + struct file_lock_context *ctx; *fcntl_count = 0; *flock_count = 0; - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) - ++(*fcntl_count); - else if (lock->fl_flags & FL_FLOCK) - ++(*flock_count); + ctx = inode->i_flctx; + if (ctx) { + *fcntl_count = ctx->flc_posix_cnt; + *flock_count = ctx->flc_flock_cnt; } dout("counted %d flock locks and %d fcntl locks", *flock_count, *fcntl_count); @@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int num_fcntl_locks, int num_flock_locks) { struct file_lock *lock; + struct file_lock_context *ctx = inode->i_flctx; int err = 0; int seen_fcntl = 0; int seen_flock = 0; @@ -279,33 +278,34 @@ int ceph_encode_locks_to_buffer(struct inode *inode, dout("encoding %d flock and %d fcntl locks", num_flock_locks, num_fcntl_locks); - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) { - ++seen_fcntl; - if (seen_fcntl > num_fcntl_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + if (!ctx) + return 0; + + spin_lock(&ctx->flc_lock); + list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + ++seen_fcntl; + if (seen_fcntl > num_fcntl_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if (err) + goto fail; + ++l; } - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_FLOCK) { - ++seen_flock; - if (seen_flock > num_flock_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + ++seen_flock; + if (seen_flock > num_flock_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if (err) + goto fail; + ++l; } fail: + spin_unlock(&ctx->flc_lock); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d2171f4a6980..5f62fb7a5d0a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_filelock *flocks; encode_again: - spin_lock(&inode->i_lock); ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - spin_unlock(&inode->i_lock); flocks = kmalloc((num_fcntl_locks+num_flock_locks) * sizeof(struct ceph_filelock), GFP_NOFS); if (!flocks) { err = -ENOMEM; goto out_free; } - spin_lock(&inode->i_lock); err = ceph_encode_locks_to_buffer(inode, flocks, num_fcntl_locks, num_flock_locks); - spin_unlock(&inode->i_lock); if (err) { kfree(flocks); if (err == -ENOSPC) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 9c56ef776407..7febcf2475c5 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -606,9 +606,11 @@ cifs_security_flags_handle_must_flags(unsigned int *flags) *flags = CIFSSEC_MUST_NTLMV2; else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM) *flags = CIFSSEC_MUST_NTLM; - else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) + else if (CIFSSEC_MUST_LANMAN && + (*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) *flags = CIFSSEC_MUST_LANMAN; - else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) + else if (CIFSSEC_MUST_PLNTXT && + (*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) *flags = CIFSSEC_MUST_PLNTXT; *flags |= signflags; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 96b7e9b7706d..c1a86764bbf7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -366,6 +366,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; + bool oplock_break_cancelled; spin_lock(&cifs_file_list_lock); if (--cifs_file->count > 0) { @@ -397,7 +398,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) } spin_unlock(&cifs_file_list_lock); - cancel_work_sync(&cifs_file->oplock_break); + oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -409,6 +410,9 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) _free_xid(xid); } + if (oplock_break_cancelled) + cifs_done_oplock_break(cifsi); + cifs_del_pending_open(&open); /* @@ -1109,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } -/* copied from fs/locks.c with a name change */ -#define cifs_for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; \ - lockp = &(*lockp)->fl_next) - struct lock_to_push { struct list_head llist; __u64 offset; @@ -1128,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) { struct inode *inode = cfile->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct file_lock *flock, **before; - unsigned int count = 0, i = 0; + struct file_lock *flock; + struct file_lock_context *flctx = inode->i_flctx; + unsigned int i; int rc = 0, xid, type; struct list_head locks_to_send, *el; struct lock_to_push *lck, *tmp; @@ -1137,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); - spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - if ((*before)->fl_flags & FL_POSIX) - count++; - } - spin_unlock(&inode->i_lock); + if (!flctx) + goto out; INIT_LIST_HEAD(&locks_to_send); /* - * Allocating count locks is enough because no FL_POSIX locks can be - * added to the list while we are holding cinode->lock_sem that + * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks + * can be added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ - for (; i < count; i++) { + for (i = 0; i < flctx->flc_posix_cnt; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); if (!lck) { rc = -ENOMEM; @@ -1161,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) } el = locks_to_send.next; - spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; + spin_lock(&flctx->flc_lock); + list_for_each_entry(flock, &flctx->flc_posix, fl_list) { if (el == &locks_to_send) { /* * The list ended. We don't have enough allocated @@ -1185,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->length = length; lck->type = type; lck->offset = flock->fl_start; - el = el->next; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { int stored_rc; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 45cb59bcc791..8b7898b7670f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -86,21 +86,16 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, } src_inode = file_inode(src_file.file); + rc = -EINVAL; + if (S_ISDIR(src_inode->i_mode)) + goto out_fput; /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants * server could even support copy of range where source = target */ - - /* so we do not deadlock racing two ioctls on same files */ - if (target_inode < src_inode) { - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD); - } + lock_two_nondirectories(target_inode, src_inode); /* determine range to clone */ rc = -EINVAL; @@ -124,13 +119,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, out_unlock: /* although unlocking in the reverse order from locking is not strictly necessary here it is a little cleaner to be consistent */ - if (target_inode < src_inode) { - mutex_unlock(&src_inode->i_mutex); - mutex_unlock(&target_inode->i_mutex); - } else { - mutex_unlock(&target_inode->i_mutex); - mutex_unlock(&src_inode->i_mutex); - } + unlock_two_nondirectories(src_inode, target_inode); out_fput: fdput(src_file); out_drop_write: diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 6c1566366a66..a4232ec4f2ba 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -221,7 +221,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16, } rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__le16)); - memset(wpwd, 0, 129 * sizeof(__le16)); + memzero_explicit(wpwd, sizeof(wpwd)); return rc; } diff --git a/fs/efivarfs/Kconfig b/fs/efivarfs/Kconfig index 367bbb10c543..c2499ef174a2 100644 --- a/fs/efivarfs/Kconfig +++ b/fs/efivarfs/Kconfig @@ -1,6 +1,7 @@ config EFIVAR_FS tristate "EFI Variable filesystem" depends on EFI + default m help efivarfs is a replacement filesystem for the old EFI variable support via sysfs, as it doesn't suffer from the diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6dad1176ec52..ddbce42548c9 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -140,7 +140,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len] = '-'; - efi_guid_unparse(&entry->var.VendorGuid, name + len + 1); + efi_guid_to_str(&entry->var.VendorGuid, name + len + 1); name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 9b4e7d750d4f..d4dbf3c259b3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -466,6 +466,8 @@ static void ext3_put_super (struct super_block * sb) } sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); + mutex_destroy(&sbi->s_orphan_lock); + mutex_destroy(&sbi->s_resize_lock); kfree(sbi); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 74c5f53595fb..ac64edbe501d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1046,10 +1046,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, struct path *path); -static int ext4_quota_on_sysfile(struct super_block *sb, int type, - int format_id); static int ext4_quota_off(struct super_block *sb, int type); -static int ext4_quota_off_sysfile(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -1084,16 +1081,6 @@ static const struct quotactl_ops ext4_qctl_operations = { .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk }; - -static const struct quotactl_ops ext4_qctl_sysfile_operations = { - .quota_on_meta = ext4_quota_on_sysfile, - .quota_off = ext4_quota_off_sysfile, - .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk -}; #endif static const struct super_operations ext4_sops = { @@ -3935,7 +3922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - sb->s_qcop = &ext4_qctl_sysfile_operations; + sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &ext4_qctl_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; @@ -5288,21 +5275,6 @@ static int ext4_enable_quotas(struct super_block *sb) return 0; } -/* - * quota_on function that is used when QUOTA feature is set. - */ -static int ext4_quota_on_sysfile(struct super_block *sb, int type, - int format_id) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - return -EINVAL; - - /* - * USAGE was enabled at mount time. Only need to enable LIMITS now. - */ - return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); -} - static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; @@ -5329,18 +5301,6 @@ out: return dquot_quota_off(sb, type); } -/* - * quota_off function that is used when QUOTA feature is set. - */ -static int ext4_quota_off_sysfile(struct super_block *sb, int type) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - return -EINVAL; - - /* Disable only the limits. */ - return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); -} - /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) diff --git a/fs/fcntl.c b/fs/fcntl.c index 99d440a4a6ba..ee85cd4e136a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -740,14 +740,15 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH | __O_TMPFILE + __FMODE_EXEC | O_PATH | __O_TMPFILE | + __FMODE_NONOTIFY )); fasync_cache = kmem_cache_create("fasync_cache", diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ba1107977f2e..ed19a7d622fa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -131,6 +131,13 @@ static void fuse_req_init_context(struct fuse_req *req) req->in.h.pid = current->pid; } +void fuse_set_initialized(struct fuse_conn *fc) +{ + /* Make sure stores before this are seen on another CPU */ + smp_wmb(); + fc->initialized = 1; +} + static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) { return !fc->initialized || (for_background && fc->blocked); @@ -155,6 +162,8 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, if (intr) goto out; } + /* Matches smp_wmb() in fuse_set_initialized() */ + smp_rmb(); err = -ENOTCONN; if (!fc->connected) @@ -253,6 +262,8 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, atomic_inc(&fc->num_waiting); wait_event(fc->blocked_waitq, fc->initialized); + /* Matches smp_wmb() in fuse_set_initialized() */ + smp_rmb(); req = fuse_request_alloc(0); if (!req) req = get_reserved_req(fc, file); @@ -511,6 +522,39 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } EXPORT_SYMBOL_GPL(fuse_request_send); +static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) +{ + if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS) + args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE; + + if (fc->minor < 9) { + switch (args->in.h.opcode) { + case FUSE_LOOKUP: + case FUSE_CREATE: + case FUSE_MKNOD: + case FUSE_MKDIR: + case FUSE_SYMLINK: + case FUSE_LINK: + args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + break; + case FUSE_GETATTR: + case FUSE_SETATTR: + args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + break; + } + } + if (fc->minor < 12) { + switch (args->in.h.opcode) { + case FUSE_CREATE: + args->in.args[0].size = sizeof(struct fuse_open_in); + break; + case FUSE_MKNOD: + args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE; + break; + } + } +} + ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) { struct fuse_req *req; @@ -520,6 +564,9 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) if (IS_ERR(req)) return PTR_ERR(req); + /* Needs to be done after fuse_get_req() so that fc->minor is valid */ + fuse_adjust_compat(fc, args); + req->in.h.opcode = args->in.h.opcode; req->in.h.nodeid = args->in.h.nodeid; req->in.numargs = args->in.numargs; @@ -2127,7 +2174,7 @@ void fuse_abort_conn(struct fuse_conn *fc) if (fc->connected) { fc->connected = 0; fc->blocked = 0; - fc->initialized = 1; + fuse_set_initialized(fc); end_io_requests(fc); end_queued_requests(fc); end_polls(fc); @@ -2146,7 +2193,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; - fc->initialized = 1; + fuse_set_initialized(fc); end_queued_requests(fc); end_polls(fc); wake_up_all(&fc->blocked_waitq); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 252b8a5de8b5..08e7b1a9d5d0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -156,10 +156,7 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, args->in.args[0].size = name->len + 1; args->in.args[0].value = name->name; args->out.numargs = 1; - if (fc->minor < 9) - args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; - else - args->out.args[0].size = sizeof(struct fuse_entry_out); + args->out.args[0].size = sizeof(struct fuse_entry_out); args->out.args[0].value = outarg; } @@ -422,16 +419,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.in.h.opcode = FUSE_CREATE; args.in.h.nodeid = get_node_id(dir); args.in.numargs = 2; - args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : - sizeof(inarg); + args.in.args[0].size = sizeof(inarg); args.in.args[0].value = &inarg; args.in.args[1].size = entry->d_name.len + 1; args.in.args[1].value = entry->d_name.name; args.out.numargs = 2; - if (fc->minor < 9) - args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; - else - args.out.args[0].size = sizeof(outentry); + args.out.args[0].size = sizeof(outentry); args.out.args[0].value = &outentry; args.out.args[1].size = sizeof(outopen); args.out.args[1].value = &outopen; @@ -539,10 +532,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, memset(&outarg, 0, sizeof(outarg)); args->in.h.nodeid = get_node_id(dir); args->out.numargs = 1; - if (fc->minor < 9) - args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; - else - args->out.args[0].size = sizeof(outarg); + args->out.args[0].size = sizeof(outarg); args->out.args[0].value = &outarg; err = fuse_simple_request(fc, args); if (err) @@ -592,8 +582,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, inarg.umask = current_umask(); args.in.h.opcode = FUSE_MKNOD; args.in.numargs = 2; - args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : - sizeof(inarg); + args.in.args[0].size = sizeof(inarg); args.in.args[0].value = &inarg; args.in.args[1].size = entry->d_name.len + 1; args.in.args[1].value = entry->d_name.name; @@ -899,10 +888,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, args.in.args[0].size = sizeof(inarg); args.in.args[0].value = &inarg; args.out.numargs = 1; - if (fc->minor < 9) - args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; - else - args.out.args[0].size = sizeof(outarg); + args.out.args[0].size = sizeof(outarg); args.out.args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) { @@ -1574,10 +1560,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, args->in.args[0].size = sizeof(*inarg_p); args->in.args[0].value = inarg_p; args->out.numargs = 1; - if (fc->minor < 9) - args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; - else - args->out.args[0].size = sizeof(*outarg_p); + args->out.args[0].size = sizeof(*outarg_p); args->out.args[0].value = outarg_p; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e0fc6725d1d0..1cdfb07c1376 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -906,4 +906,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file); +void fuse_set_initialized(struct fuse_conn *fc); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6749109f255d..f38256e4476e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -424,8 +424,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) args.in.h.opcode = FUSE_STATFS; args.in.h.nodeid = get_node_id(dentry->d_inode); args.out.numargs = 1; - args.out.args[0].size = - fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); + args.out.args[0].size = sizeof(outarg); args.out.args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) @@ -898,7 +897,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } - fc->initialized = 1; + fuse_set_initialized(fc); wake_up_all(&fc->blocked_waitq); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c8b148bbdc8b..3e193cb36996 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -667,7 +667,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, s64 change, struct gfs2_quota_data *qd, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -697,16 +697,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, be64_add_cpu(&q.qu_value, change); qd->qd_qb.qb_value = q.qu_value; if (fdq) { - if (fdq->d_fieldmask & FS_DQ_BSOFT) { - q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPC_SOFT) { + q.qu_warn = cpu_to_be64(fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_warn = q.qu_warn; } - if (fdq->d_fieldmask & FS_DQ_BHARD) { - q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPC_HARD) { + q.qu_limit = cpu_to_be64(fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_limit = q.qu_limit; } - if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPACE) { + q.qu_value = cpu_to_be64(fdq->d_space >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_value = q.qu_value; } } @@ -1497,7 +1497,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_lvb *qlvb; @@ -1505,7 +1505,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, struct gfs2_holder q_gh; int error; - memset(fdq, 0, sizeof(struct fs_disk_quota)); + memset(fdq, 0, sizeof(*fdq)); if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ @@ -1522,12 +1522,9 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, goto out; qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; - fdq->d_version = FS_DQUOT_VERSION; - fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA; - fdq->d_id = from_kqid_munged(current_user_ns(), qid); - fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; - fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; - fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; + fdq->d_spc_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_sb.sb_bsize_shift; + fdq->d_spc_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_sb.sb_bsize_shift; + fdq->d_space = be64_to_cpu(qlvb->qb_value) << sdp->sd_sb.sb_bsize_shift; gfs2_glock_dq_uninit(&q_gh); out: @@ -1536,10 +1533,10 @@ out: } /* GFS2 only supports a subset of the XFS fields */ -#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) +#define GFS2_FIELDMASK (QC_SPC_SOFT|QC_SPC_HARD|QC_SPACE) static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); @@ -1583,17 +1580,17 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, goto out_i; /* If nothing has changed, this is a no-op */ - if ((fdq->d_fieldmask & FS_DQ_BSOFT) && - ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) - fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & QC_SPC_SOFT) && + ((fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) + fdq->d_fieldmask ^= QC_SPC_SOFT; - if ((fdq->d_fieldmask & FS_DQ_BHARD) && - ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) - fdq->d_fieldmask ^= FS_DQ_BHARD; + if ((fdq->d_fieldmask & QC_SPC_HARD) && + ((fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) + fdq->d_fieldmask ^= QC_SPC_HARD; - if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && - ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) - fdq->d_fieldmask ^= FS_DQ_BCOUNT; + if ((fdq->d_fieldmask & QC_SPACE) && + ((fdq->d_space >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_value))) + fdq->d_fieldmask ^= QC_SPACE; if (fdq->d_fieldmask == 0) goto out_i; diff --git a/fs/inode.c b/fs/inode.c index aa149e7262ac..f30872ade6d7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -194,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; #endif - + inode->i_flctx = NULL; this_cpu_inc(nr_inodes); return 0; @@ -237,6 +237,7 @@ void __destroy_inode(struct inode *inode) BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); fsnotify_inode_delete(inode); + locks_free_lock_context(inode->i_flctx); if (!inode->i_nlink) { WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); atomic_long_dec(&inode->i_sb->s_remove_count); diff --git a/fs/isofs/util.c b/fs/isofs/util.c index 01e1ee7a998b..005a15cfd30a 100644 --- a/fs/isofs/util.c +++ b/fs/isofs/util.c @@ -2,6 +2,7 @@ * linux/fs/isofs/util.c */ +#include <linux/time.h> #include "isofs.h" /* @@ -17,9 +18,9 @@ int iso_date(char * p, int flag) { int year, month, day, hour, minute, second, tz; - int crtime, days, i; + int crtime; - year = p[0] - 70; + year = p[0]; month = p[1]; day = p[2]; hour = p[3]; @@ -31,18 +32,7 @@ int iso_date(char * p, int flag) if (year < 0) { crtime = 0; } else { - int monlen[12] = {31,28,31,30,31,30,31,31,30,31,30,31}; - - days = year * 365; - if (year > 2) - days += (year+1) / 4; - for (i = 1; i < month; i++) - days += monlen[i-1]; - if (((year+2) % 4) == 0 && month > 2) - days++; - days += day - 1; - crtime = ((((days * 24) + hour) * 60 + minute) * 60) - + second; + crtime = mktime64(year+1900, month, day, hour, minute, second); /* sign extend */ if (tz & 0x80) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 37989f02a226..2d881b381d2b 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -201,10 +201,14 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) static int kernfs_name_compare(unsigned int hash, const char *name, const void *ns, const struct kernfs_node *kn) { - if (hash != kn->hash) - return hash - kn->hash; - if (ns != kn->ns) - return ns - kn->ns; + if (hash < kn->hash) + return -1; + if (hash > kn->hash) + return 1; + if (ns < kn->ns) + return -1; + if (ns > kn->ns) + return 1; return strcmp(name, kn->name); } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e94c887da2d7..55505cbe11af 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -138,10 +138,6 @@ lockd(void *vrqstp) dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); - if (!nlm_timeout) - nlm_timeout = LOCKD_DFLT_TIMEO; - nlmsvc_timeout = nlm_timeout * HZ; - /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. @@ -350,6 +346,10 @@ static struct svc_serv *lockd_create_svc(void) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); + if (!nlm_timeout) + nlm_timeout = LOCKD_DFLT_TIMEO; + nlmsvc_timeout = nlm_timeout * HZ; + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d12ff4e2dbe7..665ef5a05183 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; struct nlm_host *lockhost; + if (!flctx || list_empty_careful(&flctx->flc_posix)) + return 0; again: file->f_locks = 0; - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_lmops != &nlmsvc_lock_operations) continue; @@ -180,7 +183,7 @@ again: if (match(lockhost, host)) { struct file_lock lock = *fl; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; @@ -192,7 +195,7 @@ again: goto again; } } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); return 0; } @@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { - if (fl->fl_lmops == &nlmsvc_lock_operations) { - spin_unlock(&inode->i_lock); - return 1; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_lmops == &nlmsvc_lock_operations) { + spin_unlock(&flctx->flc_lock); + return 1; + } } + spin_unlock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); file->f_locks = 0; return 0; } diff --git a/fs/locks.c b/fs/locks.c index 735b8d3fa78c..4d0d41163a50 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl) int leases_enable = 1; int lease_break_time = 45; -#define for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) - /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock via * the file_lock_lglock. Note that alterations to the list also require that - * the relevant i_lock is held. + * the relevant flc_lock is held. */ DEFINE_STATIC_LGLOCK(file_lock_lglock); static DEFINE_PER_CPU(struct hlist_head, file_lock_list); @@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, - * we often hold the i_lock as well. In certain cases, when reading the fields + * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the - * i_lock. + * flc_lock. * * In particular, adding an entry to the fl_block list requires that you hold - * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting - * an entry from the list however only requires the file_lock_lock. + * both the flc_lock and the blocked_lock_lock (acquired in that order). + * Deleting an entry from the list however only requires the file_lock_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); +static struct kmem_cache *flctx_cache __read_mostly; static struct kmem_cache *filelock_cache __read_mostly; +static struct file_lock_context * +locks_get_lock_context(struct inode *inode) +{ + struct file_lock_context *new; + + if (likely(inode->i_flctx)) + goto out; + + new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); + if (!new) + goto out; + + spin_lock_init(&new->flc_lock); + INIT_LIST_HEAD(&new->flc_flock); + INIT_LIST_HEAD(&new->flc_posix); + INIT_LIST_HEAD(&new->flc_lease); + + /* + * Assign the pointer if it's not already assigned. If it is, then + * free the context we just allocated. + */ + spin_lock(&inode->i_lock); + if (likely(!inode->i_flctx)) { + inode->i_flctx = new; + new = NULL; + } + spin_unlock(&inode->i_lock); + + if (new) + kmem_cache_free(flctx_cache, new); +out: + return inode->i_flctx; +} + +void +locks_free_lock_context(struct file_lock_context *ctx) +{ + if (ctx) { + WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); + WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); + WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); + kmem_cache_free(flctx_cache, ctx); + } +} + static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_list); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); } @@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private); void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); + BUG_ON(!list_empty(&fl->fl_list)); BUG_ON(!list_empty(&fl->fl_block)); BUG_ON(!hlist_unhashed(&fl->fl_link)); @@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose) struct file_lock *fl; while (!list_empty(dispose)) { - fl = list_first_entry(dispose, struct file_lock, fl_block); - list_del_init(&fl->fl_block); + fl = list_first_entry(dispose, struct file_lock, fl_list); + list_del_init(&fl->fl_list); locks_free_lock(fl); } } @@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner; } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock *fl) { lg_local_lock(&file_lock_lglock); @@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl) lg_local_unlock(&file_lock_lglock); } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock *fl) { /* * Avoid taking lock if already unhashed. This is safe since this check - * is done while holding the i_lock, and new insertions into the list + * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&fl->fl_link)) @@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter) * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * - * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the blocked_lock_lock, but by ensuring that the - * i_lock is also held on insertions we can avoid taking the blocked_lock_lock - * in some cases when we see that the fl_block list is empty. + * Must be called with both the flc_lock and blocked_lock_lock held. The + * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * that the flc_lock is also held on insertions we can avoid taking the + * blocked_lock_lock in some cases when we see that the fl_block list is empty. */ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) @@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker, locks_insert_global_blocked(waiter); } -/* Must be called with i_lock held. */ +/* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { @@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker, /* * Wake up processes blocked waiting for blocker. * - * Must be called with the inode->i_lock held! + * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new - * blocked requests are only added to the list under the i_lock, and - * the i_lock is always held here. Note that removal from the fl_block - * list does not require the i_lock, so we must recheck list_empty() + * blocked requests are only added to the list under the flc_lock, and + * the flc_lock is always held here. Note that removal from the fl_block + * list does not require the flc_lock, so we must recheck list_empty() * after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->fl_block)) @@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker) spin_unlock(&blocked_lock_lock); } -/* Insert file lock fl into an inode's lock list at the position indicated - * by pos. At the same time add the lock to the global file lock list. - * - * Must be called with the i_lock held! - */ -static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) +static void +locks_insert_lock_ctx(struct file_lock *fl, int *counter, + struct list_head *before) { fl->fl_nspid = get_pid(task_tgid(current)); - - /* insert into file's list */ - fl->fl_next = *pos; - *pos = fl; - + list_add_tail(&fl->fl_list, before); + ++*counter; locks_insert_global_locks(fl); } -/** - * locks_delete_lock - Delete a lock and then free it. - * @thisfl_p: pointer that points to the fl_next field of the previous - * inode->i_flock list entry - * - * Unlink a lock from all lists and free the namespace reference, but don't - * free it yet. Wake up processes that are blocked waiting for this lock and - * notify the FS that the lock has been cleared. - * - * Must be called with the i_lock held! - */ -static void locks_unlink_lock(struct file_lock **thisfl_p) +static void +locks_unlink_lock_ctx(struct file_lock *fl, int *counter) { - struct file_lock *fl = *thisfl_p; - locks_delete_global_locks(fl); - - *thisfl_p = fl->fl_next; - fl->fl_next = NULL; - + list_del_init(&fl->fl_list); + --*counter; if (fl->fl_nspid) { put_pid(fl->fl_nspid); fl->fl_nspid = NULL; } - locks_wake_up_blocks(fl); } -/* - * Unlink a lock from all lists and free it. - * - * Must be called with i_lock held! - */ -static void locks_delete_lock(struct file_lock **thisfl_p, - struct list_head *dispose) +static void +locks_delete_lock_ctx(struct file_lock *fl, int *counter, + struct list_head *dispose) { - struct file_lock *fl = *thisfl_p; - - locks_unlink_lock(thisfl_p); + locks_unlink_lock_ctx(fl, counter); if (dispose) - list_add(&fl->fl_block, dispose); + list_add(&fl->fl_list, dispose); else locks_free_lock(fl); } @@ -746,22 +764,27 @@ void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; + struct file_lock_context *ctx; struct inode *inode = file_inode(filp); - spin_lock(&inode->i_lock); - for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { - if (!IS_POSIX(cfl)) - continue; - if (posix_locks_conflict(fl, cfl)) - break; - } - if (cfl) { - locks_copy_conflock(fl, cfl); - if (cfl->fl_nspid) - fl->fl_pid = pid_vnr(cfl->fl_nspid); - } else + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; - spin_unlock(&inode->i_lock); + return; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { + if (posix_locks_conflict(fl, cfl)) { + locks_copy_conflock(fl, cfl); + if (cfl->fl_nspid) + fl->fl_pid = pid_vnr(cfl->fl_nspid); + goto out; + } + } + fl->fl_type = F_UNLCK; +out: + spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); @@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, static int flock_lock_file(struct file *filp, struct file_lock *request) { struct file_lock *new_fl = NULL; - struct file_lock **before; - struct inode * inode = file_inode(filp); + struct file_lock *fl; + struct file_lock_context *ctx; + struct inode *inode = file_inode(filp); int error = 0; - int found = 0; + bool found = false; LIST_HEAD(dispose); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { new_fl = locks_alloc_lock(); if (!new_fl) return -ENOMEM; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { if (filp != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; - found = 1; - locks_delete_lock(before, &dispose); + found = true; + locks_delete_lock_ctx(fl, &ctx->flc_flock_cnt, &dispose); break; } @@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) { - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); cond_resched(); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); } find_conflict: - for_each_lock(inode, before) { - struct file_lock *fl = *before; - if (IS_POSIX(fl)) - break; - if (IS_LEASE(fl)) - continue; + list_for_each_entry(fl, &ctx->flc_flock, fl_list) { if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; @@ -911,12 +929,12 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &ctx->flc_flock_cnt, &ctx->flc_flock); new_fl = NULL; error = 0; out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); @@ -925,16 +943,20 @@ out: static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { - struct file_lock *fl; + struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; - struct file_lock **before; + struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. @@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str new_fl2 = locks_alloc_lock(); } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash. */ if (request->fl_type != F_UNLCK) { - for_each_lock(inode, before) { - fl = *before; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) @@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (request->fl_flags & FL_ACCESS) goto out; - /* - * Find the first old lock with the same owner as the new lock. - */ - - before = &inode->i_flock; - - /* First skip locks owned by other processes. */ - while ((fl = *before) && (!IS_POSIX(fl) || - !posix_same_owner(request, fl))) { - before = &fl->fl_next; + /* Find the first old lock with the same owner as the new lock */ + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { + if (posix_same_owner(request, fl)) + break; } /* Process locks with this owner. */ - while ((fl = *before) && posix_same_owner(request, fl)) { - /* Detect adjacent or overlapping regions (if same lock type) - */ + list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) { + if (!posix_same_owner(request, fl)) + break; + + /* Detect adjacent or overlapping regions (if same lock type) */ if (request->fl_type == fl->fl_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) - goto next_lock; + continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ @@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str else request->fl_end = fl->fl_end; if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &ctx->flc_posix_cnt, + &dispose); continue; } request = fl; added = true; - } - else { + } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) - goto next_lock; + continue; if (fl->fl_start > request->fl_end) break; if (request->fl_type == F_UNLCK) @@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * one (This may happen several times). */ if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, + &ctx->flc_posix_cnt, &dispose); continue; } /* @@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_copy_lock(new_fl, request); request = new_fl; new_fl = NULL; - locks_delete_lock(before, &dispose); - locks_insert_lock(before, request); + locks_insert_lock_ctx(request, + &ctx->flc_posix_cnt, &fl->fl_list); + locks_delete_lock_ctx(fl, + &ctx->flc_posix_cnt, &dispose); added = true; } } - /* Go on to next lock. - */ - next_lock: - before = &fl->fl_next; } /* @@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str goto out; } locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &ctx->flc_posix_cnt, + &fl->fl_list); new_fl = NULL; } if (right) { @@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); - locks_insert_lock(before, left); + locks_insert_lock_ctx(left, &ctx->flc_posix_cnt, + &fl->fl_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(right); @@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_wake_up_blocks(left); } out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); /* * Free any unused locks. */ @@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait); */ int locks_mandatory_locked(struct file *file) { + int ret; struct inode *inode = file_inode(file); + struct file_lock_context *ctx; struct file_lock *fl; + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) + return 0; + /* * Search the lock list for this inode for any POSIX locks. */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!IS_POSIX(fl)) - continue; + spin_lock(&ctx->flc_lock); + ret = 0; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (fl->fl_owner != current->files && - fl->fl_owner != file) + fl->fl_owner != file) { + ret = -EAGAIN; break; + } } - spin_unlock(&inode->i_lock); - return fl ? -EAGAIN : 0; + spin_unlock(&ctx->flc_lock); + return ret; } /** @@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg) } /* We already had a lease on this file; just change its type */ -int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) +int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) { - struct file_lock *fl = *before; + struct file_lock_context *flctx; int error = assign_type(fl, arg); if (error) @@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) if (arg == F_UNLCK) { struct file *filp = fl->fl_file; + flctx = file_inode(filp)->i_flctx; f_delown(filp); filp->f_owner.signum = 0; fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); @@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } - locks_delete_lock(before, dispose); + locks_delete_lock_ctx(fl, &flctx->flc_lease_cnt, dispose); } return 0; } @@ -1329,20 +1355,17 @@ static bool past_time(unsigned long then) static void time_out_leases(struct inode *inode, struct list_head *dispose) { - struct file_lock **before; - struct file_lock *fl; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl, *tmp; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); - before = &inode->i_flock; - while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) - lease_modify(before, F_RDLCK, dispose); + lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) - lease_modify(before, F_UNLCK, dispose); - if (fl == *before) /* lease_modify may have freed fl */ - before = &fl->fl_next; + lease_modify(fl, F_UNLCK, dispose); } } @@ -1356,11 +1379,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) static bool any_leases_conflict(struct inode *inode, struct file_lock *breaker) { + struct file_lock_context *ctx = inode->i_flctx; struct file_lock *fl; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); - for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (leases_conflict(fl, breaker)) return true; } @@ -1384,7 +1408,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock *new_fl; - struct file_lock *fl, **before; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); @@ -1394,7 +1419,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) return PTR_ERR(new_fl); new_fl->fl_flags = type; - spin_lock(&inode->i_lock); + /* typically we will check that ctx is non-NULL before calling */ + if (!ctx) { + WARN_ON_ONCE(1); + return error; + } + + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); @@ -1408,9 +1439,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time++; /* so that 0 means no break time */ } - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (!leases_conflict(fl, new_fl)) continue; if (want_write) { @@ -1419,17 +1448,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) fl->fl_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { - if (lease_breaking(inode->i_flock)) + if (lease_breaking(fl)) continue; fl->fl_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } if (fl->fl_lmops->lm_break(fl)) - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &ctx->flc_lease_cnt, + &dispose); } - fl = inode->i_flock; - if (!fl || !IS_LEASE(fl)) + if (list_empty(&ctx->flc_lease)) goto out; if (mode & O_NONBLOCK) { @@ -1439,18 +1468,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) } restart: - break_time = inode->i_flock->fl_break_time; + fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list); + break_time = fl->fl_break_time; if (break_time != 0) break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(inode->i_flock, new_fl); + locks_insert_block(fl, new_fl); trace_break_lease_block(inode, new_fl); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); locks_delete_block(new_fl); if (error >= 0) { @@ -1462,12 +1492,10 @@ restart: time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart; - error = 0; } - out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; @@ -1487,14 +1515,18 @@ EXPORT_SYMBOL(__break_lease); void lease_get_mtime(struct inode *inode, struct timespec *time) { bool has_lease = false; - struct file_lock *flock; + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl; - if (inode->i_flock) { - spin_lock(&inode->i_lock); - flock = inode->i_flock; - if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) - has_lease = true; - spin_unlock(&inode->i_lock); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + if (!list_empty(&ctx->flc_lease)) { + fl = list_first_entry(&ctx->flc_lease, + struct file_lock, fl_list); + if (fl->fl_type == F_WRLCK) + has_lease = true; + } + spin_unlock(&ctx->flc_lock); } if (has_lease) @@ -1532,20 +1564,22 @@ int fcntl_getlease(struct file *filp) { struct file_lock *fl; struct inode *inode = file_inode(filp); + struct file_lock_context *ctx = inode->i_flctx; int type = F_UNLCK; LIST_HEAD(dispose); - spin_lock(&inode->i_lock); - time_out_leases(file_inode(filp), &dispose); - for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); - fl = fl->fl_next) { - if (fl->fl_file == filp) { + if (ctx && !list_empty_careful(&ctx->flc_lease)) { + spin_lock(&ctx->flc_lock); + time_out_leases(file_inode(filp), &dispose); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file != filp) + continue; type = target_leasetype(fl); break; } + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); } - spin_unlock(&inode->i_lock); - locks_dispose_list(&dispose); return type; } @@ -1578,9 +1612,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg) static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { - struct file_lock *fl, **before, **my_before = NULL, *lease; + struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; LIST_HEAD(dispose); @@ -1588,6 +1623,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr lease = *flp; trace_generic_add_lease(inode, lease); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock @@ -1606,7 +1645,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr return -EINVAL; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg); if (error) @@ -1621,13 +1660,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * except for this filp. */ error = -EAGAIN; - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp) { - my_before = before; + my_fl = fl; continue; } + /* * No exclusive leases if someone else has a lease on * this file: @@ -1642,9 +1680,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr goto out; } - if (my_before != NULL) { - lease = *my_before; - error = lease->fl_lmops->lm_change(my_before, arg, &dispose); + if (my_fl != NULL) { + error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); if (error) goto out; goto out_setup; @@ -1654,7 +1691,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr if (!leases_enable) goto out; - locks_insert_lock(before, lease); + locks_insert_lock_ctx(lease, &ctx->flc_lease_cnt, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting @@ -1666,45 +1703,49 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr */ smp_mb(); error = check_conflicting_open(dentry, arg); - if (error) - goto out_unlink; + if (error) { + locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt); + goto out; + } out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); if (is_deleg) mutex_unlock(&inode->i_mutex); - if (!error && !my_before) + if (!error && !my_fl) *flp = NULL; return error; -out_unlink: - locks_unlink_lock(before); - goto out; } static int generic_delete_lease(struct file *filp) { int error = -EAGAIN; - struct file_lock *fl, **before; + struct file_lock *fl, *victim = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + struct file_lock_context *ctx = inode->i_flctx; LIST_HEAD(dispose); - spin_lock(&inode->i_lock); - time_out_leases(inode, &dispose); - for (before = &inode->i_flock; - ((fl = *before) != NULL) && IS_LEASE(fl); - before = &fl->fl_next) { - if (fl->fl_file == filp) + if (!ctx) { + trace_generic_delete_lease(inode, NULL); + return error; + } + + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file == filp) { + victim = fl; break; + } } trace_generic_delete_lease(inode, fl); - if (fl) - error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); - spin_unlock(&inode->i_lock); + if (victim) + error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); return error; } @@ -2171,7 +2212,7 @@ again: */ /* * we need that spin_lock here - it prevents reordering between - * update of inode->i_flock and check for it done in close(). + * update of i_flctx->flc_posix and check for it done in close(). * rcu_read_lock() wouldn't do. */ spin_lock(¤t->files->file_lock); @@ -2331,13 +2372,14 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { struct file_lock lock; + struct file_lock_context *ctx = file_inode(filp)->i_flctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - if (!file_inode(filp)->i_flock) + if (!ctx || list_empty(&ctx->flc_posix)) return; lock.fl_type = F_UNLCK; @@ -2358,67 +2400,67 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) EXPORT_SYMBOL(locks_remove_posix); +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_flock(struct file *filp) +{ + struct file_lock fl = { + .fl_owner = filp, + .fl_pid = current->tgid, + .fl_file = filp, + .fl_flags = FL_FLOCK, + .fl_type = F_UNLCK, + .fl_end = OFFSET_MAX, + }; + struct file_lock_context *flctx = file_inode(filp)->i_flctx; + + if (list_empty(&flctx->flc_flock)) + return; + + if (filp->f_op->flock) + filp->f_op->flock(filp, F_SETLKW, &fl); + else + flock_lock_file(filp, &fl); + + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); +} + +/* The i_flctx must be valid when calling into here */ +static void +locks_remove_lease(struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct file_lock_context *ctx = inode->i_flctx; + struct file_lock *fl, *tmp; + LIST_HEAD(dispose); + + if (list_empty(&ctx->flc_lease)) + return; + + spin_lock(&ctx->flc_lock); + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) + lease_modify(fl, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); + locks_dispose_list(&dispose); +} + /* * This function is called on the last close of an open file. */ void locks_remove_file(struct file *filp) { - struct inode * inode = file_inode(filp); - struct file_lock *fl; - struct file_lock **before; - LIST_HEAD(dispose); - - if (!inode->i_flock) + if (!file_inode(filp)->i_flctx) return; + /* remove any OFD locks */ locks_remove_posix(filp, filp); - if (filp->f_op->flock) { - struct file_lock fl = { - .fl_owner = filp, - .fl_pid = current->tgid, - .fl_file = filp, - .fl_flags = FL_FLOCK, - .fl_type = F_UNLCK, - .fl_end = OFFSET_MAX, - }; - filp->f_op->flock(filp, F_SETLKW, &fl); - if (fl.fl_ops && fl.fl_ops->fl_release_private) - fl.fl_ops->fl_release_private(&fl); - } - - spin_lock(&inode->i_lock); - before = &inode->i_flock; - - while ((fl = *before) != NULL) { - if (fl->fl_file == filp) { - if (IS_LEASE(fl)) { - lease_modify(before, F_UNLCK, &dispose); - continue; - } - - /* - * There's a leftover lock on the list of a type that - * we didn't expect to see. Most likely a classic - * POSIX lock that ended up not getting released - * properly, or that raced onto the list somehow. Log - * some info about it and then just remove it from - * the list. - */ - WARN(!IS_FLOCK(fl), - "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", - MAJOR(inode->i_sb->s_dev), - MINOR(inode->i_sb->s_dev), inode->i_ino, - fl->fl_type, fl->fl_flags, - fl->fl_start, fl->fl_end); + /* remove flock locks */ + locks_remove_flock(filp); - locks_delete_lock(before, &dispose); - continue; - } - before = &fl->fl_next; - } - spin_unlock(&inode->i_lock); - locks_dispose_list(&dispose); + /* remove any leases */ + locks_remove_lease(filp); } /** @@ -2621,6 +2663,9 @@ static int __init filelock_init(void) { int i; + flctx_cache = kmem_cache_create("file_lock_ctx", + sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); + filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f3f60641344..8cdb2b28a104 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ { struct inode *inode = state->inode; struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; int status = 0; - if (inode->i_flock == NULL) + if (flctx == NULL) goto out; - /* Protect inode->i_flock using the i_lock */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) - continue; + list = &flctx->flc_posix; + spin_lock(&flctx->flc_lock); +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file) != ctx) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; + } + spin_unlock(&flctx->flc_lock); out: return status; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 10bf07280f4a..294692ff83b1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -212,6 +212,12 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { + struct inode *inode = iocb->ki_filp->f_mapping->host; + + /* we only support swap file calling nfs_direct_IO */ + if (!IS_SWAPFILE(inode)) + return 0; + #ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp, (long long) pos, iter->nr_segs); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4bffe637ea32..2211f6ba8736 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -352,8 +352,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_attr_check_mountpoint(sb, fattr); - if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) && - !nfs_attr_use_mounted_on_fileid(fattr)) + if (nfs_attr_use_mounted_on_fileid(fattr)) + fattr->fileid = fattr->mounted_on_fileid; + else if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) goto out_no_inode; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index efaa31c70fbe..b6f34bfa6fe8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -31,8 +31,6 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) return 0; - - fattr->fileid = fattr->mounted_on_fileid; return 1; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 03311259b0c4..706ad10b8186 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -228,6 +228,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) kfree(clp->cl_serverowner); kfree(clp->cl_serverscope); kfree(clp->cl_implid); + kfree(clp->cl_owner_id); } void nfs4_free_client(struct nfs_client *clp) @@ -452,6 +453,14 @@ static void nfs4_swap_callback_idents(struct nfs_client *keep, spin_unlock(&nn->nfs_client_lock); } +static bool nfs4_match_client_owner_id(const struct nfs_client *clp1, + const struct nfs_client *clp2) +{ + if (clp1->cl_owner_id == NULL || clp2->cl_owner_id == NULL) + return true; + return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0; +} + /** * nfs40_walk_client_list - Find server that recognizes a client ID * @@ -483,9 +492,6 @@ int nfs40_walk_client_list(struct nfs_client *new, if (pos->rpc_ops != new->rpc_ops) continue; - if (pos->cl_proto != new->cl_proto) - continue; - if (pos->cl_minorversion != new->cl_minorversion) continue; @@ -510,6 +516,9 @@ int nfs40_walk_client_list(struct nfs_client *new, if (pos->cl_clientid != new->cl_clientid) continue; + if (!nfs4_match_client_owner_id(pos, new)) + continue; + atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -566,20 +575,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) } /* - * Returns true if the server owners match + * Returns true if the server major ids match */ static bool -nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b) { struct nfs41_server_owner *o1 = a->cl_serverowner; struct nfs41_server_owner *o2 = b->cl_serverowner; - if (o1->minor_id != o2->minor_id) { - dprintk("NFS: --> %s server owner minor IDs do not match\n", - __func__); - return false; - } - if (o1->major_id_sz != o2->major_id_sz) goto out_major_mismatch; if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) @@ -621,9 +624,6 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos->rpc_ops != new->rpc_ops) continue; - if (pos->cl_proto != new->cl_proto) - continue; - if (pos->cl_minorversion != new->cl_minorversion) continue; @@ -639,7 +639,7 @@ int nfs41_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - if (status == 0) { + if (pos->cl_cons_state == NFS_CS_SESSION_INITING) { nfs4_schedule_lease_recovery(pos); status = nfs4_wait_clnt_recover(pos); } @@ -654,7 +654,19 @@ int nfs41_walk_client_list(struct nfs_client *new, if (!nfs4_match_clientids(pos, new)) continue; - if (!nfs4_match_serverowners(pos, new)) + /* + * Note that session trunking is just a special subcase of + * client id trunking. In either case, we want to fall back + * to using the existing nfs_client. + */ + if (!nfs4_check_clientid_trunking(pos, new)) + continue; + + /* Unlike NFSv4.0, we know that NFSv4.1 always uses the + * uniform string, however someone might switch the + * uniquifier string on us. + */ + if (!nfs4_match_client_owner_id(pos, new)) continue; atomic_inc(&pos->cl_count); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e7f8d5ff2581..c347705b0161 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1117,8 +1117,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) - return 0; if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return 0; nfs_mark_delegation_referenced(delegation); @@ -4917,11 +4915,14 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, } static unsigned int -nfs4_init_nonuniform_client_string(const struct nfs_client *clp, +nfs4_init_nonuniform_client_string(struct nfs_client *clp, char *buf, size_t len) { unsigned int result; + if (clp->cl_owner_id != NULL) + return strlcpy(buf, clp->cl_owner_id, len); + rcu_read_lock(); result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", clp->cl_ipaddr, @@ -4930,24 +4931,32 @@ nfs4_init_nonuniform_client_string(const struct nfs_client *clp, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); rcu_read_unlock(); + clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); return result; } static unsigned int -nfs4_init_uniform_client_string(const struct nfs_client *clp, +nfs4_init_uniform_client_string(struct nfs_client *clp, char *buf, size_t len) { const char *nodename = clp->cl_rpcclient->cl_nodename; + unsigned int result; + + if (clp->cl_owner_id != NULL) + return strlcpy(buf, clp->cl_owner_id, len); if (nfs4_client_id_uniquifier[0] != '\0') - return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", + result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", clp->rpc_ops->version, clp->cl_minorversion, nfs4_client_id_uniquifier, nodename); - return scnprintf(buf, len, "Linux NFSv%u.%u %s", + else + result = scnprintf(buf, len, "Linux NFSv%u.%u %s", clp->rpc_ops->version, clp->cl_minorversion, nodename); + clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); + return result; } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5194933ed419..a3bb22ab68c5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1366,49 +1366,55 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct nfs_inode *nfsi = NFS_I(inode); struct file_lock *fl; int status = 0; + struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; - if (inode->i_flock == NULL) + if (flctx == NULL) return 0; + list = &flctx->flc_posix; + /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); - /* Protect inode->i_flock using the BKL */ - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) - continue; + spin_lock(&flctx->flc_lock); +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file)->state != state) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = ops->recover_lock(state, fl); switch (status) { - case 0: - break; - case -ESTALE: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - goto out; - default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); - case -ENOMEM: - case -NFS4ERR_DENIED: - case -NFS4ERR_RECLAIM_BAD: - case -NFS4ERR_RECLAIM_CONFLICT: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - status = 0; + case 0: + break; + case -ESTALE: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto out; + default: + pr_err("NFS: %s: unhandled error %d\n", + __func__, status); + case -ENOMEM: + case -NFS4ERR_DENIED: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + status = 0; } - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; + } + spin_unlock(&flctx->flc_lock); out: up_write(&nfsi->rwsem); return status; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2b5e769beb16..29c7f33c9cf1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -826,11 +826,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_pageio_descriptor *pgio) { size_t size; + struct file_lock_context *flctx; if (prev) { if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; - if (req->wb_context->dentry->d_inode->i_flock != NULL && + flctx = req->wb_context->dentry->d_inode->i_flctx; + if (flctx != NULL && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock)) && !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return false; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..4ae66f416eb9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; + struct file_lock_context *flctx = file_inode(file)->i_flctx; struct nfs_page *req; int do_flush, status; /* @@ -1109,7 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush = req->wb_page != page || req->wb_context != ctx; /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; - if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { + if (l_ctx && flctx && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock))) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } @@ -1170,6 +1173,13 @@ out: return PageUptodate(page) != 0; } +static bool +is_whole_file_wrlock(struct file_lock *fl) +{ + return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && + fl->fl_type == F_WRLCK; +} + /* If we know the page is up to date, and we're not using byte range locks (or * if we have the whole file locked for writing), it may be more efficient to * extend the write to cover the entire page in order to avoid fragmentation @@ -1180,17 +1190,36 @@ out: */ static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) { + int ret; + struct file_lock_context *flctx = inode->i_flctx; + struct file_lock *fl; + if (file->f_flags & O_DSYNC) return 0; if (!nfs_write_pageuptodate(page, inode)) return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && - inode->i_flock->fl_end == OFFSET_MAX && - inode->i_flock->fl_type != F_RDLCK)) - return 1; - return 0; + if (!flctx || (list_empty_careful(&flctx->flc_flock) && + list_empty_careful(&flctx->flc_posix))) + return 0; + + /* Check to see if there are whole file write locks */ + ret = 0; + spin_lock(&flctx->flc_lock); + if (!list_empty(&flctx->flc_posix)) { + fl = list_first_entry(&flctx->flc_posix, struct file_lock, + fl_list); + if (is_whole_file_wrlock(fl)) + ret = 1; + } else if (!list_empty(&flctx->flc_flock)) { + fl = list_first_entry(&flctx->flc_flock, struct file_lock, + fl_list); + if (fl->fl_type == F_WRLCK) + ret = 1; + } + spin_unlock(&flctx->flc_lock); + return ret; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3550a9c87616..532a60cca2fb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3477,7 +3477,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) } static int -nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) +nfsd_change_deleg_cb(struct file_lock *onlist, int arg, + struct list_head *dispose) { if (arg & F_UNLCK) return lease_modify(onlist, arg, dispose); @@ -3897,11 +3898,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = nfs4_setlease(dp); goto out; } - atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { status = -EAGAIN; goto out_unlock; } + atomic_inc(&fp->fi_delegees); hash_delegation_locked(dp, fp); status = 0; out_unlock: @@ -5556,10 +5557,11 @@ out_nfserr: static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { - struct file_lock **flpp; + struct file_lock *fl; int status = false; struct file *filp = find_any_file(fp); struct inode *inode; + struct file_lock_context *flctx; if (!filp) { /* Any valid lock stateid should have some sort of access */ @@ -5568,15 +5570,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = file_inode(filp); + flctx = inode->i_flctx; - spin_lock(&inode->i_lock); - for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { - if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = true; - break; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&flctx->flc_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_owner == (fl_owner_t)lowner) { + status = true; + break; + } } + spin_unlock(&flctx->flc_lock); } - spin_unlock(&inode->i_lock); fput(filp); return status; } diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 91093cd74f0d..385704027575 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -141,7 +141,6 @@ enum { * @ti_save: Backup of journal_info field of task_struct * @ti_flags: Flags * @ti_count: Nest level - * @ti_garbage: List of inode to be put when releasing semaphore */ struct nilfs_transaction_info { u32 ti_magic; @@ -150,7 +149,6 @@ struct nilfs_transaction_info { one of other filesystems has a bug. */ unsigned short ti_flags; unsigned short ti_count; - struct list_head ti_garbage; }; /* ti_magic */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 7ef18fc656c2..469086b9f99b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -305,7 +305,6 @@ static void nilfs_transaction_lock(struct super_block *sb, ti->ti_count = 0; ti->ti_save = cur_ti; ti->ti_magic = NILFS_TI_MAGIC; - INIT_LIST_HEAD(&ti->ti_garbage); current->journal_info = ti; for (;;) { @@ -332,8 +331,6 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; - if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -746,6 +743,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs, } } +static void nilfs_iput_work_func(struct work_struct *work) +{ + struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info, + sc_iput_work); + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0); +} + static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, struct nilfs_root *root) { @@ -1900,8 +1906,8 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; + int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { @@ -1912,9 +1918,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_move_tail(&ii->i_dirty, &ti->ti_garbage); + list_del_init(&ii->i_dirty); + if (!ii->vfs_inode.i_nlink) { + /* + * Defer calling iput() to avoid a deadlock + * over I_SYNC flag for inodes with i_nlink == 0 + */ + list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); + defer_iput = true; + } else { + spin_unlock(&nilfs->ns_inode_lock); + iput(&ii->vfs_inode); + spin_lock(&nilfs->ns_inode_lock); + } } spin_unlock(&nilfs->ns_inode_lock); + + if (defer_iput) + schedule_work(&sci->sc_iput_work); } /* @@ -2583,6 +2604,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); + INIT_LIST_HEAD(&sci->sc_iput_queue); + INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2609,6 +2632,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) ret = nilfs_segctor_construct(sci, SC_LSEG_SR); nilfs_transaction_unlock(sci->sc_super); + flush_work(&sci->sc_iput_work); + } while (ret && retrycount-- > 0); } @@ -2633,6 +2658,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + if (flush_work(&sci->sc_iput_work)) + flag = true; + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); @@ -2642,6 +2670,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } + if (!list_empty(&sci->sc_iput_queue)) { + nilfs_warning(sci->sc_super, __func__, + "iput queue is not empty\n"); + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); + } + WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d0013314..a48d6de1e02c 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/fs.h> #include <linux/buffer_head.h> +#include <linux/workqueue.h> #include <linux/nilfs2_fs.h> #include "nilfs.h" @@ -92,6 +93,8 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written + * @sc_iput_queue: list of inodes for which iput should be done + * @sc_iput_work: work struct to defer iput call * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -135,6 +138,8 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; + struct list_head sc_iput_queue; + struct work_struct sc_iput_work; __u64 *sc_freesegs; size_t sc_nfreesegs; diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..2a24249b30af 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -1,5 +1,6 @@ config FSNOTIFY def_bool n + select SRCU source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c991616acca9..bff8567aa42d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -259,16 +259,15 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, struct fsnotify_event *kevent; char __user *start; int ret; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); + add_wait_queue(&group->notification_waitq, &wait); while (1) { - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&group->notification_mutex); kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); @@ -289,7 +288,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, if (start != buf) break; - schedule(); + + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } @@ -318,8 +318,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, buf += ret; count -= ret; } + remove_wait_queue(&group->notification_waitq, &wait); - finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 79b5af5e6a7b..cecd875653e4 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2023,11 +2023,8 @@ leave: dlm_lockres_drop_inflight_ref(dlm, res); spin_unlock(&res->spinlock); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - if (newlock) - dlm_lock_put(newlock); - } return ret; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b931e04e3388..914c121ec890 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -94,6 +94,14 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, struct inode *inode, const char *symname); +static int ocfs2_double_lock(struct ocfs2_super *osb, + struct buffer_head **bh1, + struct inode *inode1, + struct buffer_head **bh2, + struct inode *inode2, + int rename); + +static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2); /* An orphan dir name is an 8 byte value, printed as a hex string */ #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) @@ -678,8 +686,10 @@ static int ocfs2_link(struct dentry *old_dentry, { handle_t *handle; struct inode *inode = old_dentry->d_inode; + struct inode *old_dir = old_dentry->d_parent->d_inode; int err; struct buffer_head *fe_bh = NULL; + struct buffer_head *old_dir_bh = NULL; struct buffer_head *parent_fe_bh = NULL; struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); @@ -696,19 +706,33 @@ static int ocfs2_link(struct dentry *old_dentry, dquot_initialize(dir); - err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); + err = ocfs2_double_lock(osb, &old_dir_bh, old_dir, + &parent_fe_bh, dir, 0); if (err < 0) { if (err != -ENOENT) mlog_errno(err); return err; } + /* make sure both dirs have bhs + * get an extra ref on old_dir_bh if old==new */ + if (!parent_fe_bh) { + if (old_dir_bh) { + parent_fe_bh = old_dir_bh; + get_bh(parent_fe_bh); + } else { + mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str); + err = -EIO; + goto out; + } + } + if (!dir->i_nlink) { err = -ENOENT; goto out; } - err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name, + err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name, old_dentry->d_name.len, &old_de_ino); if (err) { err = -ENOENT; @@ -801,10 +825,11 @@ out_unlock_inode: ocfs2_inode_unlock(inode, 1); out: - ocfs2_inode_unlock(dir, 1); + ocfs2_double_unlock(old_dir, dir); brelse(fe_bh); brelse(parent_fe_bh); + brelse(old_dir_bh); ocfs2_free_dir_lookup_result(&lookup); @@ -1072,14 +1097,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb, } /* - * The only place this should be used is rename! + * The only place this should be used is rename and link! * if they have the same id, then the 1st one is the only one locked. */ static int ocfs2_double_lock(struct ocfs2_super *osb, struct buffer_head **bh1, struct inode *inode1, struct buffer_head **bh2, - struct inode *inode2) + struct inode *inode2, + int rename) { int status; int inode1_is_ancestor, inode2_is_ancestor; @@ -1127,7 +1153,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id2 */ status = ocfs2_inode_lock_nested(inode2, bh2, 1, - OI_LS_RENAME1); + rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -1136,7 +1162,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2); + status = ocfs2_inode_lock_nested(inode1, bh1, 1, + rename == 1 ? OI_LS_RENAME2 : OI_LS_PARENT); if (status < 0) { /* * An error return must mean that no cluster locks @@ -1252,7 +1279,7 @@ static int ocfs2_rename(struct inode *old_dir, /* if old and new are the same, this'll just do one lock. */ status = ocfs2_double_lock(osb, &old_dir_bh, old_dir, - &new_dir_bh, new_dir); + &new_dir_bh, new_dir, 1); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 1eae330193a6..b6d51333ad02 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -48,6 +48,7 @@ struct ocfs2_quota_recovery { /* In-memory structure with quota header information */ struct ocfs2_mem_dqinfo { unsigned int dqi_type; /* Quota type this structure describes */ + unsigned int dqi_flags; /* Flags OLQF_* */ unsigned int dqi_chunks; /* Number of chunks in local quota file */ unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ unsigned int dqi_syncms; /* How often should we sync with other nodes */ diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 10b653930ee2..89c0b2620814 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -292,7 +292,7 @@ static void olq_update_info(struct buffer_head *bh, void *private) ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + OCFS2_LOCAL_INFO_OFF); spin_lock(&dq_data_lock); - ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + ldinfo->dqi_flags = cpu_to_le32(oinfo->dqi_flags); ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); spin_unlock(&dq_data_lock); @@ -701,8 +701,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) /* We don't need the lock and we have to acquire quota file locks * which will later depend on this lock */ mutex_unlock(&sb_dqopt(sb)->dqio_mutex); - info->dqi_maxblimit = 0x7fffffffffffffffLL; - info->dqi_maxilimit = 0x7fffffffffffffffLL; + info->dqi_max_spc_limit = 0x7fffffffffffffffLL; + info->dqi_max_ino_limit = 0x7fffffffffffffffLL; oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); if (!oinfo) { mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota" @@ -737,13 +737,13 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) } ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + OCFS2_LOCAL_INFO_OFF); - info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); + oinfo->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); oinfo->dqi_libh = bh; /* We crashed when using local quota file? */ - if (!(info->dqi_flags & OLQF_CLEAN)) { + if (!(oinfo->dqi_flags & OLQF_CLEAN)) { rec = OCFS2_SB(sb)->quota_rec; if (!rec) { rec = ocfs2_alloc_quota_recovery(); @@ -772,7 +772,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) } /* Now mark quota file as used */ - info->dqi_flags &= ~OLQF_CLEAN; + oinfo->dqi_flags &= ~OLQF_CLEAN; status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info); if (status < 0) { mlog_errno(status); @@ -857,7 +857,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) goto out; /* Mark local file as clean */ - info->dqi_flags |= OLQF_CLEAN; + oinfo->dqi_flags |= OLQF_CLEAN; status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], oinfo->dqi_libh, olq_update_info, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 83723179e1ec..706c71c2955d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1000,36 +1000,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) } } -/* Handle quota on quotactl */ -static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) -{ - unsigned int feature[OCFS2_MAXQUOTAS] = { - OCFS2_FEATURE_RO_COMPAT_USRQUOTA, - OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; - - if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) - return -EINVAL; - - return dquot_enable(sb_dqopt(sb)->files[type], type, - format_id, DQUOT_LIMITS_ENABLED); -} - -/* Handle quota off quotactl */ -static int ocfs2_quota_off(struct super_block *sb, int type) -{ - return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); -} - -static const struct quotactl_ops ocfs2_quotactl_ops = { - .quota_on_meta = ocfs2_quota_on, - .quota_off = ocfs2_quota_off, - .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk, -}; - static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -2079,7 +2049,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_op = &ocfs2_sops; sb->s_d_op = &ocfs2_dentry_ops; sb->s_export_op = &ocfs2_export_ops; - sb->s_qcop = &ocfs2_quotactl_ops; + sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->dq_op = &ocfs2_quota_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index c51df1dd237e..4a09975aac90 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -5,6 +5,7 @@ config QUOTA bool "Quota support" select QUOTACTL + select SRCU help If you say Y here, you will be able to set per user limits for disk usage (also called disk quotas). Currently, it works for the diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8f0acef3d184..0ccd4ba3a246 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1248,7 +1248,7 @@ static int ignore_hardlimit(struct dquot *dquot) return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || - !(info->dqi_flags & V1_DQF_RSQUASH)); + !(info->dqi_flags & DQF_ROOT_SQUASH)); } /* needs dq_data_lock */ @@ -2385,41 +2385,106 @@ out: } EXPORT_SYMBOL(dquot_quota_on_mount); -static inline qsize_t qbtos(qsize_t blocks) +static int dquot_quota_enable(struct super_block *sb, unsigned int flags) { - return blocks << QIF_DQBLKSIZE_BITS; + int ret; + int type; + struct quota_info *dqopt = sb_dqopt(sb); + + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) + return -ENOSYS; + /* Accounting cannot be turned on while fs is mounted */ + flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); + if (!flags) + return -EINVAL; + for (type = 0; type < MAXQUOTAS; type++) { + if (!(flags & qtype_enforce_flag(type))) + continue; + /* Can't enforce without accounting */ + if (!sb_has_quota_usage_enabled(sb, type)) + return -EINVAL; + ret = dquot_enable(dqopt->files[type], type, + dqopt->info[type].dqi_fmt_id, + DQUOT_LIMITS_ENABLED); + if (ret < 0) + goto out_err; + } + return 0; +out_err: + /* Backout enforcement enablement we already did */ + for (type--; type >= 0; type--) { + if (flags & qtype_enforce_flag(type)) + dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); + } + /* Error code translation for better compatibility with XFS */ + if (ret == -EBUSY) + ret = -EEXIST; + return ret; } -static inline qsize_t stoqb(qsize_t space) +static int dquot_quota_disable(struct super_block *sb, unsigned int flags) { - return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; + int ret; + int type; + struct quota_info *dqopt = sb_dqopt(sb); + + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) + return -ENOSYS; + /* + * We don't support turning off accounting via quotactl. In principle + * quota infrastructure can do this but filesystems don't expect + * userspace to be able to do it. + */ + if (flags & + (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) + return -EOPNOTSUPP; + + /* Filter out limits not enabled */ + for (type = 0; type < MAXQUOTAS; type++) + if (!sb_has_quota_limits_enabled(sb, type)) + flags &= ~qtype_enforce_flag(type); + /* Nothing left? */ + if (!flags) + return -EEXIST; + for (type = 0; type < MAXQUOTAS; type++) { + if (flags & qtype_enforce_flag(type)) { + ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); + if (ret < 0) + goto out_err; + } + } + return 0; +out_err: + /* Backout enforcement disabling we already did */ + for (type--; type >= 0; type--) { + if (flags & qtype_enforce_flag(type)) + dquot_enable(dqopt->files[type], type, + dqopt->info[type].dqi_fmt_id, + DQUOT_LIMITS_ENABLED); + } + return ret; } /* Generic routine for getting common part of quota structure */ -static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) +static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); - di->d_version = FS_DQUOT_VERSION; - di->d_flags = dquot->dq_id.type == USRQUOTA ? - FS_USER_QUOTA : FS_GROUP_QUOTA; - di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id); - spin_lock(&dq_data_lock); - di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); - di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); + di->d_spc_hardlimit = dm->dqb_bhardlimit; + di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; - di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; - di->d_icount = dm->dqb_curinodes; - di->d_btimer = dm->dqb_btime; - di->d_itimer = dm->dqb_itime; + di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; + di->d_ino_count = dm->dqb_curinodes; + di->d_spc_timer = dm->dqb_btime; + di->d_ino_timer = dm->dqb_itime; spin_unlock(&dq_data_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *di) + struct qc_dqblk *di) { struct dquot *dquot; @@ -2433,70 +2498,70 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid, } EXPORT_SYMBOL(dquot_get_dqblk); -#define VFS_FS_DQ_MASK \ - (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ - FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ - FS_DQ_BTIMER | FS_DQ_ITIMER) +#define VFS_QC_MASK \ + (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ + QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ -static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) +static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; - if (di->d_fieldmask & ~VFS_FS_DQ_MASK) + if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; - if (((di->d_fieldmask & FS_DQ_BSOFT) && - (di->d_blk_softlimit > dqi->dqi_maxblimit)) || - ((di->d_fieldmask & FS_DQ_BHARD) && - (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || - ((di->d_fieldmask & FS_DQ_ISOFT) && - (di->d_ino_softlimit > dqi->dqi_maxilimit)) || - ((di->d_fieldmask & FS_DQ_IHARD) && - (di->d_ino_hardlimit > dqi->dqi_maxilimit))) + if (((di->d_fieldmask & QC_SPC_SOFT) && + di->d_spc_softlimit > dqi->dqi_max_spc_limit) || + ((di->d_fieldmask & QC_SPC_HARD) && + di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || + ((di->d_fieldmask & QC_INO_SOFT) && + (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || + ((di->d_fieldmask & QC_INO_HARD) && + (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; spin_lock(&dq_data_lock); - if (di->d_fieldmask & FS_DQ_BCOUNT) { - dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; + if (di->d_fieldmask & QC_SPACE) { + dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_BSOFT) - dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); - if (di->d_fieldmask & FS_DQ_BHARD) - dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); - if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { + if (di->d_fieldmask & QC_SPC_SOFT) + dm->dqb_bsoftlimit = di->d_spc_softlimit; + if (di->d_fieldmask & QC_SPC_HARD) + dm->dqb_bhardlimit = di->d_spc_hardlimit; + if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ICOUNT) { - dm->dqb_curinodes = di->d_icount; + if (di->d_fieldmask & QC_INO_COUNT) { + dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ISOFT) + if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; - if (di->d_fieldmask & FS_DQ_IHARD) + if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; - if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { + if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_BTIMER) { - dm->dqb_btime = di->d_btimer; + if (di->d_fieldmask & QC_SPC_TIMER) { + dm->dqb_btime = di->d_spc_timer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ITIMER) { - dm->dqb_itime = di->d_itimer; + if (di->d_fieldmask & QC_INO_TIMER) { + dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } @@ -2506,7 +2571,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) dm->dqb_curspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) + } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; } @@ -2515,7 +2580,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) dm->dqb_curinodes < dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) + } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; } @@ -2531,7 +2596,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *di) + struct qc_dqblk *di) { struct dquot *dquot; int rc; @@ -2582,6 +2647,14 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) goto out; } mi = sb_dqopt(sb)->info + type; + if (ii->dqi_valid & IIF_FLAGS) { + if (ii->dqi_flags & ~DQF_SETINFO_MASK || + (ii->dqi_flags & DQF_ROOT_SQUASH && + mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { + err = -EINVAL; + goto out; + } + } spin_lock(&dq_data_lock); if (ii->dqi_valid & IIF_BGRACE) mi->dqi_bgrace = ii->dqi_bgrace; @@ -2611,6 +2684,17 @@ const struct quotactl_ops dquot_quotactl_ops = { }; EXPORT_SYMBOL(dquot_quotactl_ops); +const struct quotactl_ops dquot_quotactl_sysfile_ops = { + .quota_enable = dquot_quota_enable, + .quota_disable = dquot_quota_disable, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk +}; +EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); + static int do_proc_dqstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2aa4151f99d2..d14a799c7785 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -66,18 +66,40 @@ static int quota_sync_all(int type) return ret; } +unsigned int qtype_enforce_flag(int type) +{ + switch (type) { + case USRQUOTA: + return FS_QUOTA_UDQ_ENFD; + case GRPQUOTA: + return FS_QUOTA_GDQ_ENFD; + case PRJQUOTA: + return FS_QUOTA_PDQ_ENFD; + } + return 0; +} + static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, struct path *path) { - if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta) + if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) return -ENOSYS; - if (sb->s_qcop->quota_on_meta) - return sb->s_qcop->quota_on_meta(sb, type, id); + if (sb->s_qcop->quota_enable) + return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type)); if (IS_ERR(path)) return PTR_ERR(path); return sb->s_qcop->quota_on(sb, type, id, path); } +static int quota_quotaoff(struct super_block *sb, int type) +{ + if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable) + return -ENOSYS; + if (sb->s_qcop->quota_disable) + return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type)); + return sb->s_qcop->quota_off(sb, type); +} + static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; @@ -118,17 +140,27 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) return sb->s_qcop->set_info(sb, type, &info); } -static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) +static inline qsize_t qbtos(qsize_t blocks) +{ + return blocks << QIF_DQBLKSIZE_BITS; +} + +static inline qsize_t stoqb(qsize_t space) +{ + return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; +} + +static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src) { memset(dst, 0, sizeof(*dst)); - dst->dqb_bhardlimit = src->d_blk_hardlimit; - dst->dqb_bsoftlimit = src->d_blk_softlimit; - dst->dqb_curspace = src->d_bcount; + dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit); + dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit); + dst->dqb_curspace = src->d_space; dst->dqb_ihardlimit = src->d_ino_hardlimit; dst->dqb_isoftlimit = src->d_ino_softlimit; - dst->dqb_curinodes = src->d_icount; - dst->dqb_btime = src->d_btimer; - dst->dqb_itime = src->d_itimer; + dst->dqb_curinodes = src->d_ino_count; + dst->dqb_btime = src->d_spc_timer; + dst->dqb_itime = src->d_ino_timer; dst->dqb_valid = QIF_ALL; } @@ -136,7 +168,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; - struct fs_disk_quota fdq; + struct qc_dqblk fdq; struct if_dqblk idq; int ret; @@ -154,36 +186,36 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } -static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) +static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { - dst->d_blk_hardlimit = src->dqb_bhardlimit; - dst->d_blk_softlimit = src->dqb_bsoftlimit; - dst->d_bcount = src->dqb_curspace; + dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); + dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit); + dst->d_space = src->dqb_curspace; dst->d_ino_hardlimit = src->dqb_ihardlimit; dst->d_ino_softlimit = src->dqb_isoftlimit; - dst->d_icount = src->dqb_curinodes; - dst->d_btimer = src->dqb_btime; - dst->d_itimer = src->dqb_itime; + dst->d_ino_count = src->dqb_curinodes; + dst->d_spc_timer = src->dqb_btime; + dst->d_ino_timer = src->dqb_itime; dst->d_fieldmask = 0; if (src->dqb_valid & QIF_BLIMITS) - dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; + dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD; if (src->dqb_valid & QIF_SPACE) - dst->d_fieldmask |= FS_DQ_BCOUNT; + dst->d_fieldmask |= QC_SPACE; if (src->dqb_valid & QIF_ILIMITS) - dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; + dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD; if (src->dqb_valid & QIF_INODES) - dst->d_fieldmask |= FS_DQ_ICOUNT; + dst->d_fieldmask |= QC_INO_COUNT; if (src->dqb_valid & QIF_BTIME) - dst->d_fieldmask |= FS_DQ_BTIMER; + dst->d_fieldmask |= QC_SPC_TIMER; if (src->dqb_valid & QIF_ITIME) - dst->d_fieldmask |= FS_DQ_ITIMER; + dst->d_fieldmask |= QC_INO_TIMER; } static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { - struct fs_disk_quota fdq; + struct qc_dqblk fdq; struct if_dqblk idq; struct kqid qid; @@ -198,15 +230,26 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id, return sb->s_qcop->set_dqblk(sb, qid, &fdq); } -static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) +static int quota_enable(struct super_block *sb, void __user *addr) +{ + __u32 flags; + + if (copy_from_user(&flags, addr, sizeof(flags))) + return -EFAULT; + if (!sb->s_qcop->quota_enable) + return -ENOSYS; + return sb->s_qcop->quota_enable(sb, flags); +} + +static int quota_disable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; - if (!sb->s_qcop->set_xstate) + if (!sb->s_qcop->quota_disable) return -ENOSYS; - return sb->s_qcop->set_xstate(sb, flags, cmd); + return sb->s_qcop->quota_disable(sb, flags); } static int quota_getxstate(struct super_block *sb, void __user *addr) @@ -247,10 +290,78 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) return ret; } +/* + * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them + * out of there as xfsprogs rely on definitions being in that header file. So + * just define same functions here for quota purposes. + */ +#define XFS_BB_SHIFT 9 + +static inline u64 quota_bbtob(u64 blocks) +{ + return blocks << XFS_BB_SHIFT; +} + +static inline u64 quota_btobb(u64 bytes) +{ + return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT; +} + +static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src) +{ + dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit); + dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit); + dst->d_ino_hardlimit = src->d_ino_hardlimit; + dst->d_ino_softlimit = src->d_ino_softlimit; + dst->d_space = quota_bbtob(src->d_bcount); + dst->d_ino_count = src->d_icount; + dst->d_ino_timer = src->d_itimer; + dst->d_spc_timer = src->d_btimer; + dst->d_ino_warns = src->d_iwarns; + dst->d_spc_warns = src->d_bwarns; + dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit); + dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit); + dst->d_rt_space = quota_bbtob(src->d_rtbcount); + dst->d_rt_spc_timer = src->d_rtbtimer; + dst->d_rt_spc_warns = src->d_rtbwarns; + dst->d_fieldmask = 0; + if (src->d_fieldmask & FS_DQ_ISOFT) + dst->d_fieldmask |= QC_INO_SOFT; + if (src->d_fieldmask & FS_DQ_IHARD) + dst->d_fieldmask |= QC_INO_HARD; + if (src->d_fieldmask & FS_DQ_BSOFT) + dst->d_fieldmask |= QC_SPC_SOFT; + if (src->d_fieldmask & FS_DQ_BHARD) + dst->d_fieldmask |= QC_SPC_HARD; + if (src->d_fieldmask & FS_DQ_RTBSOFT) + dst->d_fieldmask |= QC_RT_SPC_SOFT; + if (src->d_fieldmask & FS_DQ_RTBHARD) + dst->d_fieldmask |= QC_RT_SPC_HARD; + if (src->d_fieldmask & FS_DQ_BTIMER) + dst->d_fieldmask |= QC_SPC_TIMER; + if (src->d_fieldmask & FS_DQ_ITIMER) + dst->d_fieldmask |= QC_INO_TIMER; + if (src->d_fieldmask & FS_DQ_RTBTIMER) + dst->d_fieldmask |= QC_RT_SPC_TIMER; + if (src->d_fieldmask & FS_DQ_BWARNS) + dst->d_fieldmask |= QC_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_IWARNS) + dst->d_fieldmask |= QC_INO_WARNS; + if (src->d_fieldmask & FS_DQ_RTBWARNS) + dst->d_fieldmask |= QC_RT_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_BCOUNT) + dst->d_fieldmask |= QC_SPACE; + if (src->d_fieldmask & FS_DQ_ICOUNT) + dst->d_fieldmask |= QC_INO_COUNT; + if (src->d_fieldmask & FS_DQ_RTBCOUNT) + dst->d_fieldmask |= QC_RT_SPACE; +} + static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct qc_dqblk qdq; struct kqid qid; if (copy_from_user(&fdq, addr, sizeof(fdq))) @@ -260,13 +371,44 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, qid = make_kqid(current_user_ns(), type, id); if (!qid_valid(qid)) return -EINVAL; - return sb->s_qcop->set_dqblk(sb, qid, &fdq); + copy_from_xfs_dqblk(&qdq, &fdq); + return sb->s_qcop->set_dqblk(sb, qid, &qdq); +} + +static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src, + int type, qid_t id) +{ + memset(dst, 0, sizeof(*dst)); + dst->d_version = FS_DQUOT_VERSION; + dst->d_id = id; + if (type == USRQUOTA) + dst->d_flags = FS_USER_QUOTA; + else if (type == PRJQUOTA) + dst->d_flags = FS_PROJ_QUOTA; + else + dst->d_flags = FS_GROUP_QUOTA; + dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit); + dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit); + dst->d_ino_hardlimit = src->d_ino_hardlimit; + dst->d_ino_softlimit = src->d_ino_softlimit; + dst->d_bcount = quota_btobb(src->d_space); + dst->d_icount = src->d_ino_count; + dst->d_itimer = src->d_ino_timer; + dst->d_btimer = src->d_spc_timer; + dst->d_iwarns = src->d_ino_warns; + dst->d_bwarns = src->d_spc_warns; + dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit); + dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit); + dst->d_rtbcount = quota_btobb(src->d_rt_space); + dst->d_rtbtimer = src->d_rt_spc_timer; + dst->d_rtbwarns = src->d_rt_spc_warns; } static int quota_getxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct qc_dqblk qdq; struct kqid qid; int ret; @@ -275,8 +417,11 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, qid = make_kqid(current_user_ns(), type, id); if (!qid_valid(qid)) return -EINVAL; - ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); - if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) + ret = sb->s_qcop->get_dqblk(sb, qid, &qdq); + if (ret) + return ret; + copy_to_xfs_dqblk(&fdq, &qdq, type, id); + if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } @@ -317,9 +462,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_QUOTAON: return quota_quotaon(sb, type, cmd, id, path); case Q_QUOTAOFF: - if (!sb->s_qcop->quota_off) - return -ENOSYS; - return sb->s_qcop->quota_off(sb, type); + return quota_quotaoff(sb, type); case Q_GETFMT: return quota_getfmt(sb, type, addr); case Q_GETINFO: @@ -335,8 +478,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return -ENOSYS; return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: + return quota_enable(sb, addr); case Q_XQUOTAOFF: - return quota_setxstate(sb, cmd, addr); + return quota_disable(sb, addr); case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 469c6848b322..8fe79beced5c 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -169,8 +169,8 @@ static int v1_read_file_info(struct super_block *sb, int type) } ret = 0; /* limits are stored as unsigned 32-bit data */ - dqopt->info[type].dqi_maxblimit = 0xffffffff; - dqopt->info[type].dqi_maxilimit = 0xffffffff; + dqopt->info[type].dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; + dqopt->info[type].dqi_max_ino_limit = 0xffffffff; dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; dqopt->info[type].dqi_bgrace = diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 02751ec695c5..9cb10d7197f7 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -117,16 +117,17 @@ static int v2_read_file_info(struct super_block *sb, int type) qinfo = info->dqi_priv; if (version == 0) { /* limits are stored as unsigned 32-bit data */ - info->dqi_maxblimit = 0xffffffff; - info->dqi_maxilimit = 0xffffffff; + info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; + info->dqi_max_ino_limit = 0xffffffff; } else { - /* used space is stored as unsigned 64-bit value */ - info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */ - info->dqi_maxilimit = 0xffffffffffffffffULL; + /* used space is stored as unsigned 64-bit value in bytes */ + info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */ + info->dqi_max_ino_limit = 0xffffffffffffffffULL; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); - info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); + /* No flags currently supported */ + info->dqi_flags = 0; qinfo->dqi_sb = sb; qinfo->dqi_type = type; qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); @@ -157,7 +158,8 @@ static int v2_write_file_info(struct super_block *sb, int type) info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); - dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + /* No flags currently supported */ + dinfo.dqi_flags = cpu_to_le32(0); spin_unlock(&dq_data_lock); dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); diff --git a/fs/read_write.c b/fs/read_write.c index c0805c93b6fa..4060691e78f7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t return retval; } - if (unlikely(inode->i_flock && mandatory_lock(inode))) { + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig index 0e0e99bd6bce..c6e17a744c3b 100644 --- a/fs/udf/Kconfig +++ b/fs/udf/Kconfig @@ -2,10 +2,12 @@ config UDF_FS tristate "UDF file system support" select CRC_ITU_T help - This is the new file system used on some CD-ROMs and DVDs. Say Y if - you intend to mount DVD discs or CDRW's written in packet mode, or - if written to by other UDF utilities, such as DirectCD. - Please read <file:Documentation/filesystems/udf.txt>. + This is a file system used on some CD-ROMs and DVDs. Since the + file system is supported by multiple operating systems and is more + compatible with standard unix file systems, it is also suitable for + removable USB disks. Say Y if you intend to mount DVD discs or CDRW's + written in packet mode, or if you want to use UDF for removable USB + disks. Please read <file:Documentation/filesystems/udf.txt>. To compile this file system support as a module, choose M here: the module will be called udf. diff --git a/fs/udf/file.c b/fs/udf/file.c index bb15771b92ae..08f3555fbeac 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -224,7 +224,7 @@ out: static int udf_release_file(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE && - atomic_read(&inode->i_writecount) > 1) { + atomic_read(&inode->i_writecount) == 1) { /* * Grab i_mutex to avoid races with writes changing i_size * while we are running. diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5bc71d9a674a..a445d599098d 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -750,7 +750,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; - isBeyondEOF = 1; + isBeyondEOF = true; if (count) { if (c) laarr[0] = laarr[1]; @@ -792,7 +792,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, endnum = c + 1; lastblock = 1; } else { - isBeyondEOF = 0; + isBeyondEOF = false; endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, @@ -1288,6 +1288,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) struct kernel_lb_addr *iloc = &iinfo->i_location; unsigned int link_count; unsigned int indirections = 0; + int bs = inode->i_sb->s_blocksize; int ret = -EIO; reread: @@ -1374,38 +1375,35 @@ reread: if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { iinfo->i_efe = 1; iinfo->i_use = 0; - ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + ret = udf_alloc_i_data(inode, bs - sizeof(struct extendedFileEntry)); if (ret) goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct extendedFileEntry), - inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry)); + bs - sizeof(struct extendedFileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { iinfo->i_efe = 0; iinfo->i_use = 0; - ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct fileEntry)); + ret = udf_alloc_i_data(inode, bs - sizeof(struct fileEntry)); if (ret) goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct fileEntry), - inode->i_sb->s_blocksize - sizeof(struct fileEntry)); + bs - sizeof(struct fileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { iinfo->i_efe = 0; iinfo->i_use = 1; iinfo->i_lenAlloc = le32_to_cpu( ((struct unallocSpaceEntry *)bh->b_data)-> lengthAllocDescs); - ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + ret = udf_alloc_i_data(inode, bs - sizeof(struct unallocSpaceEntry)); if (ret) goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct unallocSpaceEntry), - inode->i_sb->s_blocksize - - sizeof(struct unallocSpaceEntry)); + bs - sizeof(struct unallocSpaceEntry)); return 0; } @@ -1489,6 +1487,15 @@ reread: } inode->i_generation = iinfo->i_unique; + /* + * Sanity check length of allocation descriptors and extended attrs to + * avoid integer overflows + */ + if (iinfo->i_lenEAttr > bs || iinfo->i_lenAlloc > bs) + goto out; + /* Now do exact checks */ + if (udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc > bs) + goto out; /* Sanity checks for files in ICB so that we don't get confused later */ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { /* @@ -1498,8 +1505,7 @@ reread: if (iinfo->i_lenAlloc != inode->i_size) goto out; /* File in ICB has to fit in there... */ - if (inode->i_size > inode->i_sb->s_blocksize - - udf_file_entry_alloc_offset(inode)) + if (inode->i_size > bs - udf_file_entry_alloc_offset(inode)) goto out; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 3ccb2f11fc76..f169411c4ea0 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1599,7 +1599,7 @@ static noinline int udf_process_sequence( struct udf_vds_record *curr; struct generic_desc *gd; struct volDescPtr *vdp; - int done = 0; + bool done = false; uint32_t vdsn; uint16_t ident; long next_s = 0, next_e = 0; @@ -1680,7 +1680,7 @@ static noinline int udf_process_sequence( lastblock = next_e; next_s = next_e = 0; } else - done = 1; + done = true; break; } brelse(bh); @@ -2300,6 +2300,7 @@ static void udf_put_super(struct super_block *sb) udf_close_lvid(sb); brelse(sbi->s_lvid_bh); udf_sb_free_partitions(sb); + mutex_destroy(&sbi->s_alloc_mutex); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 53e95b2a1369..a7a3a63bb360 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -91,16 +91,6 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) return ptr; } -void -kmem_free(const void *ptr) -{ - if (!is_vmalloc_addr(ptr)) { - kfree(ptr); - } else { - vfree(ptr); - } -} - void * kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, xfs_km_flags_t flags) diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 64db0e53edea..cc6b768fc068 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -63,7 +63,10 @@ kmem_flags_convert(xfs_km_flags_t flags) extern void *kmem_alloc(size_t, xfs_km_flags_t); extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); -extern void kmem_free(const void *); +static inline void kmem_free(const void *ptr) +{ + kvfree(ptr); +} extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 5d38e8b8a913..15105dbc9e28 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -403,7 +403,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) if (!xfs_sb_version_hasattr2(&mp->m_sb)) { xfs_sb_version_addattr2(&mp->m_sb); spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + xfs_log_sb(tp); } else spin_unlock(&mp->m_sb_lock); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b5eb4743f75a..61ec015dca16 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -973,7 +973,11 @@ xfs_bmap_local_to_extents( *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - /* initialise the block and copy the data */ + /* + * Initialise the block and copy the data + * + * Note: init_fn must set the buffer log item type correctly! + */ init_fn(tp, bp, ip, ifp); /* account for the change in fork size and log everything */ @@ -1221,22 +1225,20 @@ xfs_bmap_add_attrfork( goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { - __int64_t sbfields = 0; + bool log_sb = false; spin_lock(&mp->m_sb_lock); if (!xfs_sb_version_hasattr(&mp->m_sb)) { xfs_sb_version_addattr(&mp->m_sb); - sbfields |= XFS_SB_VERSIONNUM; + log_sb = true; } if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { xfs_sb_version_addattr2(&mp->m_sb); - sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); + log_sb = true; } - if (sbfields) { - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - } else - spin_unlock(&mp->m_sb_lock); + spin_unlock(&mp->m_sb_lock); + if (log_sb) + xfs_log_sb(tp); } error = xfs_bmap_finish(&tp, &flist, &committed); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 44db6db86402..b9d8a499d2c4 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -28,6 +28,37 @@ struct xfs_trans; extern kmem_zone_t *xfs_bmap_free_item_zone; /* + * Argument structure for xfs_bmap_alloc. + */ +struct xfs_bmalloca { + xfs_fsblock_t *firstblock; /* i/o first block allocated */ + struct xfs_bmap_free *flist; /* bmap freelist */ + struct xfs_trans *tp; /* transaction pointer */ + struct xfs_inode *ip; /* incore inode pointer */ + struct xfs_bmbt_irec prev; /* extent before the new one */ + struct xfs_bmbt_irec got; /* extent after, or delayed */ + + xfs_fileoff_t offset; /* offset in file filling in */ + xfs_extlen_t length; /* i/o length asked/allocated */ + xfs_fsblock_t blkno; /* starting block of new extent */ + + struct xfs_btree_cur *cur; /* btree cursor */ + xfs_extnum_t idx; /* current extent index */ + int nallocs;/* number of extents alloc'd */ + int logflags;/* flags for transaction logging */ + + xfs_extlen_t total; /* total blocks needed for xaction */ + xfs_extlen_t minlen; /* minimum allocation size (blocks) */ + xfs_extlen_t minleft; /* amount must be left after alloc */ + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ + int flags; +}; + +/* * List of extents to be free "later". * The list is kept sorted on xbf_startblock. */ @@ -149,6 +180,8 @@ void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_bmap_free *flist, struct xfs_mount *mp); void xfs_bmap_cancel(struct xfs_bmap_free *flist); +int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, + int *committed); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index fbd6da263571..8eb718979383 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -151,10 +151,13 @@ typedef struct xfs_sb { __uint32_t sb_features2; /* additional feature bits */ /* - * bad features2 field as a result of failing to pad the sb - * structure to 64 bits. Some machines will be using this field - * for features2 bits. Easiest just to mark it bad and not use - * it for anything else. + * bad features2 field as a result of failing to pad the sb structure to + * 64 bits. Some machines will be using this field for features2 bits. + * Easiest just to mark it bad and not use it for anything else. + * + * This is not kept up to date in memory; it is always overwritten by + * the value in sb_features2 when formatting the incore superblock to + * the disk buffer. */ __uint32_t sb_bad_features2; @@ -304,8 +307,8 @@ typedef enum { #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) -#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) -#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) +#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \ + XFS_SB_MVAL(BAD_FEATURES2)) #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) @@ -319,9 +322,9 @@ typedef enum { XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ - XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ - XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ - XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) + XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \ + XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \ + XFS_SB_PQUOTINO) /* @@ -453,13 +456,11 @@ static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; - sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT; } static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) { sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; - sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; if (!sbp->sb_features2) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } @@ -475,7 +476,6 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp) { sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; - sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT; } /* diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 18dc721ca19f..18dc721ca19f 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 752915fa775a..b0a5fe95a3e2 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,69 +40,6 @@ * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ -static const struct { - short offset; - short type; /* 0 = integer - * 1 = binary / string (no translation) - */ -} xfs_sb_info[] = { - { offsetof(xfs_sb_t, sb_magicnum), 0 }, - { offsetof(xfs_sb_t, sb_blocksize), 0 }, - { offsetof(xfs_sb_t, sb_dblocks), 0 }, - { offsetof(xfs_sb_t, sb_rblocks), 0 }, - { offsetof(xfs_sb_t, sb_rextents), 0 }, - { offsetof(xfs_sb_t, sb_uuid), 1 }, - { offsetof(xfs_sb_t, sb_logstart), 0 }, - { offsetof(xfs_sb_t, sb_rootino), 0 }, - { offsetof(xfs_sb_t, sb_rbmino), 0 }, - { offsetof(xfs_sb_t, sb_rsumino), 0 }, - { offsetof(xfs_sb_t, sb_rextsize), 0 }, - { offsetof(xfs_sb_t, sb_agblocks), 0 }, - { offsetof(xfs_sb_t, sb_agcount), 0 }, - { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, - { offsetof(xfs_sb_t, sb_logblocks), 0 }, - { offsetof(xfs_sb_t, sb_versionnum), 0 }, - { offsetof(xfs_sb_t, sb_sectsize), 0 }, - { offsetof(xfs_sb_t, sb_inodesize), 0 }, - { offsetof(xfs_sb_t, sb_inopblock), 0 }, - { offsetof(xfs_sb_t, sb_fname[0]), 1 }, - { offsetof(xfs_sb_t, sb_blocklog), 0 }, - { offsetof(xfs_sb_t, sb_sectlog), 0 }, - { offsetof(xfs_sb_t, sb_inodelog), 0 }, - { offsetof(xfs_sb_t, sb_inopblog), 0 }, - { offsetof(xfs_sb_t, sb_agblklog), 0 }, - { offsetof(xfs_sb_t, sb_rextslog), 0 }, - { offsetof(xfs_sb_t, sb_inprogress), 0 }, - { offsetof(xfs_sb_t, sb_imax_pct), 0 }, - { offsetof(xfs_sb_t, sb_icount), 0 }, - { offsetof(xfs_sb_t, sb_ifree), 0 }, - { offsetof(xfs_sb_t, sb_fdblocks), 0 }, - { offsetof(xfs_sb_t, sb_frextents), 0 }, - { offsetof(xfs_sb_t, sb_uquotino), 0 }, - { offsetof(xfs_sb_t, sb_gquotino), 0 }, - { offsetof(xfs_sb_t, sb_qflags), 0 }, - { offsetof(xfs_sb_t, sb_flags), 0 }, - { offsetof(xfs_sb_t, sb_shared_vn), 0 }, - { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, - { offsetof(xfs_sb_t, sb_unit), 0 }, - { offsetof(xfs_sb_t, sb_width), 0 }, - { offsetof(xfs_sb_t, sb_dirblklog), 0 }, - { offsetof(xfs_sb_t, sb_logsectlog), 0 }, - { offsetof(xfs_sb_t, sb_logsectsize), 0 }, - { offsetof(xfs_sb_t, sb_logsunit), 0 }, - { offsetof(xfs_sb_t, sb_features2), 0 }, - { offsetof(xfs_sb_t, sb_bad_features2), 0 }, - { offsetof(xfs_sb_t, sb_features_compat), 0 }, - { offsetof(xfs_sb_t, sb_features_ro_compat), 0 }, - { offsetof(xfs_sb_t, sb_features_incompat), 0 }, - { offsetof(xfs_sb_t, sb_features_log_incompat), 0 }, - { offsetof(xfs_sb_t, sb_crc), 0 }, - { offsetof(xfs_sb_t, sb_pad), 0 }, - { offsetof(xfs_sb_t, sb_pquotino), 0 }, - { offsetof(xfs_sb_t, sb_lsn), 0 }, - { sizeof(xfs_sb_t), 0 } -}; - /* * Reference counting access wrappers to the perag structures. * Because we never free per-ag structures, the only thing we @@ -461,58 +398,49 @@ xfs_sb_from_disk( __xfs_sb_from_disk(to, from, true); } -static inline void +static void xfs_sb_quota_to_disk( - xfs_dsb_t *to, - xfs_sb_t *from, - __int64_t *fields) + struct xfs_dsb *to, + struct xfs_sb *from) { __uint16_t qflags = from->sb_qflags; + to->sb_uquotino = cpu_to_be64(from->sb_uquotino); + if (xfs_sb_version_has_pquotino(from)) { + to->sb_qflags = cpu_to_be16(from->sb_qflags); + to->sb_gquotino = cpu_to_be64(from->sb_gquotino); + to->sb_pquotino = cpu_to_be64(from->sb_pquotino); + return; + } + /* - * We need to do these manipilations only if we are working - * with an older version of on-disk superblock. + * The in-core version of sb_qflags do not have XFS_OQUOTA_* + * flags, whereas the on-disk version does. So, convert incore + * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. */ - if (xfs_sb_version_has_pquotino(from)) - return; + qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | + XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); - if (*fields & XFS_SB_QFLAGS) { - /* - * The in-core version of sb_qflags do not have - * XFS_OQUOTA_* flags, whereas the on-disk version - * does. So, convert incore XFS_{PG}QUOTA_* flags - * to on-disk XFS_OQUOTA_* flags. - */ - qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | - XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); - - if (from->sb_qflags & - (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) - qflags |= XFS_OQUOTA_ENFD; - if (from->sb_qflags & - (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) - qflags |= XFS_OQUOTA_CHKD; - to->sb_qflags = cpu_to_be16(qflags); - *fields &= ~XFS_SB_QFLAGS; - } + if (from->sb_qflags & + (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) + qflags |= XFS_OQUOTA_ENFD; + if (from->sb_qflags & + (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) + qflags |= XFS_OQUOTA_CHKD; + to->sb_qflags = cpu_to_be16(qflags); /* - * GQUOTINO and PQUOTINO cannot be used together in versions of - * superblock that do not have pquotino. from->sb_flags tells us which - * quota is active and should be copied to disk. If neither are active, - * make sure we write NULLFSINO to the sb_gquotino field as a quota - * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature - * bit is set. + * GQUOTINO and PQUOTINO cannot be used together in versions + * of superblock that do not have pquotino. from->sb_flags + * tells us which quota is active and should be copied to + * disk. If neither are active, we should NULL the inode. * - * Note that we don't need to handle the sb_uquotino or sb_pquotino here - * as they do not require any translation. Hence the main sb field loop - * will write them appropriately from the in-core superblock. + * In all cases, the separate pquotino must remain 0 because it + * it beyond the "end" of the valid non-pquotino superblock. */ - if ((*fields & XFS_SB_GQUOTINO) && - (from->sb_qflags & XFS_GQUOTA_ACCT)) + if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); - else if ((*fields & XFS_SB_PQUOTINO) && - (from->sb_qflags & XFS_PQUOTA_ACCT)) + else if (from->sb_qflags & XFS_PQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); else { /* @@ -526,63 +454,78 @@ xfs_sb_quota_to_disk( to->sb_gquotino = cpu_to_be64(NULLFSINO); } - *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); + to->sb_pquotino = 0; } -/* - * Copy in core superblock to ondisk one. - * - * The fields argument is mask of superblock fields to copy. - */ void xfs_sb_to_disk( - xfs_dsb_t *to, - xfs_sb_t *from, - __int64_t fields) + struct xfs_dsb *to, + struct xfs_sb *from) { - xfs_caddr_t to_ptr = (xfs_caddr_t)to; - xfs_caddr_t from_ptr = (xfs_caddr_t)from; - xfs_sb_field_t f; - int first; - int size; - - ASSERT(fields); - if (!fields) - return; + xfs_sb_quota_to_disk(to, from); - /* We should never write the crc here, it's updated in the IO path */ - fields &= ~XFS_SB_CRC; - - xfs_sb_quota_to_disk(to, from, &fields); - while (fields) { - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - first = xfs_sb_info[f].offset; - size = xfs_sb_info[f + 1].offset - first; - - ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); - - if (size == 1 || xfs_sb_info[f].type == 1) { - memcpy(to_ptr + first, from_ptr + first, size); - } else { - switch (size) { - case 2: - *(__be16 *)(to_ptr + first) = - cpu_to_be16(*(__u16 *)(from_ptr + first)); - break; - case 4: - *(__be32 *)(to_ptr + first) = - cpu_to_be32(*(__u32 *)(from_ptr + first)); - break; - case 8: - *(__be64 *)(to_ptr + first) = - cpu_to_be64(*(__u64 *)(from_ptr + first)); - break; - default: - ASSERT(0); - } - } + to->sb_magicnum = cpu_to_be32(from->sb_magicnum); + to->sb_blocksize = cpu_to_be32(from->sb_blocksize); + to->sb_dblocks = cpu_to_be64(from->sb_dblocks); + to->sb_rblocks = cpu_to_be64(from->sb_rblocks); + to->sb_rextents = cpu_to_be64(from->sb_rextents); + memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); + to->sb_logstart = cpu_to_be64(from->sb_logstart); + to->sb_rootino = cpu_to_be64(from->sb_rootino); + to->sb_rbmino = cpu_to_be64(from->sb_rbmino); + to->sb_rsumino = cpu_to_be64(from->sb_rsumino); + to->sb_rextsize = cpu_to_be32(from->sb_rextsize); + to->sb_agblocks = cpu_to_be32(from->sb_agblocks); + to->sb_agcount = cpu_to_be32(from->sb_agcount); + to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); + to->sb_logblocks = cpu_to_be32(from->sb_logblocks); + to->sb_versionnum = cpu_to_be16(from->sb_versionnum); + to->sb_sectsize = cpu_to_be16(from->sb_sectsize); + to->sb_inodesize = cpu_to_be16(from->sb_inodesize); + to->sb_inopblock = cpu_to_be16(from->sb_inopblock); + memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); + to->sb_blocklog = from->sb_blocklog; + to->sb_sectlog = from->sb_sectlog; + to->sb_inodelog = from->sb_inodelog; + to->sb_inopblog = from->sb_inopblog; + to->sb_agblklog = from->sb_agblklog; + to->sb_rextslog = from->sb_rextslog; + to->sb_inprogress = from->sb_inprogress; + to->sb_imax_pct = from->sb_imax_pct; + to->sb_icount = cpu_to_be64(from->sb_icount); + to->sb_ifree = cpu_to_be64(from->sb_ifree); + to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); + to->sb_frextents = cpu_to_be64(from->sb_frextents); - fields &= ~(1LL << f); + to->sb_flags = from->sb_flags; + to->sb_shared_vn = from->sb_shared_vn; + to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); + to->sb_unit = cpu_to_be32(from->sb_unit); + to->sb_width = cpu_to_be32(from->sb_width); + to->sb_dirblklog = from->sb_dirblklog; + to->sb_logsectlog = from->sb_logsectlog; + to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); + to->sb_logsunit = cpu_to_be32(from->sb_logsunit); + + /* + * We need to ensure that bad_features2 always matches features2. + * Hence we enforce that here rather than having to remember to do it + * everywhere else that updates features2. + */ + from->sb_bad_features2 = from->sb_features2; + to->sb_features2 = cpu_to_be32(from->sb_features2); + to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); + + if (xfs_sb_version_hascrc(from)) { + to->sb_features_compat = cpu_to_be32(from->sb_features_compat); + to->sb_features_ro_compat = + cpu_to_be32(from->sb_features_ro_compat); + to->sb_features_incompat = + cpu_to_be32(from->sb_features_incompat); + to->sb_features_log_incompat = + cpu_to_be32(from->sb_features_log_incompat); + to->sb_pad = 0; + to->sb_lsn = cpu_to_be64(from->sb_lsn); } } @@ -816,42 +759,51 @@ xfs_initialize_perag_data( } /* - * xfs_mod_sb() can be used to copy arbitrary changes to the - * in-core superblock into the superblock buffer to be logged. - * It does not provide the higher level of locking that is - * needed to protect the in-core superblock from concurrent - * access. + * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock + * into the superblock buffer to be logged. It does not provide the higher + * level of locking that is needed to protect the in-core superblock from + * concurrent access. */ void -xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) +xfs_log_sb( + struct xfs_trans *tp) { - xfs_buf_t *bp; - int first; - int last; - xfs_mount_t *mp; - xfs_sb_field_t f; - - ASSERT(fields); - if (!fields) - return; - mp = tp->t_mountp; - bp = xfs_trans_getsb(tp, mp, 0); - first = sizeof(xfs_sb_t); - last = 0; - - /* translate/copy */ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); +} - /* find modified range */ - f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - last = xfs_sb_info[f + 1].offset - 1; +/* + * xfs_sync_sb + * + * Sync the superblock to disk. + * + * Note that the caller is responsible for checking the frozen state of the + * filesystem. This procedure uses the non-blocking transaction allocator and + * thus will allow modifications to a frozen fs. This is required because this + * code can be called during the process of freezing where use of the high-level + * allocator would deadlock. + */ +int +xfs_sync_sb( + struct xfs_mount *mp, + bool wait) +{ + struct xfs_trans *tp; + int error; - f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - first = xfs_sb_info[f].offset; + tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); - xfs_trans_log_buf(tp, bp, first, last); + xfs_log_sb(tp); + if (wait) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); } diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 8eb1c54bafbf..b25bb9a343f3 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -27,11 +27,12 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, extern void xfs_perag_put(struct xfs_perag *pag); extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); -extern void xfs_sb_calc_crc(struct xfs_buf *); -extern void xfs_mod_sb(struct xfs_trans *, __int64_t); -extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *); -extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); -extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); +extern void xfs_sb_calc_crc(struct xfs_buf *bp); +extern void xfs_log_sb(struct xfs_trans *tp); +extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); +extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); +extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); +extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 82404da2ca67..8dda4b321343 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -82,7 +82,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; #define XFS_TRANS_ATTR_RM 23 #define XFS_TRANS_ATTR_FLAG 24 #define XFS_TRANS_CLEAR_AGI_BUCKET 25 -#define XFS_TRANS_QM_SBCHANGE 26 +#define XFS_TRANS_SB_CHANGE 26 /* * Dummy entries since we use the transaction type to index into the * trans_type[] in xlog_recover_print_trans_head() @@ -95,17 +95,15 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; #define XFS_TRANS_QM_DQCLUSTER 32 #define XFS_TRANS_QM_QINOCREATE 33 #define XFS_TRANS_QM_QUOTAOFF_END 34 -#define XFS_TRANS_SB_UNIT 35 -#define XFS_TRANS_FSYNC_TS 36 -#define XFS_TRANS_GROWFSRT_ALLOC 37 -#define XFS_TRANS_GROWFSRT_ZERO 38 -#define XFS_TRANS_GROWFSRT_FREE 39 -#define XFS_TRANS_SWAPEXT 40 -#define XFS_TRANS_SB_COUNT 41 -#define XFS_TRANS_CHECKPOINT 42 -#define XFS_TRANS_ICREATE 43 -#define XFS_TRANS_CREATE_TMPFILE 44 -#define XFS_TRANS_TYPE_MAX 44 +#define XFS_TRANS_FSYNC_TS 35 +#define XFS_TRANS_GROWFSRT_ALLOC 36 +#define XFS_TRANS_GROWFSRT_ZERO 37 +#define XFS_TRANS_GROWFSRT_FREE 38 +#define XFS_TRANS_SWAPEXT 39 +#define XFS_TRANS_CHECKPOINT 40 +#define XFS_TRANS_ICREATE 41 +#define XFS_TRANS_CREATE_TMPFILE 42 +#define XFS_TRANS_TYPE_MAX 43 /* new transaction types need to be reflected in xfs_logprint(8) */ #define XFS_TRANS_TYPES \ @@ -113,7 +111,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ { XFS_TRANS_INACTIVE, "INACTIVE" }, \ { XFS_TRANS_CREATE, "CREATE" }, \ - { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ { XFS_TRANS_REMOVE, "REMOVE" }, \ @@ -134,23 +131,23 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ - { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ + { XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \ + { XFS_TRANS_DUMMY1, "DUMMY1" }, \ + { XFS_TRANS_DUMMY2, "DUMMY2" }, \ { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ - { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ - { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ - { XFS_TRANS_DUMMY1, "DUMMY1" }, \ - { XFS_TRANS_DUMMY2, "DUMMY2" }, \ + { XFS_TRANS_ICREATE, "ICREATE" }, \ + { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } /* diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c80c5236c3da..e7e26bd6468f 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -178,6 +178,8 @@ xfs_symlink_local_to_remote( struct xfs_mount *mp = ip->i_mount; char *buf; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); + if (!xfs_sb_version_hascrc(&mp->m_sb)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6c1330f29050..68cb1e7bf2bb 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -716,17 +716,6 @@ xfs_calc_clear_agi_bucket_reservation( } /* - * Clearing the quotaflags in the superblock. - * the super block for changing quota flags: sector size - */ -STATIC uint -xfs_calc_qm_sbchange_reservation( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); -} - -/* * Adjusting quota limits. * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) */ @@ -864,9 +853,6 @@ xfs_trans_resv_calc( * The following transactions are logged in logical format with * a default log count. */ - resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp); - resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT; - resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 1097d14cd583..2d5bdfce6d8f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -56,7 +56,6 @@ struct xfs_trans_resv { struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ - struct xfs_trans_res tr_qm_sbchange; /* change quota flags */ struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ diff --git a/fs/xfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index b79dc66b2ecd..b79dc66b2ecd 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 18e2f3bbae5e..3a9b7a1b8704 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( */ STATIC int xfs_setfilesize( - struct xfs_ioend *ioend) + struct xfs_inode *ip, + struct xfs_trans *tp, + xfs_off_t offset, + size_t size) { - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_trans *tp = ioend->io_append_trans; xfs_fsize_t isize; - /* - * The transaction may have been allocated in the I/O submission thread, - * thus we need to mark ourselves as beeing in a transaction manually. - * Similarly for freeze protection. - */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); - xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); + isize = xfs_new_eof(ip, offset + size); if (!isize) { xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, 0); return 0; } - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + trace_xfs_setfilesize(ip, offset, size); ip->i_d.di_size = isize; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -167,6 +159,25 @@ xfs_setfilesize( return xfs_trans_commit(tp, 0); } +STATIC int +xfs_setfilesize_ioend( + struct xfs_ioend *ioend) +{ + struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_trans *tp = ioend->io_append_trans; + + /* + * The transaction may have been allocated in the I/O submission thread, + * thus we need to mark ourselves as being in a transaction manually. + * Similarly for freeze protection. + */ + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); +} + /* * Schedule IO completion handling on the final put of an ioend. * @@ -182,8 +193,7 @@ xfs_finish_ioend( if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); - else if (ioend->io_append_trans || - (ioend->io_isdirect && xfs_ioend_is_append(ioend))) + else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else xfs_destroy_ioend(ioend); @@ -215,22 +225,8 @@ xfs_end_io( if (ioend->io_type == XFS_IO_UNWRITTEN) { error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); - } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { - /* - * For direct I/O we do not know if we need to allocate blocks - * or not so we can't preallocate an append transaction as that - * results in nested reservations and log space deadlocks. Hence - * allocate the transaction here. While this is sub-optimal and - * can block IO completion for some time, we're stuck with doing - * it this way until we can pass the ioend to the direct IO - * allocation callbacks and avoid nesting that way. - */ - error = xfs_setfilesize_trans_alloc(ioend); - if (error) - goto done; - error = xfs_setfilesize(ioend); } else if (ioend->io_append_trans) { - error = xfs_setfilesize(ioend); + error = xfs_setfilesize_ioend(ioend); } else { ASSERT(!xfs_ioend_is_append(ioend)); } @@ -242,17 +238,6 @@ done: } /* - * Call IO completion handling in caller context on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend_sync( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) - xfs_end_io(&ioend->io_work); -} - -/* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. * We'll need to extend this for updating the ondisk inode size later @@ -273,7 +258,6 @@ xfs_alloc_ioend( * all the I/O from calling the completion routine too early. */ atomic_set(&ioend->io_remaining, 1); - ioend->io_isdirect = 0; ioend->io_error = 0; ioend->io_list = NULL; ioend->io_type = type; @@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( * * If the private argument is non-NULL __xfs_get_blocks signals us that we * need to issue a transaction to convert the range from unwritten to written - * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successful AIO - * request this handler is called from interrupt context, from which we - * can't start transactions. In that case offload the I/O completion to - * the workqueues we also use for buffered I/O completion. + * extents. */ STATIC void xfs_end_io_direct_write( @@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( ssize_t size, void *private) { - struct xfs_ioend *ioend = iocb->private; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_FORCED_SHUTDOWN(mp)) + return; /* * While the generic direct I/O code updates the inode size, it does @@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( * end_io handler thinks the on-disk size is outside the in-core * size. To prevent this just update it a little bit earlier here. */ - if (offset + size > i_size_read(ioend->io_inode)) - i_size_write(ioend->io_inode, offset + size); + if (offset + size > i_size_read(inode)) + i_size_write(inode, offset + size); /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. + * For direct I/O we do not know if we need to allocate blocks or not, + * so we can't preallocate an append transaction, as that results in + * nested reservations and log space deadlocks. Hence allocate the + * transaction here. While this is sub-optimal and can block IO + * completion for some time, we're stuck with doing it this way until + * we can pass the ioend to the direct IO allocation callbacks and + * avoid nesting that way. */ - iocb->private = NULL; - - ioend->io_offset = offset; - ioend->io_size = size; - if (private && size > 0) - ioend->io_type = XFS_IO_UNWRITTEN; + if (private && size > 0) { + xfs_iomap_write_unwritten(ip, offset, size); + } else if (offset + size > ip->i_d.di_size) { + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return; + } - xfs_finish_ioend_sync(ioend); + xfs_setfilesize(ip, tp, offset, size); + } } STATIC ssize_t @@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - struct xfs_ioend *ioend = NULL; - ssize_t ret; if (rw & WRITE) { - size_t size = iov_iter_count(iter); - - /* - * We cannot preallocate a size update transaction here as we - * don't know whether allocation is necessary or not. Hence we - * can only tell IO completion that one is necessary if we are - * not doing unwritten extent conversion. - */ - iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); - if (offset + size > XFS_I(inode)->i_d.di_size) - ioend->io_isdirect = 1; - - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); - if (ret != -EIOCBQUEUED && iocb->private) - goto out_destroy_ioend; - } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, - NULL, NULL, 0); } - - return ret; - -out_destroy_ioend: - xfs_destroy_ioend(ioend); - return ret; + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, + NULL, NULL, 0); } /* diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f94dd459dff9..ac644e0137a4 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; * Types of I/O for bmap clustering and I/O completion tracking. */ enum { - XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ }; #define XFS_IO_TYPES \ - { 0, "" }, \ { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ { XFS_IO_OVERWRITE, "overwrite" } @@ -45,7 +43,6 @@ typedef struct xfs_ioend { unsigned int io_type; /* delalloc / unwritten */ int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ - unsigned int io_isdirect : 1;/* direct I/O */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */ diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 2fdb72d2c908..736429a72a12 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -26,43 +26,8 @@ struct xfs_ifork; struct xfs_inode; struct xfs_mount; struct xfs_trans; +struct xfs_bmalloca; -/* - * Argument structure for xfs_bmap_alloc. - */ -struct xfs_bmalloca { - xfs_fsblock_t *firstblock; /* i/o first block allocated */ - struct xfs_bmap_free *flist; /* bmap freelist */ - struct xfs_trans *tp; /* transaction pointer */ - struct xfs_inode *ip; /* incore inode pointer */ - struct xfs_bmbt_irec prev; /* extent before the new one */ - struct xfs_bmbt_irec got; /* extent after, or delayed */ - - xfs_fileoff_t offset; /* offset in file filling in */ - xfs_extlen_t length; /* i/o length asked/allocated */ - xfs_fsblock_t blkno; /* starting block of new extent */ - - struct xfs_btree_cur *cur; /* btree cursor */ - xfs_extnum_t idx; /* current extent index */ - int nallocs;/* number of extents alloc'd */ - int logflags;/* flags for transaction logging */ - - xfs_extlen_t total; /* total blocks needed for xaction */ - xfs_extlen_t minlen; /* minimum allocation size (blocks) */ - xfs_extlen_t minleft; /* amount must be left after alloc */ - bool eof; /* set if allocating past last extent */ - bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ - bool aeof; /* allocated space at eof */ - bool conv; /* overwriting unwritten extents */ - int flags; - struct completion *done; - struct work_struct work; - int result; -}; - -int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, - int *committed); int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3f9bd58edec7..507d96a57ac7 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -319,6 +319,10 @@ xfs_buf_item_format( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); + ASSERT((bip->bli_flags & XFS_BLI_STALE) || + (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF + && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); + /* * If it is an inode buffer, transfer the in-memory state to the @@ -535,7 +539,7 @@ xfs_buf_item_push( if ((bp->b_flags & XBF_WRITE_FAIL) && ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { xfs_warn(bp->b_target->bt_mount, -"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", +"Detected failing async write on buffer block 0x%llx. Retrying async write.", (long long)bp->b_bn); } diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index c24c67e22a2a..2f536f33cd26 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -86,7 +86,7 @@ static inline void xfs_dqflock(xfs_dquot_t *dqp) wait_for_completion(&dqp->q_flush); } -static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp) { return try_wait_for_completion(&dqp->q_flush); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 13e974e6a889..712d312d8e3e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -127,6 +127,42 @@ xfs_iozero( return (-status); } +int +xfs_update_prealloc_flags( + struct xfs_inode *ip, + enum xfs_prealloc_flags flags) +{ + struct xfs_trans *tp; + int error; + + tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + if (!(flags & XFS_PREALLOC_INVISIBLE)) { + ip->i_d.di_mode &= ~S_ISUID; + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } + + if (flags & XFS_PREALLOC_SET) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + if (flags & XFS_PREALLOC_CLEAR) + ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (flags & XFS_PREALLOC_SYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); +} + /* * Fsync operations on directories are much simpler than on regular files, * as there is no file data to flush, and thus also no need for explicit @@ -784,8 +820,8 @@ xfs_file_fallocate( { struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); - struct xfs_trans *tp; long error; + enum xfs_prealloc_flags flags = 0; loff_t new_size = 0; if (!S_ISREG(inode->i_mode)) @@ -822,6 +858,8 @@ xfs_file_fallocate( if (error) goto out_unlock; } else { + flags |= XFS_PREALLOC_SET; + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; @@ -839,28 +877,10 @@ xfs_file_fallocate( goto out_unlock; } - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - - if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (file->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + flags |= XFS_PREALLOC_SYNC; + + error = xfs_update_prealloc_flags(ip, flags); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index fdc64220fcb0..fba6532efba4 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -488,6 +488,7 @@ xfs_growfs_data_private( xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); if (error) return error; @@ -541,7 +542,7 @@ xfs_growfs_data_private( saved_error = error; continue; } - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); error = xfs_bwrite(bp); xfs_buf_relse(bp); @@ -756,37 +757,6 @@ out: return 0; } -/* - * Dump a transaction into the log that contains no real change. This is needed - * to be able to make the log dirty or stamp the current tail LSN into the log - * during the covering operation. - * - * We cannot use an inode here for this - that will push dirty state back up - * into the VFS and then periodic inode flushing will prevent log covering from - * making progress. Hence we log a field in the superblock instead and use a - * synchronous transaction to ensure the superblock is immediately unpinned - * and can be written back. - */ -int -xfs_fs_log_dummy( - xfs_mount_t *mp) -{ - xfs_trans_t *tp; - int error; - - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - /* log the UUID because it is an unchanging field */ - xfs_mod_sb(tp, XFS_SB_UUID); - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); -} - int xfs_fs_goingdown( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 41f804e740d7..daafa1f6d260 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1995,6 +1995,7 @@ xfs_iunlink( agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); return 0; @@ -2086,6 +2087,7 @@ xfs_iunlink_remove( agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); } else { @@ -2656,6 +2658,124 @@ xfs_sort_for_rename( } /* + * xfs_cross_rename() + * + * responsible for handling RENAME_EXCHANGE flag in renameat2() sytemcall + */ +STATIC int +xfs_cross_rename( + struct xfs_trans *tp, + struct xfs_inode *dp1, + struct xfs_name *name1, + struct xfs_inode *ip1, + struct xfs_inode *dp2, + struct xfs_name *name2, + struct xfs_inode *ip2, + struct xfs_bmap_free *free_list, + xfs_fsblock_t *first_block, + int spaceres) +{ + int error = 0; + int ip1_flags = 0; + int ip2_flags = 0; + int dp2_flags = 0; + + /* Swap inode number for dirent in first parent */ + error = xfs_dir_replace(tp, dp1, name1, + ip2->i_ino, + first_block, free_list, spaceres); + if (error) + goto out; + + /* Swap inode number for dirent in second parent */ + error = xfs_dir_replace(tp, dp2, name2, + ip1->i_ino, + first_block, free_list, spaceres); + if (error) + goto out; + + /* + * If we're renaming one or more directories across different parents, + * update the respective ".." entries (and link counts) to match the new + * parents. + */ + if (dp1 != dp2) { + dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + + if (S_ISDIR(ip2->i_d.di_mode)) { + error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, + dp1->i_ino, first_block, + free_list, spaceres); + if (error) + goto out; + + /* transfer ip2 ".." reference to dp1 */ + if (!S_ISDIR(ip1->i_d.di_mode)) { + error = xfs_droplink(tp, dp2); + if (error) + goto out; + error = xfs_bumplink(tp, dp1); + if (error) + goto out; + } + + /* + * Although ip1 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + } + + if (S_ISDIR(ip1->i_d.di_mode)) { + error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, + dp2->i_ino, first_block, + free_list, spaceres); + if (error) + goto out; + + /* transfer ip1 ".." reference to dp2 */ + if (!S_ISDIR(ip2->i_d.di_mode)) { + error = xfs_droplink(tp, dp1); + if (error) + goto out; + error = xfs_bumplink(tp, dp2); + if (error) + goto out; + } + + /* + * Although ip2 isn't changed here, userspace needs + * to be warned about the change, so that applications + * relying on it (like backup ones), will properly + * notify the change + */ + ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; + ip2_flags |= XFS_ICHGTIME_CHG; + } + } + + if (ip1_flags) { + xfs_trans_ichgtime(tp, ip1, ip1_flags); + xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); + } + if (ip2_flags) { + xfs_trans_ichgtime(tp, ip2, ip2_flags); + xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); + } + if (dp2_flags) { + xfs_trans_ichgtime(tp, dp2, dp2_flags); + xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); + } + xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); +out: + return error; +} + +/* * xfs_rename */ int @@ -2665,7 +2785,8 @@ xfs_rename( xfs_inode_t *src_ip, xfs_inode_t *target_dp, struct xfs_name *target_name, - xfs_inode_t *target_ip) + xfs_inode_t *target_ip, + unsigned int flags) { xfs_trans_t *tp = NULL; xfs_mount_t *mp = src_dp->i_mount; @@ -2743,6 +2864,18 @@ xfs_rename( } /* + * Handle RENAME_EXCHANGE flags + */ + if (flags & RENAME_EXCHANGE) { + error = xfs_cross_rename(tp, src_dp, src_name, src_ip, + target_dp, target_name, target_ip, + &free_list, &first_block, spaceres); + if (error) + goto abort_return; + goto finish_rename; + } + + /* * Set up the target. */ if (target_ip == NULL) { @@ -2881,6 +3014,7 @@ xfs_rename( if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); +finish_rename: /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4ed2ba9342dc..86cd6b39bed7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -338,7 +338,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, - struct xfs_inode *target_ip); + struct xfs_inode *target_ip, unsigned int flags); void xfs_ilock(xfs_inode_t *, uint); int xfs_ilock_nowait(xfs_inode_t *, uint); @@ -377,6 +377,15 @@ int xfs_droplink(struct xfs_trans *, struct xfs_inode *); int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); /* from xfs_file.c */ +enum xfs_prealloc_flags { + XFS_PREALLOC_SET = (1 << 1), + XFS_PREALLOC_CLEAR = (1 << 2), + XFS_PREALLOC_SYNC = (1 << 3), + XFS_PREALLOC_INVISIBLE = (1 << 4), +}; + +int xfs_update_prealloc_flags(struct xfs_inode *, + enum xfs_prealloc_flags); int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); int xfs_iozero(struct xfs_inode *, loff_t, size_t); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a1831980a68e..f7afb86c9148 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -606,11 +606,8 @@ xfs_ioc_space( unsigned int cmd, xfs_flock64_t *bf) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; struct iattr iattr; - bool setprealloc = false; - bool clrprealloc = false; + enum xfs_prealloc_flags flags = 0; int error; /* @@ -630,6 +627,11 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -EINVAL; + if (filp->f_flags & O_DSYNC) + flags |= XFS_PREALLOC_SYNC; + if (ioflags & XFS_IO_INVIS) + flags |= XFS_PREALLOC_INVISIBLE; + error = mnt_want_write_file(filp); if (error) return error; @@ -673,25 +675,23 @@ xfs_ioc_space( } if (bf->l_start < 0 || - bf->l_start > mp->m_super->s_maxbytes || + bf->l_start > inode->i_sb->s_maxbytes || bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { + bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { error = -EINVAL; goto out_unlock; } switch (cmd) { case XFS_IOC_ZERO_RANGE: + flags |= XFS_PREALLOC_SET; error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); - if (!error) - setprealloc = true; break; case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: + flags |= XFS_PREALLOC_SET; error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, XFS_BMAPI_PREALLOC); - if (!error) - setprealloc = true; break; case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: @@ -701,6 +701,7 @@ xfs_ioc_space( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP: case XFS_IOC_FREESP64: + flags |= XFS_PREALLOC_CLEAR; if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), bf->l_start - XFS_ISIZE(ip), 0); @@ -712,8 +713,6 @@ xfs_ioc_space( iattr.ia_size = bf->l_start; error = xfs_setattr_size(ip, &iattr); - if (!error) - clrprealloc = true; break; default: ASSERT(0); @@ -723,32 +722,7 @@ xfs_ioc_space( if (error) goto out_unlock; - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - if (!(ioflags & XFS_IO_INVIS)) { - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } - - if (setprealloc) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - else if (clrprealloc) - ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (filp->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_update_prealloc_flags(ip, flags); out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); @@ -1013,20 +987,182 @@ xfs_diflags_to_linux( inode->i_flags &= ~S_NOATIME; } -#define FSX_PROJID 1 -#define FSX_EXTSIZE 2 -#define FSX_XFLAGS 4 -#define FSX_NONBLOCK 8 +static int +xfs_ioctl_setattr_xflags( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + /* Can't change realtime flag if any extents are allocated. */ + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) + return -EINVAL; + + /* If realtime flag is set then must have realtime device */ + if (fa->fsx_xflags & XFS_XFLAG_REALTIME) { + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) + return -EINVAL; + } + + /* + * Can't modify an immutable/append-only file unless + * we have appropriate permission. + */ + if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || + (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + xfs_set_diflags(ip, fa->fsx_xflags); + xfs_diflags_to_linux(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(xs_ig_attrchg); + return 0; +} + +/* + * Set up the transaction structure for the setattr operation, checking that we + * have permission to do so. On success, return a clean transaction and the + * inode locked exclusively ready for further operation specific checks. On + * failure, return an error without modifying or locking the inode. + */ +static struct xfs_trans * +xfs_ioctl_setattr_get_trans( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return ERR_PTR(-EROFS); + if (XFS_FORCED_SHUTDOWN(mp)) + return ERR_PTR(-EIO); + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); + if (error) + goto out_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + /* + * CAP_FOWNER overrides the following restrictions: + * + * The user ID of the calling process must be equal to the file owner + * ID, except in cases where the CAP_FSETID capability is applicable. + */ + if (!inode_owner_or_capable(VFS_I(ip))) { + error = -EPERM; + goto out_cancel; + } + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + return tp; + +out_cancel: + xfs_trans_cancel(tp, 0); + return ERR_PTR(error); +} + +/* + * extent size hint validation is somewhat cumbersome. Rules are: + * + * 1. extent size hint is only valid for directories and regular files + * 2. XFS_XFLAG_EXTSIZE is only valid for regular files + * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories. + * 4. can only be changed on regular files if no extents are allocated + * 5. can be changed on directories at any time + * 6. extsize hint of 0 turns off hints, clears inode flags. + * 7. Extent size must be a multiple of the appropriate block size. + * 8. for non-realtime files, the extent size hint must be limited + * to half the AG size to avoid alignment extending the extent beyond the + * limits of the AG. + */ +static int +xfs_ioctl_setattr_check_extsize( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + struct xfs_mount *mp = ip->i_mount; + + if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) && + !S_ISDIR(ip->i_d.di_mode)) + return -EINVAL; + + if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) + return -EINVAL; + + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + return -EINVAL; + + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; + } + + if (fa->fsx_extsize % size) + return -EINVAL; + } else + fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT); + + return 0; +} + +static int +xfs_ioctl_setattr_check_projid( + struct xfs_inode *ip, + struct fsxattr *fa) +{ + /* Disallow 32bit project ids if projid32bit feature is not enabled. */ + if (fa->fsx_projid > (__uint16_t)-1 && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + return -EINVAL; + + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() == &init_user_ns) + return 0; + + if (xfs_get_projid(ip) != fa->fsx_projid) + return -EINVAL; + if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) != + (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) + return -EINVAL; + + return 0; +} STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, - struct fsxattr *fa, - int mask) + struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - unsigned int lock_flags = 0; struct xfs_dquot *udqp = NULL; struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; @@ -1034,17 +1170,9 @@ xfs_ioctl_setattr( trace_xfs_ioctl_setattr(ip); - if (mp->m_flags & XFS_MOUNT_RDONLY) - return -EROFS; - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - /* - * Disallow 32bit project ids when projid32bit feature is not enabled. - */ - if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return -EINVAL; + code = xfs_ioctl_setattr_check_projid(ip, fa); + if (code) + return code; /* * If disk quotas is on, we make sure that the dquots do exist on disk, @@ -1054,7 +1182,7 @@ xfs_ioctl_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { + if (XFS_IS_QUOTA_ON(mp)) { code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); @@ -1062,175 +1190,49 @@ xfs_ioctl_setattr( return code; } - /* - * For the other attributes, we acquire the inode lock and - * first do an error checking pass. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (code) - goto error_return; - - lock_flags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (!inode_owner_or_capable(VFS_I(ip))) { - code = -EPERM; - goto error_return; - } - - /* - * Do a quota reservation only if projid is actually going to change. - * Only allow changing of projid from init_user_ns since it is a - * non user namespace aware identifier. - */ - if (mask & FSX_PROJID) { - if (current_user_ns() != &init_user_ns) { - code = -EINVAL; - goto error_return; - } - - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { - ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, - pdqp, capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_return; - } + tp = xfs_ioctl_setattr_get_trans(ip); + if (IS_ERR(tp)) { + code = PTR_ERR(tp); + goto error_free_dquots; } - if (mask & FSX_EXTSIZE) { - /* - * Can't change extent size if any extents are allocated. - */ - if (ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - fa->fsx_extsize)) { - code = -EINVAL; /* EFBIG? */ - goto error_return; - } - /* - * Extent size must be a multiple of the appropriate block - * size, if set at all. It must also be smaller than the - * maximum extent size supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to - * half the size of the AGs in the filesystem so alignment - * doesn't result in extents larger than an AG. - */ - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) { - code = -EINVAL; - goto error_return; - } - - if (XFS_IS_REALTIME_INODE(ip) || - ((mask & FSX_XFLAGS) && - (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { - size = mp->m_sb.sb_rextsize << - mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = -EINVAL; - goto error_return; - } - } - - if (fa->fsx_extsize % size) { - code = -EINVAL; - goto error_return; - } - } + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && + xfs_get_projid(ip) != fa->fsx_projid) { + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, + capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_trans_cancel; } + code = xfs_ioctl_setattr_check_extsize(ip, fa); + if (code) + goto error_trans_cancel; - if (mask & FSX_XFLAGS) { - /* - * Can't change realtime flag if any extents are allocated. - */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (XFS_IS_REALTIME_INODE(ip)) != - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = -EINVAL; /* EFBIG? */ - goto error_return; - } - - /* - * If realtime flag is set then must have realtime data. - */ - if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - if ((mp->m_sb.sb_rblocks == 0) || - (mp->m_sb.sb_rextsize == 0) || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = -EINVAL; - goto error_return; - } - } - - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if ((ip->i_d.di_flags & - (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || - (fa->fsx_xflags & - (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) { - code = -EPERM; - goto error_return; - } - } - - xfs_trans_ijoin(tp, ip, 0); + code = xfs_ioctl_setattr_xflags(tp, ip, fa); + if (code) + goto error_trans_cancel; /* - * Change file ownership. Must be the owner or privileged. + * Change file ownership. Must be the owner or privileged. CAP_FSETID + * overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be cleared upon + * successful return from chown() */ - if (mask & FSX_PROJID) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (xfs_get_projid(ip) != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, - &ip->i_pdquot, pdqp); - } - ASSERT(ip->i_d.di_version > 1); - xfs_set_projid(ip, fa->fsx_projid); - } - } + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - if (mask & FSX_XFLAGS) { - xfs_set_diflags(ip, fa->fsx_xflags); - xfs_diflags_to_linux(ip); + /* Change the ownerships and register project quota modifications */ + if (xfs_get_projid(ip) != fa->fsx_projid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, + &ip->i_pdquot, pdqp); + } + ASSERT(ip->i_d.di_version > 1); + xfs_set_projid(ip, fa->fsx_projid); } /* @@ -1238,34 +1240,12 @@ xfs_ioctl_setattr( * extent size hint should be set on the inode. If no extent size flags * are set on the inode then unconditionally clear the extent size hint. */ - if (mask & FSX_EXTSIZE) { - int extsize = 0; - - if (ip->i_d.di_flags & - (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) - extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; - ip->i_d.di_extsize = extsize; - } - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); + if (ip->i_d.di_flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) + ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + else + ip->i_d.di_extsize = 0; - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesystems. - * One for the truncate and one for the timestamps since we - * don't want to change the timestamps unless we're sure the - * truncate worked. Truncates are less than 1% of the laddis - * mix so this probably isn't worth the trouble to optimize. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); code = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, lock_flags); /* * Release any dquot(s) the inode had kept before chown. @@ -1276,12 +1256,11 @@ xfs_ioctl_setattr( return code; - error_return: +error_trans_cancel: + xfs_trans_cancel(tp, 0); +error_free_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); - xfs_trans_cancel(tp, 0); - if (lock_flags) - xfs_iunlock(ip, lock_flags); return code; } @@ -1292,20 +1271,15 @@ xfs_ioc_fssetxattr( void __user *arg) { struct fsxattr fa; - unsigned int mask; int error; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; - mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; - error = mnt_want_write_file(filp); if (error) return error; - error = xfs_ioctl_setattr(ip, &fa, mask); + error = xfs_ioctl_setattr(ip, &fa); mnt_drop_write_file(filp); return error; } @@ -1325,14 +1299,14 @@ xfs_ioc_getxflags( STATIC int xfs_ioc_setxflags( - xfs_inode_t *ip, + struct xfs_inode *ip, struct file *filp, void __user *arg) { + struct xfs_trans *tp; struct fsxattr fa; unsigned int flags; - unsigned int mask; - int error; + int error; if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -1342,15 +1316,26 @@ xfs_ioc_setxflags( FS_SYNC_FL)) return -EOPNOTSUPP; - mask = FSX_XFLAGS; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); error = mnt_want_write_file(filp); if (error) return error; - error = xfs_ioctl_setattr(ip, &fa, mask); + + tp = xfs_ioctl_setattr_get_trans(ip); + if (IS_ERR(tp)) { + error = PTR_ERR(tp); + goto out_drop_write; + } + + error = xfs_ioctl_setattr_xflags(tp, ip, &fa); + if (error) { + xfs_trans_cancel(tp, 0); + goto out_drop_write; + } + + error = xfs_trans_commit(tp, 0); +out_drop_write: mnt_drop_write_file(filp); return error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index ec6772866f3d..bfc7c7c8a0c8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -423,7 +423,7 @@ xfs_compat_attrmulti_by_handle( ops = memdup_user(compat_ptr(am_hreq.ops), size); if (IS_ERR(ops)) { - error = -PTR_ERR(ops); + error = PTR_ERR(ops); goto out_dput; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c980e2a5086b..ccb1dd0d509e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -802,7 +802,7 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - size_t count) + xfs_off_t count) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 411fbb8919ef..8688e663d744 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -27,6 +27,6 @@ int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *); int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c50311cae1b1..ce80eeb8faa4 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -380,18 +380,27 @@ xfs_vn_rename( struct inode *odir, struct dentry *odentry, struct inode *ndir, - struct dentry *ndentry) + struct dentry *ndentry, + unsigned int flags) { struct inode *new_inode = ndentry->d_inode; + int omode = 0; struct xfs_name oname; struct xfs_name nname; - xfs_dentry_to_name(&oname, odentry, 0); + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + /* if we are exchanging files, we need to set i_mode of both files */ + if (flags & RENAME_EXCHANGE) + omode = ndentry->d_inode->i_mode; + + xfs_dentry_to_name(&oname, odentry, omode); xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); + XFS_I(ndir), &nname, + new_inode ? XFS_I(new_inode) : NULL, flags); } /* @@ -1144,7 +1153,7 @@ static const struct inode_operations xfs_dir_inode_operations = { */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, @@ -1172,7 +1181,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e408bf5a3ff7..bcc7cfabb787 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -33,6 +33,7 @@ #include "xfs_fsops.h" #include "xfs_cksum.h" #include "xfs_sysfs.h" +#include "xfs_sb.h" kmem_zone_t *xfs_log_ticket_zone; @@ -1290,9 +1291,20 @@ xfs_log_worker( struct xfs_mount *mp = log->l_mp; /* dgc: errors ignored - not fatal and nowhere to report them */ - if (xfs_log_need_covered(mp)) - xfs_fs_log_dummy(mp); - else + if (xfs_log_need_covered(mp)) { + /* + * Dump a transaction into the log that contains no real change. + * This is needed to stamp the current tail LSN into the log + * during the covering operation. + * + * We cannot use an inode here for this - that will push dirty + * state back up into the VFS and then periodic inode flushing + * will prevent log covering from making progress. Hence we + * synchronously log the superblock instead to ensure the + * superblock is immediately unpinned and can be written back. + */ + xfs_sync_sb(mp, true); + } else xfs_log_force(mp, 0); /* start pushing all the metadata that is currently dirty */ @@ -1395,6 +1407,8 @@ xlog_alloc_log( ASSERT(xfs_buf_islocked(bp)); xfs_buf_unlock(bp); + /* use high priority wq for log I/O completion */ + bp->b_ioend_wq = mp->m_log_workqueue; bp->b_iodone = xlog_iodone; log->l_xbuf = bp; @@ -1427,6 +1441,8 @@ xlog_alloc_log( ASSERT(xfs_buf_islocked(bp)); xfs_buf_unlock(bp); + /* use high priority wq for log I/O completion */ + bp->b_ioend_wq = mp->m_log_workqueue; bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1806,8 +1822,6 @@ xlog_sync( XFS_BUF_ZEROFLAGS(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_SYNCIO; - /* use high priority completion wq */ - bp->b_ioend_wq = log->l_mp->m_log_workqueue; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { bp->b_flags |= XBF_FUA; @@ -1856,8 +1870,6 @@ xlog_sync( bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) bp->b_flags |= XBF_FUA; - /* use high priority completion wq */ - bp->b_ioend_wq = log->l_mp->m_log_workqueue; ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); @@ -2027,7 +2039,7 @@ xlog_print_tic_res( " total reg = %u bytes (o/flow = %u bytes)\n" " ophdrs = %u (ophdr space = %u bytes)\n" " ophdr + reg = %u bytes\n" - " num regions = %u\n", + " num regions = %u", ((ticket->t_trans_type <= 0 || ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d3d38836f87f..4fa80e63eea2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -408,11 +408,11 @@ xfs_update_alignment(xfs_mount_t *mp) if (xfs_sb_version_hasdalign(sbp)) { if (sbp->sb_unit != mp->m_dalign) { sbp->sb_unit = mp->m_dalign; - mp->m_update_flags |= XFS_SB_UNIT; + mp->m_update_sb = true; } if (sbp->sb_width != mp->m_swidth) { sbp->sb_width = mp->m_swidth; - mp->m_update_flags |= XFS_SB_WIDTH; + mp->m_update_sb = true; } } else { xfs_warn(mp, @@ -583,38 +583,19 @@ int xfs_mount_reset_sbqflags( struct xfs_mount *mp) { - int error; - struct xfs_trans *tp; - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without m_sb_lock. - */ + /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */ if (mp->m_sb.sb_qflags == 0) return 0; spin_lock(&mp->m_sb_lock); mp->m_sb.sb_qflags = 0; spin_unlock(&mp->m_sb_lock); - /* - * If the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (!xfs_fs_writable(mp, SB_FREEZE_WRITE)) return 0; - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_alert(mp, "%s: Superblock update failed!", __func__); - return error; - } - - xfs_mod_sb(tp, XFS_SB_QFLAGS); - return xfs_trans_commit(tp, 0); + return xfs_sync_sb(mp, false); } __uint64_t @@ -659,26 +640,25 @@ xfs_mountfs( xfs_sb_mount_common(mp, sbp); /* - * Check for a mismatched features2 values. Older kernels - * read & wrote into the wrong sb offset for sb_features2 - * on some platforms due to xfs_sb_t not being 64bit size aligned - * when sb_features2 was added, which made older superblock - * reading/writing routines swap it as a 64-bit value. + * Check for a mismatched features2 values. Older kernels read & wrote + * into the wrong sb offset for sb_features2 on some platforms due to + * xfs_sb_t not being 64bit size aligned when sb_features2 was added, + * which made older superblock reading/writing routines swap it as a + * 64-bit value. * * For backwards compatibility, we make both slots equal. * - * If we detect a mismatched field, we OR the set bits into the - * existing features2 field in case it has already been modified; we - * don't want to lose any features. We then update the bad location - * with the ORed value so that older kernels will see any features2 - * flags, and mark the two fields as needing updates once the - * transaction subsystem is online. + * If we detect a mismatched field, we OR the set bits into the existing + * features2 field in case it has already been modified; we don't want + * to lose any features. We then update the bad location with the ORed + * value so that older kernels will see any features2 flags. The + * superblock writeback code ensures the new sb_features2 is copied to + * sb_bad_features2 before it is logged or written to disk. */ if (xfs_sb_has_mismatched_features2(sbp)) { xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; - sbp->sb_bad_features2 = sbp->sb_features2; - mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; + mp->m_update_sb = true; /* * Re-check for ATTR2 in case it was found in bad_features2 @@ -692,17 +672,17 @@ xfs_mountfs( if (xfs_sb_version_hasattr2(&mp->m_sb) && (mp->m_flags & XFS_MOUNT_NOATTR2)) { xfs_sb_version_removeattr2(&mp->m_sb); - mp->m_update_flags |= XFS_SB_FEATURES2; + mp->m_update_sb = true; /* update sb_versionnum for the clearing of the morebits */ if (!sbp->sb_features2) - mp->m_update_flags |= XFS_SB_VERSIONNUM; + mp->m_update_sb = true; } /* always use v2 inodes by default now */ if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; - mp->m_update_flags |= XFS_SB_VERSIONNUM; + mp->m_update_sb = true; } /* @@ -895,8 +875,8 @@ xfs_mountfs( * the next remount into writeable mode. Otherwise we would never * perform the update e.g. for the root filesystem. */ - if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); + if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) { + error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); goto out_rtunmount; @@ -1103,9 +1083,6 @@ xfs_fs_writable( int xfs_log_sbcount(xfs_mount_t *mp) { - xfs_trans_t *tp; - int error; - /* allow this to proceed during the freeze sequence... */ if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) return 0; @@ -1119,17 +1096,7 @@ xfs_log_sbcount(xfs_mount_t *mp) if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) return 0; - tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - return error; + return xfs_sync_sb(mp, true); } /* @@ -1423,34 +1390,6 @@ xfs_freesb( } /* - * Used to log changes to the superblock unit and width fields which could - * be altered by the mount options, as well as any potential sb_features2 - * fixup. Only the first superblock is updated. - */ -int -xfs_mount_log_sb( - xfs_mount_t *mp, - __int64_t fields) -{ - xfs_trans_t *tp; - int error; - - ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | - XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 | - XFS_SB_VERSIONNUM)); - - tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - xfs_mod_sb(tp, fields); - error = xfs_trans_commit(tp, 0); - return error; -} - -/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 22ccf69d4d3c..a5b2ff822653 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -162,8 +162,7 @@ typedef struct xfs_mount { struct delayed_work m_reclaim_work; /* background inode reclaim */ struct delayed_work m_eofblocks_work; /* background eof blocks trimming */ - __int64_t m_update_flags; /* sb flags we need to update - on the next remount,rw */ + bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; @@ -378,7 +377,7 @@ extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); -extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); +extern int xfs_mount_log_sb(xfs_mount_t *); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 79fb19dd9c83..3e8186279541 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -714,7 +714,6 @@ STATIC int xfs_qm_qino_alloc( xfs_mount_t *mp, xfs_inode_t **ip, - __int64_t sbfields, uint flags) { xfs_trans_t *tp; @@ -777,11 +776,6 @@ xfs_qm_qino_alloc( spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); - ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) == - (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | - XFS_SB_QFLAGS)); xfs_sb_version_addquota(&mp->m_sb); mp->m_sb.sb_uquotino = NULLFSINO; @@ -798,7 +792,7 @@ xfs_qm_qino_alloc( else mp->m_sb.sb_pquotino = (*ip)->i_ino; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); + xfs_log_sb(tp); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { xfs_alert(mp, "%s failed (error %d)!", __func__, error); @@ -1451,7 +1445,7 @@ xfs_qm_mount_quotas( spin_unlock(&mp->m_sb_lock); if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + if (xfs_sync_sb(mp, false)) { /* * We could only have been turning quotas off. * We aren't in very good shape actually because @@ -1482,7 +1476,6 @@ xfs_qm_init_quotainos( struct xfs_inode *gip = NULL; struct xfs_inode *pip = NULL; int error; - __int64_t sbflags = 0; uint flags = 0; ASSERT(mp->m_quotainfo); @@ -1517,9 +1510,6 @@ xfs_qm_init_quotainos( } } else { flags |= XFS_QMOPT_SBVERSION; - sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | - XFS_SB_QFLAGS); } /* @@ -1530,7 +1520,6 @@ xfs_qm_init_quotainos( */ if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { error = xfs_qm_qino_alloc(mp, &uip, - sbflags | XFS_SB_UQUOTINO, flags | XFS_QMOPT_UQUOTA); if (error) goto error_rele; @@ -1539,7 +1528,6 @@ xfs_qm_init_quotainos( } if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, flags | XFS_QMOPT_GQUOTA); if (error) goto error_rele; @@ -1548,7 +1536,6 @@ xfs_qm_init_quotainos( } if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { error = xfs_qm_qino_alloc(mp, &pip, - sbflags | XFS_SB_PQUOTINO, flags | XFS_QMOPT_PQUOTA); if (error) goto error_rele; @@ -1587,32 +1574,6 @@ xfs_qm_dqfree_one( xfs_qm_dqdestroy(dqp); } -/* - * Start a transaction and write the incore superblock changes to - * disk. flags parameter indicates which fields have changed. - */ -int -xfs_qm_write_sb_changes( - xfs_mount_t *mp, - __int64_t flags) -{ - xfs_trans_t *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp, flags); - error = xfs_trans_commit(tp, 0); - - return error; -} - - /* --------------- utility functions for vnodeops ---------------- */ diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 3a07a937e232..0d4d3590cf85 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -157,7 +157,6 @@ struct xfs_dquot_acct { #define XFS_QM_RTBWARNLIMIT 5 extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); -extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); /* dquot stuff */ extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); @@ -166,9 +165,9 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, - uint, struct fs_disk_quota *); + uint, struct qc_dqblk *); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, - struct fs_disk_quota *); + struct qc_dqblk *); extern int xfs_qm_scall_getqstat(struct xfs_mount *, struct fs_quota_stat *); extern int xfs_qm_scall_getqstatv(struct xfs_mount *, diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 74fca68e43b6..9b965db45800 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -39,7 +39,6 @@ STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, uint); STATIC uint xfs_qm_export_flags(uint); -STATIC uint xfs_qm_export_qtype_flags(uint); /* * Turn off quota accounting and/or enforcement for all udquots and/or @@ -92,8 +91,7 @@ xfs_qm_scall_quotaoff( mutex_unlock(&q->qi_quotaofflock); /* XXX what to do if error ? Revert back to old vals incore ? */ - error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return error; + return xfs_sync_sb(mp, false); } dqtype = 0; @@ -314,7 +312,6 @@ xfs_qm_scall_quotaon( { int error; uint qf; - __int64_t sbflags; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); /* @@ -322,30 +319,22 @@ xfs_qm_scall_quotaon( */ flags &= ~(XFS_ALL_QUOTA_ACCT); - sbflags = 0; - if (flags == 0) { xfs_debug(mp, "%s: zero flags, m_qflags=%x", __func__, mp->m_qflags); return -EINVAL; } - /* No fs can turn on quotas with a delayed effect */ - ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); - /* * Can't enforce without accounting. We check the superblock * qflags here instead of m_qflags because rootfs can have * quota acct on ondisk without m_qflags' knowing. */ - if (((flags & XFS_UQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && + if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && (flags & XFS_UQUOTA_ENFD)) || - ((flags & XFS_GQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && (flags & XFS_GQUOTA_ENFD)) || - ((flags & XFS_PQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && (flags & XFS_PQUOTA_ENFD))) { xfs_debug(mp, "%s: Can't enforce without acct, flags=%x sbflags=%x", @@ -370,11 +359,11 @@ xfs_qm_scall_quotaon( /* * There's nothing to change if it's the same. */ - if ((qf & flags) == flags && sbflags == 0) + if ((qf & flags) == flags) return -EEXIST; - sbflags |= XFS_SB_QFLAGS; - if ((error = xfs_qm_write_sb_changes(mp, sbflags))) + error = xfs_sync_sb(mp, false); + if (error) return error; /* * If we aren't trying to switch on quota enforcement, we are done. @@ -384,8 +373,7 @@ xfs_qm_scall_quotaon( ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != (mp->m_qflags & XFS_PQUOTA_ACCT)) || ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != - (mp->m_qflags & XFS_GQUOTA_ACCT)) || - (flags & XFS_ALL_QUOTA_ENFD) == 0) + (mp->m_qflags & XFS_GQUOTA_ACCT))) return 0; if (! XFS_IS_QUOTA_RUNNING(mp)) @@ -422,20 +410,12 @@ xfs_qm_scall_getqstat( memset(out, 0, sizeof(fs_quota_stat_t)); out->qs_version = FS_QSTAT_VERSION; - if (!xfs_sb_version_hasquota(&mp->m_sb)) { - out->qs_uquota.qfs_ino = NULLFSINO; - out->qs_gquota.qfs_ino = NULLFSINO; - return 0; - } - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & (XFS_ALL_QUOTA_ACCT| XFS_ALL_QUOTA_ENFD)); - if (q) { - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - pip = q->qi_pquotaip; - } + uip = q->qi_uquotaip; + gip = q->qi_gquotaip; + pip = q->qi_pquotaip; if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &uip) == 0) @@ -481,14 +461,13 @@ xfs_qm_scall_getqstat( if (temppqip) IRELE(pip); } - if (q) { - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - } + out->qs_incoredqs = q->qi_dquots; + out->qs_btimelimit = q->qi_btimelimit; + out->qs_itimelimit = q->qi_itimelimit; + out->qs_rtbtimelimit = q->qi_rtbtimelimit; + out->qs_bwarnlimit = q->qi_bwarnlimit; + out->qs_iwarnlimit = q->qi_iwarnlimit; + return 0; } @@ -509,13 +488,6 @@ xfs_qm_scall_getqstatv( bool tempgqip = false; bool temppqip = false; - if (!xfs_sb_version_hasquota(&mp->m_sb)) { - out->qs_uquota.qfs_ino = NULLFSINO; - out->qs_gquota.qfs_ino = NULLFSINO; - out->qs_pquota.qfs_ino = NULLFSINO; - return 0; - } - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & (XFS_ALL_QUOTA_ACCT| XFS_ALL_QUOTA_ENFD)); @@ -523,11 +495,9 @@ xfs_qm_scall_getqstatv( out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino; - if (q) { - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - pip = q->qi_pquotaip; - } + uip = q->qi_uquotaip; + gip = q->qi_gquotaip; + pip = q->qi_pquotaip; if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &uip) == 0) @@ -562,19 +532,18 @@ xfs_qm_scall_getqstatv( if (temppqip) IRELE(pip); } - if (q) { - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - } + out->qs_incoredqs = q->qi_dquots; + out->qs_btimelimit = q->qi_btimelimit; + out->qs_itimelimit = q->qi_itimelimit; + out->qs_rtbtimelimit = q->qi_rtbtimelimit; + out->qs_bwarnlimit = q->qi_bwarnlimit; + out->qs_iwarnlimit = q->qi_iwarnlimit; + return 0; } -#define XFS_DQ_MASK \ - (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) +#define XFS_QC_MASK \ + (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK) /* * Adjust quota limits, and start/stop timers accordingly. @@ -584,7 +553,7 @@ xfs_qm_scall_setqlim( struct xfs_mount *mp, xfs_dqid_t id, uint type, - fs_disk_quota_t *newlim) + struct qc_dqblk *newlim) { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_disk_dquot *ddq; @@ -593,9 +562,9 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if (newlim->d_fieldmask & ~XFS_DQ_MASK) + if (newlim->d_fieldmask & ~XFS_QC_MASK) return -EINVAL; - if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + if ((newlim->d_fieldmask & XFS_QC_MASK) == 0) return 0; /* @@ -633,11 +602,11 @@ xfs_qm_scall_setqlim( /* * Make sure that hardlimits are >= soft limits before changing. */ - hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : + hard = (newlim->d_fieldmask & QC_SPC_HARD) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_hardlimit) : be64_to_cpu(ddq->d_blk_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : + soft = (newlim->d_fieldmask & QC_SPC_SOFT) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_softlimit) : be64_to_cpu(ddq->d_blk_softlimit); if (hard == 0 || hard >= soft) { ddq->d_blk_hardlimit = cpu_to_be64(hard); @@ -650,11 +619,11 @@ xfs_qm_scall_setqlim( } else { xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); } - hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : + hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_hardlimit) : be64_to_cpu(ddq->d_rtb_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : + soft = (newlim->d_fieldmask & QC_RT_SPC_SOFT) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_softlimit) : be64_to_cpu(ddq->d_rtb_softlimit); if (hard == 0 || hard >= soft) { ddq->d_rtb_hardlimit = cpu_to_be64(hard); @@ -667,10 +636,10 @@ xfs_qm_scall_setqlim( xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); } - hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? + hard = (newlim->d_fieldmask & QC_INO_HARD) ? (xfs_qcnt_t) newlim->d_ino_hardlimit : be64_to_cpu(ddq->d_ino_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? + soft = (newlim->d_fieldmask & QC_INO_SOFT) ? (xfs_qcnt_t) newlim->d_ino_softlimit : be64_to_cpu(ddq->d_ino_softlimit); if (hard == 0 || hard >= soft) { @@ -687,12 +656,12 @@ xfs_qm_scall_setqlim( /* * Update warnings counter(s) if requested */ - if (newlim->d_fieldmask & FS_DQ_BWARNS) - ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); - if (newlim->d_fieldmask & FS_DQ_IWARNS) - ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); + if (newlim->d_fieldmask & QC_SPC_WARNS) + ddq->d_bwarns = cpu_to_be16(newlim->d_spc_warns); + if (newlim->d_fieldmask & QC_INO_WARNS) + ddq->d_iwarns = cpu_to_be16(newlim->d_ino_warns); + if (newlim->d_fieldmask & QC_RT_SPC_WARNS) + ddq->d_rtbwarns = cpu_to_be16(newlim->d_rt_spc_warns); if (id == 0) { /* @@ -702,24 +671,24 @@ xfs_qm_scall_setqlim( * soft and hard limit values (already done, above), and * for warnings. */ - if (newlim->d_fieldmask & FS_DQ_BTIMER) { - q->qi_btimelimit = newlim->d_btimer; - ddq->d_btimer = cpu_to_be32(newlim->d_btimer); + if (newlim->d_fieldmask & QC_SPC_TIMER) { + q->qi_btimelimit = newlim->d_spc_timer; + ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer); } - if (newlim->d_fieldmask & FS_DQ_ITIMER) { - q->qi_itimelimit = newlim->d_itimer; - ddq->d_itimer = cpu_to_be32(newlim->d_itimer); + if (newlim->d_fieldmask & QC_INO_TIMER) { + q->qi_itimelimit = newlim->d_ino_timer; + ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer); } - if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { - q->qi_rtbtimelimit = newlim->d_rtbtimer; - ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); + if (newlim->d_fieldmask & QC_RT_SPC_TIMER) { + q->qi_rtbtimelimit = newlim->d_rt_spc_timer; + ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer); } - if (newlim->d_fieldmask & FS_DQ_BWARNS) - q->qi_bwarnlimit = newlim->d_bwarns; - if (newlim->d_fieldmask & FS_DQ_IWARNS) - q->qi_iwarnlimit = newlim->d_iwarns; - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - q->qi_rtbwarnlimit = newlim->d_rtbwarns; + if (newlim->d_fieldmask & QC_SPC_WARNS) + q->qi_bwarnlimit = newlim->d_spc_warns; + if (newlim->d_fieldmask & QC_INO_WARNS) + q->qi_iwarnlimit = newlim->d_ino_warns; + if (newlim->d_fieldmask & QC_RT_SPC_WARNS) + q->qi_rtbwarnlimit = newlim->d_rt_spc_warns; } else { /* * If the user is now over quota, start the timelimit. @@ -801,7 +770,7 @@ xfs_qm_log_quotaoff( mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, XFS_SB_QFLAGS); + xfs_log_sb(tp); /* * We have to make sure that the transaction is secure on disk before we @@ -824,7 +793,7 @@ xfs_qm_scall_getquota( struct xfs_mount *mp, xfs_dqid_t id, uint type, - struct fs_disk_quota *dst) + struct qc_dqblk *dst) { struct xfs_dquot *dqp; int error; @@ -848,28 +817,25 @@ xfs_qm_scall_getquota( } memset(dst, 0, sizeof(*dst)); - dst->d_version = FS_DQUOT_VERSION; - dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags); - dst->d_id = be32_to_cpu(dqp->q_core.d_id); - dst->d_blk_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); - dst->d_blk_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); + dst->d_spc_hardlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + dst->d_spc_softlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); - dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount); - dst->d_icount = dqp->q_res_icount; - dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer); - dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer); - dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns); - dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns); - dst->d_rtb_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); - dst->d_rtb_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); - dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount); - dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer); - dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns); + dst->d_space = XFS_FSB_TO_B(mp, dqp->q_res_bcount); + dst->d_ino_count = dqp->q_res_icount; + dst->d_spc_timer = be32_to_cpu(dqp->q_core.d_btimer); + dst->d_ino_timer = be32_to_cpu(dqp->q_core.d_itimer); + dst->d_ino_warns = be16_to_cpu(dqp->q_core.d_iwarns); + dst->d_spc_warns = be16_to_cpu(dqp->q_core.d_bwarns); + dst->d_rt_spc_hardlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); + dst->d_rt_spc_softlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); + dst->d_rt_space = XFS_FSB_TO_B(mp, dqp->q_res_rtbcount); + dst->d_rt_spc_timer = be32_to_cpu(dqp->q_core.d_rtbtimer); + dst->d_rt_spc_warns = be16_to_cpu(dqp->q_core.d_rtbwarns); /* * Internally, we don't reset all the timers when quota enforcement @@ -882,23 +848,23 @@ xfs_qm_scall_getquota( dqp->q_core.d_flags == XFS_DQ_GROUP) || (!XFS_IS_PQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_PROJ)) { - dst->d_btimer = 0; - dst->d_itimer = 0; - dst->d_rtbtimer = 0; + dst->d_spc_timer = 0; + dst->d_ino_timer = 0; + dst->d_rt_spc_timer = 0; } #ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || - (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || - (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && - dst->d_id != 0) { - if ((dst->d_bcount > dst->d_blk_softlimit) && - (dst->d_blk_softlimit > 0)) { - ASSERT(dst->d_btimer != 0); + if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || + (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || + (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && + id != 0) { + if ((dst->d_space > dst->d_spc_softlimit) && + (dst->d_spc_softlimit > 0)) { + ASSERT(dst->d_spc_timer != 0); } - if ((dst->d_icount > dst->d_ino_softlimit) && + if ((dst->d_ino_count > dst->d_ino_softlimit) && (dst->d_ino_softlimit > 0)) { - ASSERT(dst->d_itimer != 0); + ASSERT(dst->d_ino_timer != 0); } } #endif @@ -908,26 +874,6 @@ out_put: } STATIC uint -xfs_qm_export_qtype_flags( - uint flags) -{ - /* - * Can't be more than one, or none. - */ - ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != - (FS_PROJ_QUOTA | FS_USER_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != - (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != - (FS_USER_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); - - return (flags & XFS_DQ_USER) ? - FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - FS_PROJ_QUOTA : FS_GROUP_QUOTA; -} - -STATIC uint xfs_qm_export_flags( uint flags) { diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7542bbeca6a1..6923905ab33d 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -64,19 +64,10 @@ xfs_fs_get_xstatev( return xfs_qm_scall_getqstatv(mp, fqs); } -STATIC int -xfs_fs_set_xstate( - struct super_block *sb, - unsigned int uflags, - int op) +static unsigned int +xfs_quota_flags(unsigned int uflags) { - struct xfs_mount *mp = XFS_M(sb); - unsigned int flags = 0; - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; + unsigned int flags = 0; if (uflags & FS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; @@ -91,16 +82,39 @@ xfs_fs_set_xstate( if (uflags & FS_QUOTA_PDQ_ENFD) flags |= XFS_PQUOTA_ENFD; - switch (op) { - case Q_XQUOTAON: - return xfs_qm_scall_quotaon(mp, flags); - case Q_XQUOTAOFF: - if (!XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return xfs_qm_scall_quotaoff(mp, flags); - } + return flags; +} + +STATIC int +xfs_quota_enable( + struct super_block *sb, + unsigned int uflags) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + + return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags)); +} + +STATIC int +xfs_quota_disable( + struct super_block *sb, + unsigned int uflags) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -EINVAL; - return -EINVAL; + return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags)); } STATIC int @@ -131,7 +145,7 @@ STATIC int xfs_fs_get_dqblk( struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); @@ -141,14 +155,14 @@ xfs_fs_get_dqblk( return -ESRCH; return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), fdq); + xfs_quota_type(qid.type), qdq); } STATIC int xfs_fs_set_dqblk( struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); @@ -160,13 +174,14 @@ xfs_fs_set_dqblk( return -ESRCH; return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), fdq); + xfs_quota_type(qid.type), qdq); } const struct quotactl_ops xfs_quotactl_operations = { .get_xstatev = xfs_fs_get_xstatev, .get_xstate = xfs_fs_get_xstate, - .set_xstate = xfs_fs_set_xstate, + .quota_enable = xfs_quota_enable, + .quota_disable = xfs_quota_disable, .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, .set_dqblk = xfs_fs_set_dqblk, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19cbda196369..f2449fd86926 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -685,7 +685,7 @@ xfs_blkdev_get( mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); - xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); + xfs_warn(mp, "Invalid device [%s], error=%d", name, error); } return error; @@ -1111,6 +1111,11 @@ xfs_fs_statfs( statp->f_files, mp->m_maxicount); + /* If sb_icount overshot maxicount, report actual allocation */ + statp->f_files = max_t(typeof(statp->f_files), + statp->f_files, + sbp->sb_icount); + /* make sure statp->f_ffree does not underflow */ ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); statp->f_ffree = max_t(__int64_t, ffree, 0); @@ -1257,13 +1262,13 @@ xfs_fs_remount( * If this is the first remount to writeable state we * might have some superblock changes to update. */ - if (mp->m_update_flags) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); + if (mp->m_update_sb) { + error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); return error; } - mp->m_update_flags = 0; + mp->m_update_sb = false; } /* @@ -1293,8 +1298,9 @@ xfs_fs_remount( /* * Second stage of a freeze. The data is already frozen so we only - * need to take care of the metadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. + * need to take care of the metadata. Once that's done sync the superblock + * to the log to dirty it in case of a crash while frozen. This ensures that we + * will recover the unlinked inode lists on the next mount. */ STATIC int xfs_fs_freeze( @@ -1304,7 +1310,7 @@ xfs_fs_freeze( xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return xfs_fs_log_dummy(mp); + return xfs_sync_sb(mp, true); } STATIC int diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 1743b9f8e23d..a0c8067cea6f 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -149,24 +149,6 @@ static struct ctl_table xfs_table[] = { .extra2 = &xfs_params.inherit_noatim.max }, { - .procname = "xfsbufd_centisecs", - .data = &xfs_params.xfs_buf_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_timer.min, - .extra2 = &xfs_params.xfs_buf_timer.max - }, - { - .procname = "age_buffer_centisecs", - .data = &xfs_params.xfs_buf_age.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_age.min, - .extra2 = &xfs_params.xfs_buf_age.max - }, - { .procname = "inherit_nosymlinks", .data = &xfs_params.inherit_nosym.val, .maxlen = sizeof(int), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fa3135b9bf04..eb90cd59a0ec 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -472,6 +472,7 @@ xfs_trans_apply_sb_deltas( whole = 1; } + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) /* * Log the whole thing, the fields are noncontiguous. diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 0a4d4ab6d9a9..75798412859a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -327,9 +327,10 @@ xfs_trans_read_buf_map( return -EIO; } - if (tp) + if (tp) { _xfs_trans_bjoin(tp, bp, 1); - trace_xfs_trans_read_buf(bp->b_fspriv); + trace_xfs_trans_read_buf(bp->b_fspriv); + } *bpp = bp; return 0; |