diff options
Diffstat (limited to 'fs')
109 files changed, 841 insertions, 786 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index f1046cf6ad85..bfb1c6095c7a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS config VALIDATE_FS_PARSER bool "Validate filesystem parameter description" - default y help Enable this to perform validation of the parameter description for a filesystem when it is registered. diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fe39310c1a0a..35a4d9f4c3ae 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir.c * * Copyright (C) 1999-2000 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Common directory handling for ADFS */ #include "adfs.h" diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 693f69ed3de3..7557378e58b3 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_f.c * * Copyright (C) 1997-1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * E and F format directory handling */ #include <linux/buffer_head.h> diff --git a/fs/adfs/dir_f.h b/fs/adfs/dir_f.h index e4713404096c..5aec332b90f5 100644 --- a/fs/adfs/dir_f.h +++ b/fs/adfs/dir_f.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_f.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Structures of directories on the F format disk */ #ifndef ADFS_DIR_F_H diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 97b9f28f459b..6c5fbb0259c9 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_fplus.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <linux/slab.h> diff --git a/fs/adfs/dir_fplus.h b/fs/adfs/dir_fplus.h index b55aa41a68fe..4ec0931e36ad 100644 --- a/fs/adfs/dir_fplus.h +++ b/fs/adfs/dir_fplus.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_fplus.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Structures of directories on the F+ format disk */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 66621e96f9af..904d624541ad 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/inode.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <linux/writeback.h> diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 6935f05202ac..4d34338c6176 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/map.c * * Copyright (C) 1997-2002 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <asm/unaligned.h> diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 2a83655c408f..ffb669f9bba7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/super.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/init.h> diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 86da532c192f..df415c05939e 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -246,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry, true); + ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, + "srv=1", &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..915010464572 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 057b8d322422..361088a5edb9 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); - - /* vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(old, 0); } kfree(scb); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 9b3b2f1f1fc0..bcd1bafb0278 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -24,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = { static int afs_probe_cell_name(struct dentry *dentry) { struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; int ret; @@ -36,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + cell = afs_lookup_cell_rcu(net, name, len); if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); + afs_put_cell(net, cell); return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); + ret = dns_query(net->net, "afsdb", name, len, "srv=1", + NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..18a50d4febcf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8a67bf741880..7ee63526c6a2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -109,10 +109,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ @@ -616,7 +614,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 08fdb3951c49..1a414300b654 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..e4b59e76afb0 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -856,9 +856,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +877,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - res = prepare_binprm(&bprm); - - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/block_dev.c b/fs/block_dev.c index 749f5984425d..f00b569a9f89 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -203,13 +203,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; loff_t pos = iocb->ki_pos; bool should_dirty = false; struct bio bio; ssize_t ret; blk_qc_t qc; - struct bvec_iter_all iter_all; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -259,13 +258,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, } __set_current_state(TASK_RUNNING); - bio_for_each_segment_all(bvec, &bio, iter_all) { - if (should_dirty && !PageCompound(bvec->bv_page)) - set_page_dirty_lock(bvec->bv_page); - if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) - put_page(bvec->bv_page); - } - + bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); @@ -335,13 +328,7 @@ static void blkdev_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1aee51a9f3bf..5faf057f6f37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10831,17 +10831,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, remove_em = (atomic_read(&block_group->trimming) == 0); spin_unlock(&block_group->lock); - if (remove_em) { - struct extent_map_tree *em_tree; - - em_tree = &fs_info->mapping_tree.map_tree; - write_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - /* once for the tree */ - free_extent_map(em); - } - mutex_unlock(&fs_info->chunk_mutex); ret = remove_block_group_free_space(trans, block_group); @@ -10858,6 +10847,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + if (remove_em) { + struct extent_map_tree *em_tree; + + em_tree = &fs_info->mapping_tree.map_tree; + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + /* once for the tree */ + free_extent_map(em); + } out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); @@ -11137,13 +11139,11 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * it while performing the free space search since we have already * held back allocations. */ -static int btrfs_trim_free_extents(struct btrfs_device *device, - struct fstrim_range *range, u64 *trimmed) +static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) { - u64 start, len = 0, end = 0; + u64 start = SZ_1M, len = 0, end = 0; int ret; - start = max_t(u64, range->start, SZ_1M); *trimmed = 0; /* Discard not supported = nothing to do. */ @@ -11186,22 +11186,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } - /* Keep going until we satisfy minlen or reach end of space */ - if (len < range->minlen) { - mutex_unlock(&fs_info->chunk_mutex); - start += len; - continue; - } - - /* If we are out of the passed range break */ - if (start > range->start + range->len - 1) { - mutex_unlock(&fs_info->chunk_mutex); - break; - } - - start = max(range->start, start); - len = min(range->len, len); - ret = btrfs_issue_discard(device->bdev, start, len, &bytes); if (!ret) @@ -11216,10 +11200,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; - /* We've trimmed enough */ - if (*trimmed >= range->len) - break; - if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -11303,7 +11283,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range, &group_trimmed); + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; dev_ret = ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6dafa857bbb9..56ae2f659b6d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -312,8 +312,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_abort_transaction(trans, ret); goto out_end_trans; } - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); } else { ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); @@ -2930,8 +2928,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode_lock(inode); err = btrfs_delete_subvolume(dir, dentry); inode_unlock(inode); - if (!err) + if (!err) { + fsnotify_rmdir(dir, dentry); d_delete(dentry); + } out_dput: dput(dentry); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 10d9589001a9..bb5bd49573b4 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -747,6 +747,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) u64 total = 0; int i; +again: do { enqueued = 0; mutex_lock(&fs_devices->device_list_mutex); @@ -758,6 +759,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); + if (fs_devices->seed) { + fs_devices = fs_devices->seed; + goto again; + } if (enqueued == 0) return; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 78b6ba2029e8..95d9aebff2c4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -213,6 +213,9 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, } out: btrfs_free_path(path); + if (!ret) + set_bit(BTRFS_INODE_COPY_EVERYTHING, + &BTRFS_I(inode)->runtime_flags); return ret; } @@ -236,7 +239,6 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); out: @@ -388,8 +390,6 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, if (!ret) { inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); } diff --git a/fs/buffer.c b/fs/buffer.c index e450c55f6434..49a871570092 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2086,38 +2086,6 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, } EXPORT_SYMBOL(block_write_begin); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page) -{ - loff_t old_size = inode->i_size; - bool i_size_changed = false; - - /* - * No need to use i_size_read() here, the i_size cannot change under us - * because we hold i_rwsem. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos + copied > inode->i_size) { - i_size_write(inode, pos + copied); - i_size_changed = true; - } - - unlock_page(page); - - if (old_size < pos) - pagecache_isize_extended(inode, old_size, pos); - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - if (i_size_changed) - mark_inode_dirty(inode); -} - int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -2158,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; + bool i_size_changed = false; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - __generic_write_end(mapping->host, pos, copied, page); + + /* + * No need to use i_size_read() here, the i_size cannot change under us + * because we hold i_rwsem. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = true; + } + + unlock_page(page); put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + mark_inode_dirty(inode); return copied; } EXPORT_SYMBOL(generic_write_end); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6af2d0d4a87a..c8a9b89b922d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2121,9 +2121,10 @@ retry: if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index aae2b8b2adf5..523e9ea78a28 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -10,7 +10,7 @@ config CIFS select CRYPTO_SHA512 select CRYPTO_CMAC select CRYPTO_HMAC - select CRYPTO_ARC4 + select CRYPTO_LIB_ARC4 select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_ECB diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index d2a05e46d6f5..97b7497c13ef 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -33,7 +33,8 @@ #include <linux/ctype.h> #include <linux/random.h> #include <linux/highmem.h> -#include <crypto/skcipher.h> +#include <linux/fips.h> +#include <crypto/arc4.h> #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, @@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret: int calc_seckey(struct cifs_ses *ses) { - int rc; - struct crypto_skcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct skcipher_request *req; - unsigned char *sec_key; + unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */ + struct arc4_ctx *ctx_arc4; - sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL); - if (sec_key == NULL) - return -ENOMEM; + if (fips_enabled) + return -ENODEV; get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); - tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm_arc4)) { - rc = PTR_ERR(tfm_arc4); - cifs_dbg(VFS, "could not allocate crypto API arc4\n"); - goto out; - } - - rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response, - CIFS_SESS_KEY_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set response as a key\n", - __func__); - goto out_free_cipher; - } - - req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - cifs_dbg(VFS, "could not allocate crypto API arc4 request\n"); - goto out_free_cipher; + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); + if (!ctx_arc4) { + cifs_dbg(VFS, "could not allocate arc4 context\n"); + return -ENOMEM; } - sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); - sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL); - - rc = crypto_skcipher_encrypt(req); - skcipher_request_free(req); - if (rc) { - cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc); - goto out_free_cipher; - } + arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, + CIFS_CPHTXT_SIZE); /* make secondary_key/nonce as session key */ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE); /* and make len as that of session key only */ ses->auth_key.len = CIFS_SESS_KEY_SIZE; -out_free_cipher: - crypto_free_skcipher(tfm_arc4); -out: - kfree(sec_key); - return rc; + memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE); + kzfree(ctx_arc4); + return 0; } void diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f11eea6125c1..24635b65effa 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -303,6 +303,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; + spin_lock_init(&cifs_inode->open_file_lock); generate_random_uuid(cifs_inode->lease_key); /* @@ -1594,7 +1595,6 @@ MODULE_DESCRIPTION ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); -MODULE_SOFTDEP("pre: arc4"); MODULE_SOFTDEP("pre: des"); MODULE_SOFTDEP("pre: ecb"); MODULE_SOFTDEP("pre: hmac"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 334ff5f9c3f3..4777b3c4a92c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1377,6 +1377,7 @@ struct cifsInodeInfo { struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; + spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ unsigned int epoch; /* used to track lease state changes */ @@ -1780,10 +1781,14 @@ require use of the stronger protocol */ * tcp_ses_lock protects: * list operations on tcp and SMB session lists * tcon->open_file_lock protects the list of open files hanging off the tcon + * inode->open_file_lock protects the openFileList hanging off the inode * cfile->file_info_lock protects counters and fields in cifs file struct * f_owner.lock protects certain per file struct operations * mapping->page_lock protects certain per page operations * + * Note that the cifs_tcon.open_file_lock should be taken before + * not after the cifsInodeInfo.open_file_lock + * * Semaphores * ---------- * sesSem operations on smb session diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8c4121da624e..714a359c7c8d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -476,6 +476,7 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); server->nr_targets = 1; #ifdef CONFIG_CIFS_DFS_UPCALL + spin_unlock(&GlobalMid_Lock); cifs_sb = find_super_by_tcp(server); if (IS_ERR(cifs_sb)) { rc = PTR_ERR(cifs_sb); @@ -493,6 +494,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__, server->nr_targets); + spin_lock(&GlobalMid_Lock); #endif if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally @@ -2629,7 +2631,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1e21b2528cfb..534cbba72789 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); + rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, + NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 06e27ac6d82c..97090693d182 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -338,10 +338,12 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, atomic_inc(&tcon->num_local_opens); /* if readable file instance put first in list*/ + spin_lock(&cinode->open_file_lock); if (file->f_mode & FMODE_READ) list_add(&cfile->flist, &cinode->openFileList); else list_add_tail(&cfile->flist, &cinode->openFileList); + spin_unlock(&cinode->open_file_lock); spin_unlock(&tcon->open_file_lock); if (fid->purge_cache) @@ -413,7 +415,9 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); /* remove it from the lists */ + spin_lock(&cifsi->open_file_lock); list_del(&cifs_file->flist); + spin_unlock(&cifsi->open_file_lock); list_del(&cifs_file->tlist); atomic_dec(&tcon->num_local_opens); @@ -1950,9 +1954,9 @@ refind_writable: return 0; } - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); list_move_tail(&inv_file->flist, &cifs_inode->openFileList); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); cifsFileInfo_put(inv_file); ++refind; inv_file = NULL; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c4e75afa3258..9e430ae9314f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * SMB1 (CIFS) version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e32c264e3adb..82ade16c9501 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -457,7 +457,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, "STATUS_ALLOTTED_SPACE_EXCEEDED"}, - {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, + {STATUS_INSUFFICIENT_RESOURCES, -EAGAIN, "STATUS_INSUFFICIENT_RESOURCES"}, {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e921e6511728..9fd56b0acd7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3,19 +3,6 @@ * SMB2 version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> @@ -2385,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, kfree(dfs_rsp); return rc; } + +static int +parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int sub_len; + unsigned int sub_offset; + + /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */ + if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) { + cifs_dbg(VFS, "srv returned invalid symlink buffer\n"); + return -EIO; + } + + sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); + sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + if (sub_offset + 20 > plen || + sub_offset + sub_len + 20 > plen) { + cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); + return -EIO; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2414,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct kvec close_iov[1]; struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; - char *ioctl_buf; + struct reparse_data_buffer *reparse_buf; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + *target_path = NULL; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2496,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == 0) && (is_reparse_point)) { /* See MS-FSCC 2.3.23 */ - ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); plen = le32_to_cpu(ioctl_rsp->OutputCount); if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > rsp_iov[1].iov_len) { - cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); + cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n", + plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < 8) { + cifs_dbg(VFS, "reparse buffer is too small. Must be " + "at least 8 bytes but was %d\n", plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) { + cifs_dbg(VFS, "srv returned invalid reparse buf " + "length: %d\n", plen); rc = -EIO; goto querty_exit; } - /* Do stuff with ioctl_buf/plen */ + rc = parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)reparse_buf, + plen, target_path, cifs_sb); goto querty_exit; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 29b699d532ef..75311a8a68bf 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3114,9 +3114,14 @@ void smb2_reconnect_server(struct work_struct *work) tcon_exist = true; } } + /* + * IPC has the same lifetime as its session and uses its + * refcount. + */ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); tcon_exist = true; + ses->ses_count++; } } /* @@ -3135,7 +3140,10 @@ void smb2_reconnect_server(struct work_struct *work) else resched = true; list_del_init(&tcon->rlist); - cifs_put_tcon(tcon); + if (tcon->ipc) + cifs_put_smb_ses(tcon->ses); + else + cifs_put_tcon(tcon); } cifs_dbg(FYI, "Reconnecting tcons finished\n"); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7d5813bebd8..858353d20c39 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer { __u8 PathBuffer[0]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + +struct reparse_symlink_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; /* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ diff --git a/fs/compat.c b/fs/compat.c index 4a0aaaf53217..436d228cf71c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/compat.c * @@ -9,10 +10,6 @@ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/compat.h> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d2ca5287762d..92112915de8e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -13,6 +13,7 @@ #undef DEBUG #include <linux/fs.h> +#include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> @@ -1788,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(parent), dentry); d_delete(dentry); inode_unlock(d_inode(parent)); @@ -1916,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); d_delete(dentry); @@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + void *old; + dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); - } - - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas, * existing entry is a PMD, we will just leave the PMD in the * tree and dirty it if necessary. */ - void *old = dax_lock_entry(xas, new_entry); + old = dax_lock_entry(xas, new_entry); WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | DAX_LOCKED)); entry = new_entry; @@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned flags = 0; if (iov_iter_rw(iter) == WRITE) { - lockdep_assert_held_exclusive(&inode->i_rwsem); + lockdep_assert_held_write(&inode->i_rwsem); flags |= IOMAP_WRITE; } else { lockdep_assert_held(&inode->i_rwsem); diff --git a/fs/dcache.c b/fs/dcache.c index c435398f2c81..f41121e5d1ec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; - int isdir = d_is_dir(dentry); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); @@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } - fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index acef14ad53db..1e444fe1f778 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -617,13 +617,10 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, } EXPORT_SYMBOL_GPL(debugfs_create_symlink); -static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent) +static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; - simple_unlink(d_inode(parent), dentry); - d_delete(dentry); - /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either @@ -644,16 +641,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (simple_positive(dentry)) { dget(dentry); - if (!d_is_reg(dentry)) { - if (d_is_dir(dentry)) - ret = simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); + if (d_is_dir(dentry)) { + ret = simple_rmdir(d_inode(parent), dentry); if (!ret) - d_delete(dentry); + fsnotify_rmdir(d_inode(parent), dentry); } else { - __debugfs_remove_file(dentry, parent); + simple_unlink(d_inode(parent), dentry); + fsnotify_unlink(d_inode(parent), dentry); } + if (!ret) + d_delete(dentry); + if (d_is_reg(dentry)) + __debugfs_file_removed(dentry); dput(dentry); } return ret; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 2c14ae044dce..beeadca23b05 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_delete(dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/direct-io.c b/fs/direct-io.c index ac7fb19b6ade..ae196784f487 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -538,8 +538,8 @@ static struct bio *dio_await_one(struct dio *dio) */ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { - struct bio_vec *bvec; blk_status_t err = bio->bi_status; + bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; if (err) { if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) @@ -548,19 +548,10 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) dio->io_error = -EIO; } - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { + if (dio->is_async && should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - if (dio->op == REQ_OP_READ && !PageCompound(page) && - dio->should_dirty) - set_page_dirty_lock(page); - put_page(page); - } + bio_release_pages(bio, should_dirty); bio_put(bio); } return err; diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8e568428c88b..ee3bc0c96b9d 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 8c6ab6c95727..96c0c86f3fff 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b4505188e799..30ae44cb7453 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef EFIVAR_FS_INTERNAL_H #define EFIVAR_FS_INTERNAL_H diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 5b68e4294faa..5bc3c4a4c563 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/ctype.h> diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c6f513100cc9..4c74c768ae43 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, error == -EINTR); return error; } @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, err == -EINTR); return err; } diff --git a/fs/exec.c b/fs/exec.c index 89a500bb897a..c71cbfe6826a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1663,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return retval; } if (retval != -ENOEXEC || !bprm->file) { diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 33db13365c5e..547c165299c0 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) /* * Returns 1 if the passed-in block region is valid; 0 if some part overlaps - * with filesystem metadata blocksi. + * with filesystem metadata blocks. */ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, unsigned int count) @@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, (start_blk + count >= sbi->s_sb_block)) return 0; - return 1; } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a0c5ea91fcd4..fda7d3f5b4be 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode) struct backing_dev_info *bdi; bdi = inode_to_bdi(inode); - if (bdi_read_congested(bdi)) - return; - if (bdi_write_congested(bdi)) + if (bdi_rw_congested(bdi)) return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); @@ -511,6 +509,7 @@ repeat_in_this_group: /* * Scanned all blockgroups. */ + brelse(bitmap_bh); err = -ENOSPC; goto fail; got: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e474127dd255..7004ce581a32 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode) struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct ext2_inode *raw_inode; struct inode *inode; long ret = -EIO; @@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) */ if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ - brelse (bh); ret = -ESTALE; goto bad_inode; } @@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) { ext2_error(sb, "ext2_iget", "bad extended attribute block %u", ei->i_file_acl); - brelse(bh); ret = -EFSCORRUPTED; goto bad_inode; } @@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) return inode; bad_inode: + brelse(bh); iget_failed(inode); return ERR_PTR(ret); } @@ -1640,7 +1639,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) } int ext2_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_falgs) + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct ext2_inode_info *ei = EXT2_I(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 1d7ab73b1014..44eb6e7eb492 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -303,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_XIP) + if (test_opt(sb, XIP)) seq_puts(seq, ",xip"); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) + if (test_opt(sb, DAX)) seq_puts(seq, ",dax"); if (!test_opt(sb, RESERVATION)) @@ -935,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resgid = opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -967,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { + if (test_opt(sb, DAX)) { if (!bdev_dax_supported(sb->s_bdev, blocksize)) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); - sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; + clear_opt(sbi->s_mount_opt, DAX); } } @@ -1404,7 +1403,7 @@ out_set: sbi->s_resuid = new_opts.s_resuid; sbi->s_resgid = new_opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); return 0; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 1e33e0ac8cf1..79369c13cc55 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index) return handler; } +static bool +ext2_xattr_header_valid(struct ext2_xattr_header *header) +{ + if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + header->h_blocks != cpu_to_le32(1)) + return false; + + return true; +} + +static bool +ext2_xattr_entry_valid(struct ext2_xattr_entry *entry, + char *end, size_t end_offs) +{ + struct ext2_xattr_entry *next; + size_t size; + + next = EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + return false; + + if (entry->e_value_block != 0) + return false; + + size = le32_to_cpu(entry->e_value_size); + if (size > end_offs || + le16_to_cpu(entry->e_value_offs) + size > end_offs) + return false; + + return true; +} + +static int +ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name, + struct ext2_xattr_entry *entry) +{ + int cmp; + + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + + return cmp; +} + /* * ext2_xattr_get() * @@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, struct ext2_xattr_entry *entry; size_t name_len, size; char *end; - int error; + int error, not_found; struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", @@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_get", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = - EXT2_XATTR_NEXT(entry); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - if (name_index == entry->e_name_index && - name_len == entry->e_name_len && - memcmp(name, entry->e_name, name_len) == 0) + + not_found = ext2_xattr_cmp_entry(name_index, name_len, name, + entry); + if (!not_found) goto found; - entry = next; + if (not_found < 0) + break; + + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; found: - /* check the buffer size */ - if (entry->e_value_block != 0) - goto bad_block; size = le32_to_cpu(entry->e_value_size); - if (size > inode->i_sb->s_blocksize || - le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) - goto bad_block; - if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); if (buffer) { @@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_list", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", /* check the on-disk data structure */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); - - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - entry = next; + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); @@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct ext2_xattr_header *header = NULL; - struct ext2_xattr_entry *here, *last; + struct ext2_xattr_entry *here = NULL, *last = NULL; size_t name_len, free, min_offs = sb->s_blocksize; int not_found = 1, error; char *end; @@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, le32_to_cpu(HDR(bh)->h_refcount)); header = HDR(bh); end = bh->b_data + bh->b_size; - if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - header->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(sb, "ext2_xattr_set", + if (!ext2_xattr_header_valid(header)) { +bad_block: + ext2_error(sb, "ext2_xattr_set", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - /* Find the named attribute. */ - here = FIRST_ENTRY(bh); - while (!IS_LAST_ENTRY(here)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); - if ((char *)next >= end) - goto bad_block; - if (!here->e_value_block && here->e_value_size) { - size_t offs = le16_to_cpu(here->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - not_found = name_index - here->e_name_index; - if (!not_found) - not_found = name_len - here->e_name_len; - if (!not_found) - not_found = memcmp(name, here->e_name,name_len); - if (not_found <= 0) - break; - here = next; - } - last = here; - /* We still need to compute min_offs and last. */ + /* + * Find the named attribute. If not found, 'here' will point + * to entry where the new attribute should be inserted to + * maintain sorting. + */ + last = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(last)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize)) goto bad_block; - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } - last = next; + if (not_found > 0) { + not_found = ext2_xattr_cmp_entry(name_index, + name_len, + name, last); + if (not_found <= 0) + here = last; + } + last = EXT2_XATTR_NEXT(last); } + if (not_found > 0) + here = last; /* Check whether we have enough space left. */ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); @@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* We will use a new extended attribute block. */ free = sb->s_blocksize - sizeof(struct ext2_xattr_header) - sizeof(__u32); - here = last = NULL; /* avoid gcc uninitialized warning. */ } if (not_found) { @@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; - if (!here->e_value_block && here->e_value_size) { - size_t size = le32_to_cpu(here->e_value_size); - - if (le16_to_cpu(here->e_value_offs) + size > - sb->s_blocksize || size > sb->s_blocksize) - goto bad_block; - free += EXT2_XATTR_SIZE(size); - } + free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size)); free += EXT2_XATTR_LEN(name_len); } error = -ENOSPC; @@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set", unlock_buffer(bh); ea_bdebug(bh, "cloning"); - header = kmalloc(bh->b_size, GFP_KERNEL); + header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); error = -ENOMEM; if (header == NULL) goto cleanup; - memcpy(header, HDR(bh), bh->b_size); header->h_refcount = cpu_to_le32(1); offset = (char *)here - bh->b_data; @@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - if (!here->e_value_block && here->e_value_size) { + if (here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; @@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && o < offs) + if (o < offs) last->e_value_offs = cpu_to_le16(o + size); last = EXT2_XATTR_NEXT(last); @@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode) goto cleanup; } ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { + if (!ext2_xattr_header_valid(HDR(bh))) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e41cbe8e81b9..9ebfb1b28430 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -715,6 +715,7 @@ void wbc_detach_inode(struct writeback_control *wbc) void wbc_account_io(struct writeback_control *wbc, struct page *page, size_t bytes) { + struct cgroup_subsys_state *css; int id; /* @@ -726,7 +727,12 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, if (!wbc->wb) return; - id = mem_cgroup_css_from_page(page)->id; + css = mem_cgroup_css_from_page(page); + /* dead cgroups shouldn't contribute to inode ownership arbitration */ + if (!(css->flags & CSS_ONLINE)) + return; + + id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 24ea19cfe07e..ea8237513dfa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,16 +1317,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, unsigned reqsize; unsigned int hash; - /* - * Require sane minimum read buffer - that has capacity for fixed part - * of any request header + negotated max_write room for data. If the - * requirement is not satisfied return EINVAL to the filesystem server - * to indicate that it is not following FUSE server/client contract. - * Don't dequeue / abort any request. - */ - if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write)) - return -EINVAL; - restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c78ccaf83ef8..f4b895fc632d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -991,9 +991,12 @@ static void gfs2_write_unlock(struct inode *inode) static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len, struct iomap *iomap) { + unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int blocks; - return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0); + blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; + return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); } static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, @@ -1179,6 +1182,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); + if (iomap->flags & IOMAP_F_SIZE_CHANGED) + mark_inode_dirty(inode); gfs2_write_unlock(inode); out: diff --git a/fs/inode.c b/fs/inode.c index 4348cfb14562..5f5431ec3d62 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); diff --git a/fs/internal.h b/fs/internal.h index a48ef81be37d..2f3c3de51fad 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -40,8 +40,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) extern void guard_bio_eod(int rw, struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); /* * char_dev.c diff --git a/fs/io_uring.c b/fs/io_uring.c index 0fbb486a320e..4ed4b110a154 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -997,9 +998,6 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); - - /* don't drop a reference to these pages */ - iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } @@ -1801,10 +1799,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || @@ -2201,11 +2197,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - if (ret == -ERESTARTSYS) - ret = -EINTR; if (sig) - restore_user_sigmask(sig, &sigsaved); + restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS); + + if (ret == -ERESTARTSYS) + ret = -EINTR; return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } @@ -2777,8 +2774,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); #if defined(CONFIG_UNIX) - if (ctx->ring_sock) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ sock_release(ctx->ring_sock); + } #endif io_mem_free(ctx->sq_ring); diff --git a/fs/iomap.c b/fs/iomap.c index 23ef63fd1669..217c3e5a13d6 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -287,7 +287,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap_readpage_ctx *ctx = data; struct page *page = ctx->cur_page; struct iomap_page *iop = iomap_page_create(inode, page); - bool is_contig = false; + bool same_page = false, is_contig = false; loff_t orig_pos = pos; unsigned poff, plen; sector_t sector; @@ -315,10 +315,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * Try to merge into a previous segment if we can. */ sector = iomap_sector(iomap, pos); - if (ctx->bio && bio_end_sector(ctx->bio) == sector) { - if (__bio_try_merge_page(ctx->bio, page, plen, poff, true)) - goto done; + if (ctx->bio && bio_end_sector(ctx->bio) == sector) is_contig = true; + + if (is_contig && + __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) { + if (!same_page && iop) + atomic_inc(&iop->read_count); + goto done; } /* @@ -329,7 +333,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (iop) atomic_inc(&iop->read_count); - if (!ctx->bio || !is_contig || bio_full(ctx->bio)) { + if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -773,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page, struct iomap *iomap) { const struct iomap_page_ops *page_ops = iomap->page_ops; + loff_t old_size = inode->i_size; int ret; if (iomap->type == IOMAP_INLINE) { @@ -784,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, ret = __iomap_write_end(inode, pos, len, copied, page, iomap); } - __generic_write_end(inode, pos, ret, page); + /* + * Update the in-memory inode size after copying the data into the page + * cache. It's up to the file system to write the updated size to disk, + * preferably after I/O completion so that no stale data is exposed. + */ + if (pos + ret > old_size) { + i_size_write(inode, pos + ret); + iomap->flags |= IOMAP_F_SIZE_CHANGED; + } + unlock_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, copied, page, iomap); + page_ops->page_done(inode, pos, ret, page, iomap); put_page(page); if (ret < len) @@ -1595,13 +1612,7 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } diff --git a/fs/locks.c b/fs/locks.c index ec1e4a5df629..de87a3231789 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1534,11 +1534,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { - if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) - return false; - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); + bool rc; + + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { + rc = false; + goto trace; + } + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) { + rc = false; + goto trace; + } + + rc = locks_conflict(breaker, lease); +trace: + trace_leases_conflict(rc, lease, breaker); + return rc; } static bool @@ -1753,10 +1763,10 @@ int fcntl_getlease(struct file *filp) } /** - * check_conflicting_open - see if the given dentry points to a file that has + * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. - * @dentry: dentry to check + * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * @@ -1764,30 +1774,42 @@ int fcntl_getlease(struct file *filp) * conflict with the lease we're trying to set. */ static int -check_conflicting_open(const struct dentry *dentry, const long arg, int flags) +check_conflicting_open(struct file *filp, const long arg, int flags) { - int ret = 0; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); + int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) - return -EAGAIN; + if (arg == F_RDLCK) + return inode_is_open_for_write(inode) ? -EAGAIN : 0; + else if (arg != F_WRLCK) + return 0; - if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || - (atomic_read(&inode->i_count) > 1))) - ret = -EAGAIN; + /* + * Make sure that only read/write count is from lease requestor. + * Note that this will result in denying write leases when i_writecount + * is negative, which is what we want. (We shouldn't grant write leases + * on files open for execution.) + */ + if (filp->f_mode & FMODE_WRITE) + self_wcount = 1; + else if (filp->f_mode & FMODE_READ) + self_rcount = 1; - return ret; + if (atomic_read(&inode->i_writecount) != self_wcount || + atomic_read(&inode->i_readcount) != self_rcount) + return -EAGAIN; + + return 0; } static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1822,7 +1844,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) goto out; @@ -1879,7 +1901,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * precedes these checks. */ smp_mb(); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) { locks_unlink_lock_ctx(lease); goto out; diff --git a/fs/namei.c b/fs/namei.c index 20831c2fbb34..209c51a5226c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); + fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); @@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!error) { dont_mount(dentry); detach_mounts(dentry); + fsnotify_unlink(dir, dentry); } } } diff --git a/fs/namespace.c b/fs/namespace.c index b26778bdc236..6fbc9126367a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2105,6 +2105,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; commit_tree(child); } put_mountpoint(smp); @@ -2595,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path) if (!check_mnt(p)) goto out; - /* The thing moved should be either ours or completely unattached. */ - if (attached && !check_mnt(old)) + /* The thing moved must be mounted... */ + if (!is_mounted(&old->mnt)) goto out; - if (!attached && !(ns && is_anon_ns(ns))) + /* ... and either ours or the root of anon namespace */ + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) @@ -3445,6 +3447,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, ns->root = mnt; ns->mounts = 1; list_add(&mnt->mnt_list, &ns->list); + mntget(newmount.mnt); /* Attach to an apparent O_PATH fd with a note that we need to unmount * it, not just simply put it. diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index e6a700f01452..aec769a500a1 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); + ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL, + false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 4884fdae28fb..1e7296395d71 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -291,7 +291,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, - desc, "", 0, idmap); + desc, NULL, "", 0, idmap); mutex_unlock(&idmap->idmap_mutex); } if (!IS_ERR(rkey)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e38f4af20950..6418cb6c079b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1256,10 +1256,20 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - p->o_arg.umask = current_umask(); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); + if (flags & O_CREAT) { + p->o_arg.umask = current_umask(); + p->o_arg.label = nfs4_label_copy(p->a_label, label); + if (c->sattr != NULL && c->sattr->ia_valid != 0) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); + + memcpy(p->o_arg.u.verifier.data, c->verf, + sizeof(p->o_arg.u.verifier.data)); + } + } /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS * will return permission denied for all bits until close */ if (!(flags & O_EXCL)) { @@ -1283,7 +1293,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = nfs4_label_copy(p->a_label, label); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: @@ -1296,13 +1305,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { - p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - - memcpy(p->o_arg.u.verifier.data, c->verf, - sizeof(p->o_arg.u.verifier.data)); - } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 52d533967485..0effeee28352 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) nfs_cancel_async_unlink(dentry); return; } - - /* - * vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(dentry, 0); } #define SILLYNAME_PREFIX ".nfs" diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618e66078ee5..1a0cdeb3b875 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1563,7 +1563,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) * Never use more than a third of the remaining memory, * unless it's the only way to give this client a slot: */ - avail = clamp_t(int, avail, slotsize, total_avail/3); + avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e6fde1a5c072..b428c295d13f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -355,6 +355,10 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) /* Mark is just getting destroyed or created? */ if (!conn) continue; + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) + continue; + /* Pairs with smp_wmb() in fsnotify_add_mark_list() */ + smp_rmb(); fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a90bb19dcfa2..91006f47e420 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return 0; } +static int fanotify_events_supported(struct path *path, __u64 mask) +{ + /* + * Some filesystems such as 'proc' acquire unusual locks when opening + * files. For them fanotify permission events have high chances of + * deadlocking the system - open done when reporting fanotify event + * blocks on this "unusual" lock while another process holding the lock + * waits for fanotify permission event to be answered. Just disallow + * permission events for such filesystems. + */ + if (mask & FANOTIFY_PERM_EVENTS && + path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) + return -EINVAL; + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (flags & FAN_MARK_ADD) { + ret = fanotify_events_supported(&path, mask); + if (ret) + goto path_put_and_out; + } + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { ret = fanotify_test_fid(&path, &__fsid); if (ret) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4eb2ebfac468..2ecef6155fc0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -95,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb) } /* - * fsnotify_nameremove - a filename was removed from a directory - * - * This is mostly called under parent vfs inode lock so name and - * dentry->d_parent should be stable. However there are some corner cases where - * inode lock is not held. So to be on the safe side and be reselient to future - * callers and out of tree users of d_delete(), we do not assume that d_parent - * and d_name are stable and we use dget_parent() and - * take_dentry_name_snapshot() to grab stable references. - */ -void fsnotify_nameremove(struct dentry *dentry, int isdir) -{ - struct dentry *parent; - struct name_snapshot name; - __u32 mask = FS_DELETE; - - /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ - if (IS_ROOT(dentry)) - return; - - if (isdir) - mask |= FS_ISDIR; - - parent = dget_parent(dentry); - /* Avoid unneeded take_dentry_name_snapshot() */ - if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) && - !(dentry->d_sb->s_fsnotify_mask & FS_DELETE)) - goto out_dput; - - take_dentry_name_snapshot(&name, dentry); - - fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &name.name, 0); - - release_dentry_name_snapshot(&name); - -out_dput: - dput(parent); -} -EXPORT_SYMBOL(fsnotify_nameremove); - -/* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 25eb247ea85a..99ddd126f6f0 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -482,10 +482,13 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->type = type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ - if (fsid) + if (fsid) { conn->fsid = *fsid; - else + conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID; + } else { conn->fsid.val[0] = conn->fsid.val[1] = 0; + conn->flags = 0; + } if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -560,7 +563,12 @@ restart: if (err) return err; goto restart; - } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) { + conn->fsid = *fsid; + /* Pairs with smp_rmb() in fanotify_get_fsid() */ + smp_wmb(); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID; + } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) && (fsid->val[0] != conn->fsid.val[0] || fsid->val[1] != conn->fsid.val[1])) { /* diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 2d016937fdda..42a61eecdacd 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, out_attach: spin_lock(&dentry_attach_lock); + if (unlikely(dentry->d_fsdata && !alias)) { + /* d_fsdata is set by a racing thread which is doing + * the same thing as this thread is doing. Leave the racing + * thread going ahead and we return here. + */ + spin_unlock(&dentry_attach_lock); + iput(dl->dl_inode); + ocfs2_lock_res_free(&dl->dl_lockres); + kfree(dl); + return 0; + } + dentry->d_fsdata = dl; dl->dl_count++; spin_unlock(&dentry_attach_lock); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 56feaa739979..b801c6353100 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/module.h> @@ -37,7 +34,7 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param) } module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); -MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing"); +MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); int ovl_copy_xattr(struct dentry *old, struct dentry *new) { diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93872bb50230..702aa63f6774 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -21,7 +18,7 @@ static unsigned short ovl_redirect_max = 256; module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); -MODULE_PARM_DESC(ovl_redirect_max, +MODULE_PARM_DESC(redirect_max, "Maximum length of absolute redirect xattr value"); static int ovl_set_redirect(struct dentry *dentry, bool samedir); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index cc1c9e5606ba..cb8ec1f65c03 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Overlayfs NFS export support. * * Amir Goldstein <amir73il@gmail.com> * * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 340a6ad45914..e235a635d9ec 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/cred.h> @@ -409,37 +406,16 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd, return ret; } -static unsigned int ovl_get_inode_flags(struct inode *inode) -{ - unsigned int flags = READ_ONCE(inode->i_flags); - unsigned int ovl_iflags = 0; - - if (flags & S_SYNC) - ovl_iflags |= FS_SYNC_FL; - if (flags & S_APPEND) - ovl_iflags |= FS_APPEND_FL; - if (flags & S_IMMUTABLE) - ovl_iflags |= FS_IMMUTABLE_FL; - if (flags & S_NOATIME) - ovl_iflags |= FS_NOATIME_FL; - - return ovl_iflags; -} - static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, - unsigned long arg) + unsigned long arg, unsigned int iflags) { long ret; struct inode *inode = file_inode(file); - unsigned int flags; - unsigned int old_flags; + unsigned int old_iflags; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - ret = mnt_want_write_file(file); if (ret) return ret; @@ -448,8 +424,8 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, /* Check the capability before cred override */ ret = -EPERM; - old_flags = ovl_get_inode_flags(inode); - if (((flags ^ old_flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + old_iflags = READ_ONCE(inode->i_flags); + if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) && !capable(CAP_LINUX_IMMUTABLE)) goto unlock; @@ -469,6 +445,63 @@ unlock: } +static unsigned int ovl_fsflags_to_iflags(unsigned int flags) +{ + unsigned int iflags = 0; + + if (flags & FS_SYNC_FL) + iflags |= S_SYNC; + if (flags & FS_APPEND_FL) + iflags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + iflags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ + unsigned int flags; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsflags_to_iflags(flags)); +} + +static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags) +{ + unsigned int iflags = 0; + + if (xflags & FS_XFLAG_SYNC) + iflags |= S_SYNC; + if (xflags & FS_XFLAG_APPEND) + iflags |= S_APPEND; + if (xflags & FS_XFLAG_IMMUTABLE) + iflags |= S_IMMUTABLE; + if (xflags & FS_XFLAG_NOATIME) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fsxattr fa; + + memset(&fa, 0, sizeof(fa)); + if (copy_from_user(&fa, (void __user *) arg, sizeof(fa))) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsxflags_to_iflags(fa.fsx_xflags)); +} + static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret; @@ -480,8 +513,11 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case FS_IOC_SETFLAGS: + ret = ovl_ioctl_set_fsflags(file, cmd, arg); + break; + case FS_IOC_FSSETXATTR: - ret = ovl_ioctl_set_flags(file, cmd, arg); + ret = ovl_ioctl_set_fsxflags(file, cmd, arg); break; default: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f7eba21effa5..7663aeb85fa3 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -553,15 +550,15 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, int xinobits = ovl_xino_bits(inode->i_sb); /* - * When NFS export is enabled and d_ino is consistent with st_ino - * (samefs or i_ino has enough bits to encode layer), set the same - * value used for d_ino to i_ino, because nfsd readdirplus compares - * d_ino values to i_ino values of child entries. When called from + * When d_ino is consistent with st_ino (samefs or i_ino has enough + * bits to encode layer), set the same value used for st_ino to i_ino, + * so inode number exposed via /proc/locks and a like will be + * consistent with d_ino and st_ino values. An i_ino value inconsistent + * with d_ino also causes nfsd readdirplus to fail. When called from * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). */ - if (inode->i_sb->s_export_op && - (ovl_same_sb(inode->i_sb) || xinobits)) { + if (ovl_same_sb(inode->i_sb) || xinobits) { inode->i_ino = ino; if (xinobits && fsid && !(ino >> (64 - xinobits))) inode->i_ino |= (unsigned long)fsid << (64 - xinobits); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index badf039267a2..e9717c2f7d45 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cec40077b522..6934bcf030f0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 6ed1ace8f8b3..28a2d12a1029 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ struct ovl_config { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cc8303a806b4..47a91c9733a5 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 746ea36f3171..b368e2e102fa 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <uapi/linux/magic.h> @@ -31,29 +28,29 @@ struct ovl_dir_cache; static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_dir_def, +MODULE_PARM_DESC(redirect_dir, "Default to on or off for the redirect_dir feature"); static bool ovl_redirect_always_follow = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); module_param_named(redirect_always_follow, ovl_redirect_always_follow, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_always_follow, +MODULE_PARM_DESC(redirect_always_follow, "Follow redirects even if redirect_dir feature is turned off"); static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); module_param_named(index, ovl_index_def, bool, 0644); -MODULE_PARM_DESC(ovl_index_def, +MODULE_PARM_DESC(index, "Default to on or off for the inodes index feature"); static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); -MODULE_PARM_DESC(ovl_nfs_export_def, +MODULE_PARM_DESC(nfs_export, "Default to on or off for the NFS export feature"); static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); -MODULE_PARM_DESC(ovl_xino_auto_def, +MODULE_PARM_DESC(xino_auto, "Auto enable xino feature"); static void ovl_entry_stack_free(struct ovl_entry *oe) @@ -66,7 +63,7 @@ static void ovl_entry_stack_free(struct ovl_entry *oe) static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); module_param_named(metacopy, ovl_metacopy_def, bool, 0644); -MODULE_PARM_DESC(ovl_metacopy_def, +MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); static void ovl_dentry_release(struct dentry *dentry) @@ -995,8 +992,8 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, int err; trap = ovl_get_trap_inode(sb, dir); - err = PTR_ERR(trap); - if (IS_ERR(trap)) { + err = PTR_ERR_OR_ZERO(trap); + if (err) { if (err == -ELOOP) pr_err("overlayfs: conflicting %s path\n", name); return err; @@ -1471,23 +1468,20 @@ out_err: * Check if this layer root is a descendant of: * - another layer of this overlayfs instance * - upper/work dir of any overlayfs instance - * - a disconnected dentry (detached root) */ static int ovl_check_layer(struct super_block *sb, struct dentry *dentry, const char *name) { - struct dentry *next, *parent; - bool is_root = false; + struct dentry *next = dentry, *parent; int err = 0; - if (!dentry || dentry == dentry->d_sb->s_root) + if (!dentry) return 0; - next = dget(dentry); - /* Walk back ancestors to fs root (inclusive) looking for traps */ - do { - parent = dget_parent(next); - is_root = (parent == next); + parent = dget_parent(next); + + /* Walk back ancestors to root (inclusive) looking for traps */ + while (!err && parent != next) { if (ovl_is_inuse(parent)) { err = -EBUSY; pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n", @@ -1496,17 +1490,12 @@ static int ovl_check_layer(struct super_block *sb, struct dentry *dentry, err = -ELOOP; pr_err("overlayfs: overlapping %s path\n", name); } - dput(next); next = parent; - } while (!err && !is_root); - - /* Did we really walk to fs root or found a detached root? */ - if (!err && next != dentry->d_sb->s_root) { - err = -ESTALE; - pr_err("overlayfs: disconnected %s path\n", name); + parent = dget_parent(next); + dput(next); } - dput(next); + dput(parent); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index e135064e87ad..f5678a3f8350 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/pnode.c b/fs/pnode.c index 595857a1883e..49f6d7ff2139 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -261,7 +261,6 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); - child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 62ee41b4bbd0..4c3dcb718961 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -98,3 +98,7 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..46dcb6f0eccf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..c40fca98f2b7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3077,8 +3080,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); @@ -3449,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/root.c b/fs/proc/root.c index 8b145e7b9661..522199e9525e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = { .init_fs_context = proc_init_fs_context, .parameters = &proc_fs_parameters, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7bb96fdd38ad..57957c91c6df 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -166,7 +166,7 @@ void __weak elfcorehdr_free(unsigned long long addr) */ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, false); + return read_from_oldmem(buf, count, ppos, 0, sev_active()); } /* @@ -174,7 +174,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) */ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, sme_active()); + return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active()); } /* @@ -374,7 +374,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, buflen); start = m->paddr + *fpos - m->offset; tmp = read_from_oldmem(buffer, tsz, &start, - userbuf, sme_active()); + userbuf, mem_encrypt_active()); if (tmp < 0) return tmp; buflen -= tsz; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9ad72ea7f71f..be9c471cdbc8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt) /* * Dquot List Management: - * The quota code uses three lists for dquot management: the inuse_list, - * free_dquots, and dquot_hash[] array. A single dquot structure may be - * on all three lists, depending on its current state. + * The quota code uses four lists for dquot management: the inuse_list, + * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot + * structure may be on some of those lists, depending on its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. @@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt) * dqstats.free_dquots gives the number of dquots on the list. When * dquot is invalidated it's completely released from memory. * + * Dirty dquots are added to the dqi_dirty_list of quota_info when mark + * dirtied, and this list is searched when writing dirty dquots back to + * quota file. Note that some filesystems do dirty dquot tracking on their + * own (e.g. in a journal) and thus don't use dqi_dirty_list. + * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. @@ -1996,8 +2001,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) &warn_to[cnt]); if (ret) goto over_quota; - ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, - &warn_to[cnt]); + ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, + DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index fd5dd806f1b9..cb13fb76dbee 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state) return flags; } -static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) +static int quota_getstate(struct super_block *sb, int type, + struct fs_quota_stat *fqs) { - int type; struct qc_state state; int ret; @@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATE quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) return 0; } -static int quota_getxstate(struct super_block *sb, void __user *addr) +static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; - ret = quota_getstate(sb, &fqs); + ret = quota_getstate(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } -static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) +static int quota_getstatev(struct super_block *sb, int type, + struct fs_quota_statv *fqs) { - int type; struct qc_state state; int ret; @@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATV quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) return 0; } -static int quota_getxstatev(struct super_block *sb, void __user *addr) +static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; @@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) default: return -EINVAL; } - ret = quota_getstatev(sb, &fqs); + ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; @@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: - return quota_getxstate(sb, addr); + return quota_getxstate(sb, type, addr); case Q_XGETQSTATV: - return quota_getxstatev(sb, addr); + return quota_getxstatev(sb, type, addr); case Q_XSETQLIM: return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..a4d8f6e8b63c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index d6008a636479..c181dee235bb 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Minchan Kim <minchan@kernel.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> #include <linux/mutex.h> diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 23a9c28ad8ea..2a2a2d106440 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index a6c75929a00e..550c3e592032 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index a9ba8d96776a..54c17b7c85fd 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 80db1b86a27c..a4894cc59447 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index 95da65366548..c4e47e0588c7 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013, 2014 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/buffer_head.h> diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 9b7b1b6a7892..520d323a99ce 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kernel.h> diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 98537eab27e2..2e3073ace009 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef PAGE_ACTOR_H #define PAGE_ACTOR_H /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #ifndef CONFIG_SQUASHFS_FILE_DIRECT diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 57038604d4a8..d41c21fef138 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_create_group); +static int internal_create_groups(struct kobject *kobj, int update, + const struct attribute_group **groups) +{ + int error = 0; + int i; + + if (!groups) + return 0; + + for (i = 0; groups[i]; i++) { + error = internal_create_group(kobj, update, groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(kobj, groups[i]); + break; + } + } + return error; +} + /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on @@ -191,25 +211,29 @@ EXPORT_SYMBOL_GPL(sysfs_create_group); int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { - int error = 0; - int i; - - if (!groups) - return 0; - - for (i = 0; groups[i]; i++) { - error = sysfs_create_group(kobj, groups[i]); - if (error) { - while (--i >= 0) - sysfs_remove_group(kobj, groups[i]); - break; - } - } - return error; + return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); /** + * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups + * @kobj: The kobject to update the group on + * @groups: The attribute groups to update, NULL terminated + * + * This function update a bunch of attribute groups. If an error occurs when + * updating a group, all previously updated groups will be removed together + * with already existing (not updated) attributes. + * + * Returns 0 on success or error code from sysfs_update_group on failure. + */ +int sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups) +{ + return internal_create_groups(kobj, 1, groups); +} +EXPORT_SYMBOL_GPL(sysfs_update_groups); + +/** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on * @grp: The attribute group to update diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 7098c49f3693..eeeae0475da9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * inode.c - part of tracefs, a pseudo file system for activating tracing * @@ -5,12 +6,7 @@ * * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * * tracefs is the file system that is used by the tracing infrastructure. - * */ #include <linux/module.h> @@ -509,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) switch (dentry->d_inode->i_mode & S_IFMT) { case S_IFDIR: ret = simple_rmdir(parent->d_inode, dentry); + if (!ret) + fsnotify_rmdir(parent->d_inode, dentry); break; default: simple_unlink(parent->d_inode, dentry); + fsnotify_unlink(parent->d_inode, dentry); break; } if (!ret) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index e7276932e433..9bb18311a22f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block, return NULL; } -/* Extend the file by 'blocks' blocks, return the number of extents added */ +/* Extend the file with new blocks totaling 'new_block_bytes', + * return the number of extents added + */ static int udf_do_extend_file(struct inode *inode, struct extent_position *last_pos, struct kernel_long_ad *last_ext, - sector_t blocks) + loff_t new_block_bytes) { - sector_t add; + uint32_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; struct kernel_lb_addr prealloc_loc = {}; @@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode, /* The previous extent is fake and we should not extend by anything * - there's nothing to do... */ - if (!blocks && fake) + if (!new_block_bytes && fake) return 0; iinfo = UDF_I(inode); @@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode, /* Can we merge with the previous extent? */ if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { - add = ((1 << 30) - sb->s_blocksize - - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >> - sb->s_blocksize_bits; - if (add > blocks) - add = blocks; - blocks -= add; - last_ext->extLength += add << sb->s_blocksize_bits; + add = (1 << 30) - sb->s_blocksize - + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + if (add > new_block_bytes) + add = new_block_bytes; + new_block_bytes -= add; + last_ext->extLength += add; } if (fake) { @@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode, } /* Managed to do everything necessary? */ - if (!blocks) + if (!new_block_bytes) goto out; /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ last_ext->extLocation.logicalBlockNum = 0; last_ext->extLocation.partitionReferenceNum = 0; - add = (1 << (30-sb->s_blocksize_bits)) - 1; - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (add << sb->s_blocksize_bits); + add = (1 << 30) - sb->s_blocksize; + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add; /* Create enough extents to cover the whole hole */ - while (blocks > add) { - blocks -= add; + while (new_block_bytes > add) { + new_block_bytes -= add; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) return err; count++; } - if (blocks) { + if (new_block_bytes) { last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (blocks << sb->s_blocksize_bits); + new_block_bytes; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) @@ -596,6 +596,24 @@ out: return count; } +/* Extend the final block of the file to final_block_len bytes */ +static void udf_do_extend_final_block(struct inode *inode, + struct extent_position *last_pos, + struct kernel_long_ad *last_ext, + uint32_t final_block_len) +{ + struct super_block *sb = inode->i_sb; + uint32_t added_bytes; + + added_bytes = final_block_len - + (last_ext->extLength & (sb->s_blocksize - 1)); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + + udf_write_aext(inode, last_pos, &last_ext->extLocation, + last_ext->extLength, 1); +} + static int udf_extend_file(struct inode *inode, loff_t newsize) { @@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) int8_t etype; struct super_block *sb = inode->i_sb; sector_t first_block = newsize >> sb->s_blocksize_bits, offset; + unsigned long partial_final_block; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); struct kernel_long_ad extent; - int err; + int err = 0; + int within_final_block; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) BUG(); etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + within_final_block = (etype != -1); - /* File has extent covering the new size (could happen when extending - * inside a block)? */ - if (etype != -1) - return 0; - if (newsize & (sb->s_blocksize - 1)) - offset++; - /* Extended file just to the boundary of the last file block? */ - if (offset == 0) - return 0; - - /* Truncate is extending the file by 'offset' blocks */ if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { /* File has no extents at all or has empty last @@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) &extent.extLength, 0); extent.extLength |= etype << 30; } - err = udf_do_extend_file(inode, &epos, &extent, offset); + + partial_final_block = newsize & (sb->s_blocksize - 1); + + /* File has extent covering the new size (could happen when extending + * inside a block)? + */ + if (within_final_block) { + /* Extending file within the last file block */ + udf_do_extend_final_block(inode, &epos, &extent, + partial_final_block); + } else { + loff_t add = ((loff_t)offset << sb->s_blocksize_bits) | + partial_final_block; + err = udf_do_extend_file(inode, &epos, &extent, add); + } + if (err < 0) goto out; err = 0; @@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; + loff_t hole_len; isBeyondEOF = true; if (count) { if (c) @@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, startnum = (offset > 0); } /* Create extents for the hole between EOF and offset */ - ret = udf_do_extend_file(inode, &prev_epos, laarr, offset); + hole_len = (loff_t)offset << inode->i_blkbits; + ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; newblock = 0; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3b30301c90ec..ccbdbd62f0d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/userfaultfd.c * @@ -5,9 +6,6 @@ * Copyright (C) 2008-2009 Red Hat, Inc. * Copyright (C) 2015 Red Hat, Inc. * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * * Some part derived from fs/eventfd.c (anon inode setup) and * mm/ksm.c (mm hashing). */ @@ -42,6 +40,16 @@ enum userfaultfd_state { /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ @@ -460,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : TASK_KILLABLE; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -472,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * __add_wait_queue. */ set_current_state(blocking_state); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -554,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* @@ -585,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -615,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { struct vm_area_struct *vma; @@ -920,10 +928,10 @@ wakeup: * the last page faults that may have been already waiting on * the fault_*wqh. */ - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); @@ -1136,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can @@ -1182,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (ret) userfaultfd_ctx_put(fork_nctx); } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); } return ret; @@ -1221,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, @@ -1883,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; @@ -1891,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a6f0f4761a37..11f703d4a605 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -758,6 +758,7 @@ xfs_add_to_ioend( struct block_device *bdev = xfs_find_bdev_for_inode(inode); unsigned len = i_blocksize(inode); unsigned poff = offset & (PAGE_SIZE - 1); + bool merged, same_page = false; sector_t sector; sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) + @@ -774,10 +775,14 @@ xfs_add_to_ioend( wpc->imap.br_state, offset, bdev, sector); } - if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) { - if (iop) - atomic_inc(&iop->write_count); - if (bio_full(wpc->ioend->io_bio)) + merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, + &same_page); + + if (iop && !same_page) + atomic_inc(&iop->write_count); + + if (!merged) { + if (bio_full(wpc->ioend->io_bio, len)) xfs_chain_bio(wpc->ioend, wbc, bdev, sector); bio_add_page(wpc->ioend->io_bio, page, len, poff); } |