diff options
Diffstat (limited to 'fs')
243 files changed, 3631 insertions, 2644 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index bc57ae9e2963..cce9ace651a2 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -35,8 +35,9 @@ * @page: structure to page * */ -static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) +static int v9fs_fid_readpage(void *data, struct page *page) { + struct p9_fid *fid = data; struct inode *inode = page->mapping->host; struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; struct iov_iter to; @@ -107,7 +108,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) return ret; - ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); + ret = read_cache_pages(mapping, pages, v9fs_fid_readpage, + filp->private_data); p9_debug(P9_DEBUG_VFS, " = %d\n", ret); return ret; } diff --git a/fs/Kconfig b/fs/Kconfig index f1046cf6ad85..bfb1c6095c7a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS config VALIDATE_FS_PARSER bool "Validate filesystem parameter description" - default y help Enable this to perform validation of the parameter description for a filesystem when it is registered. diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index f87ddd1b6d72..62dc4f577ba1 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -91,12 +91,28 @@ config BINFMT_SCRIPT Most systems will not boot if you say M or N here. If unsure, say Y. +config ARCH_HAS_BINFMT_FLAT + bool + config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU || ARM || M68K + depends on ARCH_HAS_BINFMT_FLAT help Support uClinux FLAT format binaries. +config BINFMT_FLAT_ARGVP_ENVP_ON_STACK + bool + +config BINFMT_FLAT_OLD_ALWAYS_RAM + bool + +config BINFMT_FLAT_OLD + bool "Enable support for very old legacy flat binaries" + depends on BINFMT_FLAT + help + Support decade old uClinux FLAT format binaries. Unless you know + you have some of those say N here. 
+ config BINFMT_ZFLAT bool "Enable ZFLAT support" depends on BINFMT_FLAT diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fe39310c1a0a..35a4d9f4c3ae 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir.c * * Copyright (C) 1999-2000 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Common directory handling for ADFS */ #include "adfs.h" diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 693f69ed3de3..7557378e58b3 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_f.c * * Copyright (C) 1997-1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * E and F format directory handling */ #include <linux/buffer_head.h> diff --git a/fs/adfs/dir_f.h b/fs/adfs/dir_f.h index e4713404096c..5aec332b90f5 100644 --- a/fs/adfs/dir_f.h +++ b/fs/adfs/dir_f.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_f.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * * Structures of directories on the F format disk */ #ifndef ADFS_DIR_F_H diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 97b9f28f459b..6c5fbb0259c9 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_fplus.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <linux/slab.h> diff --git a/fs/adfs/dir_fplus.h b/fs/adfs/dir_fplus.h index b55aa41a68fe..4ec0931e36ad 100644 --- a/fs/adfs/dir_fplus.h +++ b/fs/adfs/dir_fplus.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_fplus.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Structures of directories on the F+ format disk */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 66621e96f9af..904d624541ad 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/inode.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/buffer_head.h> #include <linux/writeback.h> diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 6935f05202ac..4d34338c6176 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/map.c * * Copyright (C) 1997-2002 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <asm/unaligned.h> diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 2a83655c408f..ffb669f9bba7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/super.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/module.h> #include <linux/init.h> diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cbf31f6cd177..10359bea7070 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -29,7 +29,6 @@ kafs-y := \ server.o \ server_list.o \ super.o \ - netdevices.o \ vlclient.o \ vl_list.o \ vl_probe.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 86da532c192f..df415c05939e 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -246,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry, true); + ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, + "srv=1", &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..6cdd7047c809 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -48,7 +48,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server, refcount_set(&new->usage, 1); new->sb = vnode->vfs_inode.i_sb; new->vid = vnode->volume->vid; - new->server = afs_get_server(server); + new->server = afs_get_server(server, afs_server_trace_get_new_cbi); INIT_HLIST_NODE(&new->cb_vlink); write_lock(&server->cb_break_lock); @@ -195,7 +195,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) write_unlock(&cbi->server->cb_break_lock); if (vi) kfree_rcu(vi, rcu); - afs_put_server(net, cbi->server); + afs_put_server(net, cbi->server, afs_server_trace_put_cbi); } kfree_rcu(cbi, rcu); } @@ -212,7 +212,7 @@ void afs_init_callback_state(struct afs_server *server) /* * actually break a callback */ -void __afs_break_callback(struct afs_vnode *vnode) +void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) { _enter(""); @@ -223,13 +223,17 @@ void __afs_break_callback(struct afs_vnode *vnode) if (vnode->lock_state == 
AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); + + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true); + } else { + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false); } } -void afs_break_callback(struct afs_vnode *vnode) +void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) { write_seqlock(&vnode->cb_lock); - __afs_break_callback(vnode); + __afs_break_callback(vnode, reason); write_sequnlock(&vnode->cb_lock); } @@ -275,9 +279,11 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + trace_afs_cb_break(fid, volume->cb_v_break, + afs_cb_break_for_volume_callback, false); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; @@ -285,8 +291,10 @@ static void afs_break_one_callback(struct afs_server *server, afs_iget5_test, &data); if (inode) { vnode = AFS_FS_I(inode); - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_callback); iput(inode); + } else { + trace_afs_cb_miss(fid, afs_cb_break_for_callback); } } } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3451be03667f..4f1b6f466ff5 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -256,8 +256,11 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * server holds up change visibility till it receives our reply so as * to maintain cache coherency. 
*/ - if (call->server) + if (call->server) { + trace_afs_server(call->server, atomic_read(&call->server->usage), + afs_server_trace_callback); afs_break_callbacks(call->server, call->count, call->request); + } afs_send_empty_reply(call); afs_put_call(call); @@ -580,9 +583,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) */ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) { - struct afs_interface *ifs; struct afs_call *call = container_of(work, struct afs_call, work); - int loop, nifs; + int loop; struct { struct /* InterfaceAddr */ { @@ -600,19 +602,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) _enter(""); - nifs = 0; - ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL); - if (ifs) { - nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false); - if (nifs < 0) { - kfree(ifs); - ifs = NULL; - nifs = 0; - } - } - memset(&reply, 0, sizeof(reply)); - reply.ia.nifs = htonl(nifs); reply.ia.uuid[0] = call->net->uuid.time_low; reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid)); @@ -622,15 +612,6 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) for (loop = 0; loop < 6; loop++) reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]); - if (ifs) { - for (loop = 0; loop < nifs; loop++) { - reply.ia.ifaddr[loop] = ifs[loop].address.s_addr; - reply.ia.netmask[loop] = ifs[loop].netmask.s_addr; - reply.ia.mtu[loop] = htonl(ifs[loop].mtu); - } - kfree(ifs); - } - reply.cap.capcount = htonl(1); reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION); afs_send_simple_reply(call, &reply, sizeof(reply)); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index da9563d62b32..e640d67274be 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -238,8 +238,7 @@ retry: if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) nr_inline = 0; - req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline, - GFP_KERNEL); + req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL); if (!req) return 
ERR_PTR(-ENOMEM); @@ -1363,12 +1362,12 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry, drop_nlink(&vnode->vfs_inode); if (vnode->vfs_inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - __afs_break_callback(vnode); + __afs_break_callback(vnode, afs_cb_break_for_unlink); } write_sequnlock(&vnode->cb_lock); ret = 0; } else { - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_unlink); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) kdebug("AFS_VNODE_DELETED"); @@ -1390,7 +1389,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; struct afs_status_cb *scb; - struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; bool need_rehash = false; int ret; @@ -1413,15 +1413,12 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) } /* Try to make sure we have a callback promise on the victim. */ - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); - ret = afs_validate(vnode, key); - if (ret < 0) - goto error_key; - } + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_key; spin_lock(&dentry->d_lock); - if (vnode && d_count(dentry) > 1) { + if (d_count(dentry) > 1) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(d_inode(dentry), 0); diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 057b8d322422..361088a5edb9 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); - - /* vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. 
- */ - fsnotify_nameremove(old, 0); } kfree(scb); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 9b3b2f1f1fc0..bcd1bafb0278 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -24,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = { static int afs_probe_cell_name(struct dentry *dentry) { struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; int ret; @@ -36,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + cell = afs_lookup_cell_rcu(net, name, len); if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); + afs_put_cell(net, cell); return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); + ret = dns_query(net->net, "afsdb", name, len, "srv=1", + NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/file.c b/fs/afs/file.c index 8fd7d3b9a1b1..56b69576274d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -310,8 +310,7 @@ int afs_page_filler(void *data, struct page *page) /* fall through */ default: go_on: - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), - GFP_KERNEL); + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); if (!req) goto enomem; @@ -461,8 +460,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, n++; } - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n, - GFP_NOFS); + req = kzalloc(struct_size(req, array, n), GFP_NOFS); if (!req) return -ENOMEM; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index a1ef0266422a..1ce73e014139 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1911,7 +1911,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, return ERR_PTR(-ENOMEM); call->key = key; - call->server = afs_get_server(server); + call->server = afs_get_server(server, afs_server_trace_get_caps); 
call->server_index = server_index; call->upgrade = true; call->async = true; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..7b1c18c32f48 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? 
fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } @@ -276,7 +283,7 @@ void afs_vnode_commit_status(struct afs_fs_cursor *fc, if (scb->status.abort_code == VNOVNODE) { set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_nlink(&vnode->vfs_inode); - __afs_break_callback(vnode); + __afs_break_callback(vnode, afs_cb_break_for_deleted); } } else { if (scb->have_status) @@ -587,8 +594,9 @@ bool afs_check_validity(struct afs_vnode *vnode) struct afs_cb_interest *cbi; struct afs_server *server; struct afs_volume *volume = vnode->volume; + enum afs_cb_break_reason need_clear = afs_cb_break_no_break; time64_t now = ktime_get_real_seconds(); - bool valid, need_clear = false; + bool valid; unsigned int cb_break, cb_s_break, cb_v_break; int seq = 0; @@ -606,13 +614,13 @@ bool afs_check_validity(struct afs_vnode *vnode) vnode->cb_v_break != cb_v_break) { vnode->cb_s_break = cb_s_break; vnode->cb_v_break = cb_v_break; - need_clear = true; + need_clear = afs_cb_break_for_vsbreak; valid = false; } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - need_clear = true; + need_clear = afs_cb_break_for_zap; valid = false; } else if (vnode->cb_expires_at - 10 <= now) { - need_clear = true; + need_clear = afs_cb_break_for_lapsed; valid = false; } else { valid = true; @@ -628,10 +636,12 @@ bool afs_check_validity(struct afs_vnode *vnode) done_seqretry(&vnode->cb_lock, seq); - if (need_clear) { + if (need_clear != afs_cb_break_no_break) { write_seqlock(&vnode->cb_lock); if (cb_break == vnode->cb_break) - __afs_break_callback(vnode); + __afs_break_callback(vnode, need_clear); + else + trace_afs_cb_miss(&vnode->fid, 
need_clear); write_sequnlock(&vnode->cb_lock); valid = false; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8a67bf741880..f66a3be12fd6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -109,10 +109,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ @@ -516,6 +514,7 @@ struct afs_server { atomic_t usage; u32 addr_version; /* Address list version */ u32 cm_epoch; /* Server RxRPC epoch */ + unsigned int debug_id; /* Debugging ID for traces */ /* file service access */ rwlock_t fs_lock; /* access lock */ @@ -616,7 +615,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; @@ -721,15 +720,6 @@ struct afs_permits { }; /* - * record of one of a system's set of network interfaces - */ -struct afs_interface { - struct in_addr address; /* IPv4 address bound to interface */ - struct in_addr netmask; /* netmask applied to address */ - unsigned mtu; /* MTU of interface */ -}; - -/* * Error prioritisation and accumulation. 
*/ struct afs_error { @@ -846,9 +836,9 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; * callback.c */ extern void afs_init_callback_state(struct afs_server *); -extern void __afs_break_callback(struct afs_vnode *); -extern void afs_break_callback(struct afs_vnode *); -extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); +extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *); extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_list *, unsigned int); @@ -1092,12 +1082,6 @@ extern struct vfsmount *afs_d_automount(struct path *); extern void afs_mntpt_kill_timer(void); /* - * netdevices.c - */ -extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *, - size_t, bool); - -/* * proc.c */ #ifdef CONFIG_PROC_FS @@ -1242,17 +1226,12 @@ extern void __exit afs_clean_up_permit_cache(void); */ extern spinlock_t afs_server_peer_lock; -static inline struct afs_server *afs_get_server(struct afs_server *server) -{ - atomic_inc(&server->usage); - return server; -} - extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); -extern void afs_put_server(struct afs_net *, struct afs_server *); +extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); +extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); @@ -1436,7 +1415,7 @@ static inline void 
afs_check_for_remote_deletion(struct afs_fs_cursor *fc, { if (fc->ac.error == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_deleted); } } diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 5497ab38f585..52b19e9c1535 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include "internal.h" #include "afs_fs.h" +#include "protocol_uae.h" /* * convert an AFS abort code to a Linux error number @@ -65,34 +66,25 @@ int afs_abort_to_error(u32 abort_code) case AFSVL_PERM: return -EACCES; case AFSVL_NOMEM: return -EREMOTEIO; - /* Unified AFS error table; ET "uae" == 0x2f6df00 */ - case 0x2f6df00: return -EPERM; - case 0x2f6df01: return -ENOENT; - case 0x2f6df04: return -EIO; - case 0x2f6df0a: return -EAGAIN; - case 0x2f6df0b: return -ENOMEM; - case 0x2f6df0c: return -EACCES; - case 0x2f6df0f: return -EBUSY; - case 0x2f6df10: return -EEXIST; - case 0x2f6df11: return -EXDEV; - case 0x2f6df12: return -ENODEV; - case 0x2f6df13: return -ENOTDIR; - case 0x2f6df14: return -EISDIR; - case 0x2f6df15: return -EINVAL; - case 0x2f6df1a: return -EFBIG; - case 0x2f6df1b: return -ENOSPC; - case 0x2f6df1d: return -EROFS; - case 0x2f6df1e: return -EMLINK; - case 0x2f6df20: return -EDOM; - case 0x2f6df21: return -ERANGE; - case 0x2f6df22: return -EDEADLK; - case 0x2f6df23: return -ENAMETOOLONG; - case 0x2f6df24: return -ENOLCK; - case 0x2f6df26: return -ENOTEMPTY; - case 0x2f6df28: return -EWOULDBLOCK; - case 0x2f6df69: return -ENOTCONN; - case 0x2f6df6c: return -ETIMEDOUT; - case 0x2f6df78: return -EDQUOT; + /* Unified AFS error table */ + case UAEPERM: return -EPERM; + case UAENOENT: return -ENOENT; + case UAEACCES: return -EACCES; + case UAEBUSY: return -EBUSY; + case UAEEXIST: return -EEXIST; + case UAENOTDIR: return -ENOTDIR; + case UAEISDIR: return -EISDIR; + case UAEFBIG: return -EFBIG; + case UAENOSPC: return -ENOSPC; + case UAEROFS: return -EROFS; + case UAEMLINK: 
return -EMLINK; + case UAEDEADLK: return -EDEADLK; + case UAENAMETOOLONG: return -ENAMETOOLONG; + case UAENOLCK: return -ENOLCK; + case UAENOTEMPTY: return -ENOTEMPTY; + case UAELOOP: return -ELOOP; + case UAENOMEDIUM: return -ENOMEDIUM; + case UAEDQUOT: return -EDQUOT; /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ case RXKADINCONSISTENCY: return -EPROTO; diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c deleted file mode 100644 index 2a009d1939d7..000000000000 --- a/fs/afs/netdevices.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* AFS network device helpers - * - * Copyright (c) 2007 Patrick McHardy <kaber@trash.net> - */ - -#include <linux/string.h> -#include <linux/rtnetlink.h> -#include <linux/inetdevice.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <net/net_namespace.h> -#include "internal.h" - -/* - * get a list of this system's interface IPv4 addresses, netmasks and MTUs - * - maxbufs must be at least 1 - * - returns the number of interface records in the buffer - */ -int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs, - size_t maxbufs, bool wantloopback) -{ - struct net_device *dev; - struct in_device *idev; - int n = 0; - - ASSERT(maxbufs > 0); - - rtnl_lock(); - for_each_netdev(net->net, dev) { - if (dev->type == ARPHRD_LOOPBACK && !wantloopback) - continue; - idev = __in_dev_get_rtnl(dev); - if (!idev) - continue; - for_primary_ifa(idev) { - bufs[n].address.s_addr = ifa->ifa_address; - bufs[n].netmask.s_addr = ifa->ifa_mask; - bufs[n].mtu = dev->mtu; - n++; - if (n >= maxbufs) - goto out; - } endfor_ifa(idev); - } -out: - rtnl_unlock(); - return n; -} diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h new file mode 100644 index 000000000000..1b3d1060bd34 --- /dev/null +++ b/fs/afs/protocol_uae.h @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Universal AFS Error codes (UAE). 
+ * + * Copyright (C) 2003, Daria Phoebe Brashear + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + */ + +enum { + UAEPERM = 0x2f6df00, /* Operation not permitted */ + UAENOENT = 0x2f6df01, /* No such file or directory */ + UAESRCH = 0x2f6df02, /* No such process */ + UAEINTR = 0x2f6df03, /* Interrupted system call */ + UAEIO = 0x2f6df04, /* I/O error */ + UAENXIO = 0x2f6df05, /* No such device or address */ + UAE2BIG = 0x2f6df06, /* Arg list too long */ + UAENOEXEC = 0x2f6df07, /* Exec format error */ + UAEBADF = 0x2f6df08, /* Bad file number */ + UAECHILD = 0x2f6df09, /* No child processes */ + UAEAGAIN = 0x2f6df0a, /* Try again */ + UAENOMEM = 0x2f6df0b, /* Out of memory */ + UAEACCES = 0x2f6df0c, /* Permission denied */ + UAEFAULT = 0x2f6df0d, /* Bad address */ + UAENOTBLK = 0x2f6df0e, /* Block device required */ + UAEBUSY = 0x2f6df0f, /* Device or resource busy */ + UAEEXIST = 0x2f6df10, /* File exists */ + UAEXDEV = 0x2f6df11, /* Cross-device link */ + UAENODEV = 0x2f6df12, /* No such device */ + UAENOTDIR = 0x2f6df13, /* Not a directory */ + UAEISDIR = 0x2f6df14, /* Is a directory */ + UAEINVAL = 0x2f6df15, /* Invalid argument */ + UAENFILE = 0x2f6df16, /* File table overflow */ + UAEMFILE = 0x2f6df17, /* Too many open files */ + UAENOTTY = 0x2f6df18, /* Not a typewriter */ + UAETXTBSY = 0x2f6df19, /* Text file busy */ + UAEFBIG = 0x2f6df1a, /* File too large */ + UAENOSPC = 0x2f6df1b, /* No space left on device */ + UAESPIPE = 0x2f6df1c, /* Illegal seek */ + UAEROFS = 0x2f6df1d, /* Read-only file system */ + UAEMLINK = 0x2f6df1e, /* Too many links */ + UAEPIPE = 0x2f6df1f, /* Broken pipe */ + UAEDOM = 0x2f6df20, /* Math argument out of domain of func */ + UAERANGE = 0x2f6df21, /* Math result not representable */ + UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */ + UAENAMETOOLONG = 0x2f6df23, /* File name too long */ + UAENOLCK = 0x2f6df24, /* No record locks available */ + UAENOSYS = 0x2f6df25, /* Function not implemented */ + 
UAENOTEMPTY = 0x2f6df26, /* Directory not empty */ + UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */ + UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */ + UAENOMSG = 0x2f6df29, /* No message of desired type */ + UAEIDRM = 0x2f6df2a, /* Identifier removed */ + UAECHRNG = 0x2f6df2b, /* Channel number out of range */ + UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */ + UAEL3HLT = 0x2f6df2d, /* Level 3 halted */ + UAEL3RST = 0x2f6df2e, /* Level 3 reset */ + UAELNRNG = 0x2f6df2f, /* Link number out of range */ + UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */ + UAENOCSI = 0x2f6df31, /* No CSI structure available */ + UAEL2HLT = 0x2f6df32, /* Level 2 halted */ + UAEBADE = 0x2f6df33, /* Invalid exchange */ + UAEBADR = 0x2f6df34, /* Invalid request descriptor */ + UAEXFULL = 0x2f6df35, /* Exchange full */ + UAENOANO = 0x2f6df36, /* No anode */ + UAEBADRQC = 0x2f6df37, /* Invalid request code */ + UAEBADSLT = 0x2f6df38, /* Invalid slot */ + UAEBFONT = 0x2f6df39, /* Bad font file format */ + UAENOSTR = 0x2f6df3a, /* Device not a stream */ + UAENODATA = 0x2f6df3b, /* No data available */ + UAETIME = 0x2f6df3c, /* Timer expired */ + UAENOSR = 0x2f6df3d, /* Out of streams resources */ + UAENONET = 0x2f6df3e, /* Machine is not on the network */ + UAENOPKG = 0x2f6df3f, /* Package not installed */ + UAEREMOTE = 0x2f6df40, /* Object is remote */ + UAENOLINK = 0x2f6df41, /* Link has been severed */ + UAEADV = 0x2f6df42, /* Advertise error */ + UAESRMNT = 0x2f6df43, /* Srmount error */ + UAECOMM = 0x2f6df44, /* Communication error on send */ + UAEPROTO = 0x2f6df45, /* Protocol error */ + UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */ + UAEDOTDOT = 0x2f6df47, /* RFS specific error */ + UAEBADMSG = 0x2f6df48, /* Not a data message */ + UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */ + UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */ + UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */ + UAEREMCHG = 0x2f6df4c, /* 
Remote address changed */ + UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */ + UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */ + UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */ + UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */ + UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */ + UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */ + UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */ + UAESTRPIPE = 0x2f6df54, /* Streams pipe error */ + UAEUSERS = 0x2f6df55, /* Too many users */ + UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */ + UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */ + UAEMSGSIZE = 0x2f6df58, /* Message too long */ + UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */ + UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */ + UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */ + UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */ + UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */ + UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */ + UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */ + UAEADDRINUSE = 0x2f6df60, /* Address already in use */ + UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */ + UAENETDOWN = 0x2f6df62, /* Network is down */ + UAENETUNREACH = 0x2f6df63, /* Network is unreachable */ + UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */ + UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */ + UAECONNRESET = 0x2f6df66, /* Connection reset by peer */ + UAENOBUFS = 0x2f6df67, /* No buffer space available */ + UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */ + UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */ + UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */ + UAETOOMANYREFS = 0x2f6df6b, /* Too 
many references: cannot splice */ + UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */ + UAECONNREFUSED = 0x2f6df6d, /* Connection refused */ + UAEHOSTDOWN = 0x2f6df6e, /* Host is down */ + UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */ + UAEALREADY = 0x2f6df70, /* Operation already in progress */ + UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */ + UAESTALE = 0x2f6df72, /* Stale NFS file handle */ + UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */ + UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */ + UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */ + UAEISNAM = 0x2f6df76, /* Is a named type file */ + UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */ + UAEDQUOT = 0x2f6df78, /* Quota exceeded */ + UAENOMEDIUM = 0x2f6df79, /* No medium found */ + UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */ +}; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index d1dde2834b6d..0e5269374ac1 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -184,7 +184,7 @@ void afs_put_call(struct afs_call *call) if (call->type->destructor) call->type->destructor(call); - afs_put_server(call->net, call->server); + afs_put_server(call->net, call->server, afs_server_trace_put_call); afs_put_cb_interest(call->net, call->cbi); afs_put_addrlist(call->alist); kfree(call->request); diff --git a/fs/afs/server.c b/fs/afs/server.c index e900cd74361b..64d440aaabc0 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -13,6 +13,7 @@ static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ +static atomic_t afs_server_debug_id; static void afs_inc_servers_outstanding(struct afs_net *net) { @@ -47,7 +48,7 @@ struct afs_server *afs_find_server(struct afs_net *net, do { if (server) - afs_put_server(net, server); + afs_put_server(net, server, afs_server_trace_put_find_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_addr_lock, &seq); @@ -112,7 +113,7 @@ struct afs_server 
*afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu * changes. */ if (server) - afs_put_server(net, server); + afs_put_server(net, server, afs_server_trace_put_uuid_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_lock, &seq); @@ -127,7 +128,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu } else if (diff > 0) { p = p->rb_right; } else { - afs_get_server(server); + afs_get_server(server, afs_server_trace_get_by_uuid); break; } @@ -198,7 +199,7 @@ static struct afs_server *afs_install_server(struct afs_net *net, ret = 0; exists: - afs_get_server(server); + afs_get_server(server, afs_server_trace_get_install); write_sequnlock(&net->fs_lock); return server; } @@ -219,6 +220,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, goto enomem; atomic_set(&server->usage, 1); + server->debug_id = atomic_inc_return(&afs_server_debug_id); RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; @@ -230,6 +232,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, spin_lock_init(&server->probe_lock); afs_inc_servers_outstanding(net); + trace_afs_server(server, 1, afs_server_trace_alloc); _leave(" = %p", server); return server; @@ -325,9 +328,22 @@ void afs_servers_timer(struct timer_list *timer) } /* + * Get a reference on a server object. + */ +struct afs_server *afs_get_server(struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int u = atomic_inc_return(&server->usage); + + trace_afs_server(server, u, reason); + return server; +} + +/* * Release a reference on a server record. 
*/ -void afs_put_server(struct afs_net *net, struct afs_server *server) +void afs_put_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) { unsigned int usage; @@ -338,7 +354,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server) usage = atomic_dec_return(&server->usage); - _enter("{%u}", usage); + trace_afs_server(server, usage, reason); if (likely(usage > 0)) return; @@ -350,6 +366,8 @@ static void afs_server_rcu(struct rcu_head *rcu) { struct afs_server *server = container_of(rcu, struct afs_server, rcu); + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_free); afs_put_addrlist(rcu_access_pointer(server->addresses)); kfree(server); } @@ -365,7 +383,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) .index = alist->preferred, .error = 0, }; - _enter("%p", server); + + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_give_up_cb); if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_fs_give_up_all_callbacks(net, server, &ac, NULL); @@ -373,6 +393,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) wait_var_event(&server->probe_outstanding, atomic_read(&server->probe_outstanding) == 0); + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_destroy); call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } @@ -392,6 +414,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) write_seqlock(&net->fs_lock); usage = 1; deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); + trace_afs_server(server, usage, afs_server_trace_gc); if (deleted) { rb_erase(&server->uuid_rb, &net->fs_servers); hlist_del_rcu(&server->proc_link); @@ -514,6 +537,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a _enter(""); + trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update); + alist = 
afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, &server->uuid); if (IS_ERR(alist)) { diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index b4988bc8e6f2..888d91d195d9 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -16,7 +16,8 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) if (slist && refcount_dec_and_test(&slist->usage)) { for (i = 0; i < slist->nr_servers; i++) { afs_put_cb_interest(net, slist->servers[i].cb_interest); - afs_put_server(net, slist->servers[i].server); + afs_put_server(net, slist->servers[i].server, + afs_server_trace_put_slist); } kfree(slist); } @@ -67,7 +68,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { - afs_put_server(cell->net, server); + afs_put_server(cell->net, server, + afs_server_trace_put_slist_isort); continue; } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 08fdb3951c49..1a414300b654 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); diff --git a/fs/afs/write.c b/fs/afs/write.c index 98eb7adbce91..cb76566763db 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -44,8 +44,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, return 0; } - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), - GFP_KERNEL); + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); if (!req) return -ENOMEM; @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && 
unlikely(get_timespec64(&ts, timeout))) @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32, struct __aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? 
&t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __compat_aio_sigset ksig = { NULL, }; sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..8c6b50f34466 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -42,6 +42,11 @@ #include <asm/unaligned.h> #include <asm/cacheflush.h> #include <asm/page.h> +#include <asm/flat.h> + +#ifndef flat_get_relocate_addr +#define flat_get_relocate_addr(rel) (rel) +#endif /****************************************************************************/ @@ -63,6 +68,12 @@ #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ +#ifdef CONFIG_BINFMT_SHARED_FLAT +#define MAX_SHARED_LIBS (4) +#else +#define MAX_SHARED_LIBS (1) +#endif + struct lib_info { struct { unsigned long start_code; /* Start of text segment */ @@ -120,14 +131,15 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start sp -= bprm->envc + 1; sp -= bprm->argc + 1; - sp -= flat_argvp_envp_on_stack() ? 
2 : 0; + if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) + sp -= 2; /* argvp + envp */ sp -= 1; /* &argc */ current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN; sp = (unsigned long __user *)current->mm->start_stack; __put_user(bprm->argc, sp++); - if (flat_argvp_envp_on_stack()) { + if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) { unsigned long argv, envp; argv = (unsigned long)(sp + 2); envp = (unsigned long)(sp + 2 + bprm->argc + 1); @@ -345,7 +357,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) start_code = p->lib_list[id].start_code; text_len = p->lib_list[id].text_len; - if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { + if (r > start_brk - start_data + text_len) { pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)", r, start_brk-start_data+text_len, text_len); goto failed; @@ -368,6 +380,7 @@ failed: /****************************************************************************/ +#ifdef CONFIG_BINFMT_FLAT_OLD static void old_reloc(unsigned long rl) { static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" }; @@ -405,6 +418,7 @@ static void old_reloc(unsigned long rl) pr_debug("Relocation became %lx\n", val); } +#endif /* CONFIG_BINFMT_FLAT_OLD */ /****************************************************************************/ @@ -415,7 +429,8 @@ static int load_flat_file(struct linux_binprm *bprm, unsigned long textpos, datapos, realdatastart; u32 text_len, data_len, bss_len, stack_len, full_data, flags; unsigned long len, memp, memp_size, extra, rlim; - u32 __user *reloc, *rp; + __be32 __user *reloc; + u32 __user *rp; struct inode *inode; int i, rev, relocs; loff_t fpos; @@ -454,6 +469,7 @@ static int load_flat_file(struct linux_binprm *bprm, if (flags & FLAT_FLAG_KTRACE) pr_info("Loading file: %s\n", bprm->filename); +#ifdef CONFIG_BINFMT_FLAT_OLD if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) { pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n", rev, 
FLAT_VERSION, OLD_FLAT_VERSION); @@ -470,6 +486,23 @@ static int load_flat_file(struct linux_binprm *bprm, } /* + * fix up the flags for the older format, there were all kinds + * of endian hacks, this only works for the simple cases + */ + if (rev == OLD_FLAT_VERSION && + (flags || IS_ENABLED(CONFIG_BINFMT_FLAT_OLD_ALWAYS_RAM))) + flags = FLAT_FLAG_RAM; + +#else /* CONFIG_BINFMT_FLAT_OLD */ + if (rev != FLAT_VERSION) { + pr_err("bad flat file version 0x%x (supported 0x%lx)\n", + rev, FLAT_VERSION); + ret = -ENOEXEC; + goto err; + } +#endif /* !CONFIG_BINFMT_FLAT_OLD */ + + /* * Make sure the header params are sane. * 28 bits (256 MB) is way more than reasonable in this case. * If some top bits are set we have probable binary corruption. @@ -480,13 +513,6 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - /* - * fix up the flags for the older format, there were all kinds - * of endian hacks, this only works for the simple cases - */ - if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags)) - flags = FLAT_FLAG_RAM; - #ifndef CONFIG_BINFMT_ZFLAT if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) { pr_err("Support for ZFLAT executables is not enabled.\n"); @@ -547,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); + len = data_len + extra; len = PAGE_ALIGN(len); realdatastart = vm_mmap(NULL, 0, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); @@ -561,9 +587,7 @@ static int load_flat_file(struct linux_binprm *bprm, vm_munmap(textpos, text_len); goto err; } - datapos = ALIGN(realdatastart + - MAX_SHARED_LIBS * sizeof(unsigned long), - FLAT_DATA_ALIGN); + datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN); pr_debug("Allocated data+bss+stack (%u bytes): %lx\n", data_len + bss_len + stack_len, datapos); @@ -587,13 +611,13 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - reloc = (u32 __user *) + reloc = (__be32 __user *) (datapos + 
(ntohl(hdr->reloc_start) - text_len)); memp = realdatastart; memp_size = len; } else { - len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32); + len = text_len + data_len + extra; len = PAGE_ALIGN(len); textpos = vm_mmap(NULL, 0, len, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); @@ -608,11 +632,9 @@ static int load_flat_file(struct linux_binprm *bprm, } realdatastart = textpos + ntohl(hdr->data_start); - datapos = ALIGN(realdatastart + - MAX_SHARED_LIBS * sizeof(u32), - FLAT_DATA_ALIGN); + datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN); - reloc = (u32 __user *) + reloc = (__be32 __user *) (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = textpos; memp_size = len; @@ -627,8 +649,9 @@ static int load_flat_file(struct linux_binprm *bprm, (text_len + full_data - sizeof(struct flat_hdr)), 0); - memmove((void *) datapos, (void *) realdatastart, - full_data); + if (datapos != realdatastart) + memmove((void *)datapos, (void *)realdatastart, + full_data); #else /* * This is used on MMU systems mainly for testing. @@ -684,8 +707,7 @@ static int load_flat_file(struct linux_binprm *bprm, if (IS_ERR_VALUE(result)) { ret = result; pr_err("Unable to read code+data+bss, errno %d\n", ret); - vm_munmap(textpos, text_len + data_len + extra + - MAX_SHARED_LIBS * sizeof(u32)); + vm_munmap(textpos, text_len + data_len + extra); goto err; } } @@ -775,20 +797,18 @@ static int load_flat_file(struct linux_binprm *bprm, * __start to address 4 so that is okay). */ if (rev > OLD_FLAT_VERSION) { - u32 __maybe_unused persistent = 0; for (i = 0; i < relocs; i++) { u32 addr, relval; + __be32 tmp; /* * Get the address of the pointer to be * relocated (of course, the address has to be * relocated first). 
*/ - if (get_user(relval, reloc + i)) + if (get_user(tmp, reloc + i)) return -EFAULT; - relval = ntohl(relval); - if (flat_set_persistent(relval, &persistent)) - continue; + relval = ntohl(tmp); addr = flat_get_relocate_addr(relval); rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1); if (rp == (u32 __user *)RELOC_FAILED) { @@ -797,8 +817,7 @@ static int load_flat_file(struct linux_binprm *bprm, } /* Get the pointer's value. */ - ret = flat_get_addr_from_rp(rp, relval, flags, - &addr, &persistent); + ret = flat_get_addr_from_rp(rp, relval, flags, &addr); if (unlikely(ret)) goto err; @@ -807,8 +826,13 @@ static int load_flat_file(struct linux_binprm *bprm, * Do the relocation. PIC relocs in the data section are * already in target order */ - if ((flags & FLAT_FLAG_GOTPIC) == 0) - addr = ntohl(addr); + if ((flags & FLAT_FLAG_GOTPIC) == 0) { + /* + * Meh, the same value can have a different + * byte order based on a flag.. + */ + addr = ntohl((__force __be32)addr); + } addr = calc_reloc(addr, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; @@ -821,14 +845,15 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } } +#ifdef CONFIG_BINFMT_FLAT_OLD } else { for (i = 0; i < relocs; i++) { - u32 relval; + __be32 relval; if (get_user(relval, reloc + i)) return -EFAULT; - relval = ntohl(relval); - old_reloc(relval); + old_reloc(ntohl(relval)); } +#endif /* CONFIG_BINFMT_FLAT_OLD */ } flush_icache_range(start_code, end_code); @@ -856,9 +881,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. 
+ */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +902,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; - - res = prepare_binprm(&bprm); + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/block_dev.c b/fs/block_dev.c index 749f5984425d..f00b569a9f89 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -203,13 +203,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; loff_t pos = iocb->ki_pos; bool should_dirty = false; struct bio bio; ssize_t ret; blk_qc_t qc; - struct bvec_iter_all iter_all; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -259,13 +258,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, } __set_current_state(TASK_RUNNING); - bio_for_each_segment_all(bvec, &bio, iter_all) { - if (should_dirty && !PageCompound(bvec->bv_page)) - set_page_dirty_lock(bvec->bv_page); - if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) - put_page(bvec->bv_page); - } - + bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); @@ -335,13 +328,7 @@ static void blkdev_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if 
(!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1aee51a9f3bf..5faf057f6f37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10831,17 +10831,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, remove_em = (atomic_read(&block_group->trimming) == 0); spin_unlock(&block_group->lock); - if (remove_em) { - struct extent_map_tree *em_tree; - - em_tree = &fs_info->mapping_tree.map_tree; - write_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - /* once for the tree */ - free_extent_map(em); - } - mutex_unlock(&fs_info->chunk_mutex); ret = remove_block_group_free_space(trans, block_group); @@ -10858,6 +10847,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + if (remove_em) { + struct extent_map_tree *em_tree; + + em_tree = &fs_info->mapping_tree.map_tree; + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + /* once for the tree */ + free_extent_map(em); + } out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); @@ -11137,13 +11139,11 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * it while performing the free space search since we have already * held back allocations. */ -static int btrfs_trim_free_extents(struct btrfs_device *device, - struct fstrim_range *range, u64 *trimmed) +static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) { - u64 start, len = 0, end = 0; + u64 start = SZ_1M, len = 0, end = 0; int ret; - start = max_t(u64, range->start, SZ_1M); *trimmed = 0; /* Discard not supported = nothing to do. 
*/ @@ -11186,22 +11186,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } - /* Keep going until we satisfy minlen or reach end of space */ - if (len < range->minlen) { - mutex_unlock(&fs_info->chunk_mutex); - start += len; - continue; - } - - /* If we are out of the passed range break */ - if (start > range->start + range->len - 1) { - mutex_unlock(&fs_info->chunk_mutex); - break; - } - - start = max(range->start, start); - len = min(range->len, len); - ret = btrfs_issue_discard(device->bdev, start, len, &bytes); if (!ret) @@ -11216,10 +11200,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; - /* We've trimmed enough */ - if (*trimmed >= range->len) - break; - if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -11303,7 +11283,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range, &group_trimmed); + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; dev_ret = ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3cd66efdb99d..cfeff1b8dce0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -309,8 +309,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_abort_transaction(trans, ret); goto out_end_trans; } - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); } else { ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); @@ -2922,8 +2920,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode_lock(inode); err = btrfs_delete_subvolume(dir, dentry); inode_unlock(inode); - if (!err) + if (!err) { + fsnotify_rmdir(dir, dentry); d_delete(dentry); + } out_dput: dput(dentry); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 
10d9589001a9..bb5bd49573b4 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -747,6 +747,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) u64 total = 0; int i; +again: do { enqueued = 0; mutex_lock(&fs_devices->device_list_mutex); @@ -758,6 +759,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); + if (fs_devices->seed) { + fs_devices = fs_devices->seed; + goto again; + } if (enqueued == 0) return; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2f078b77fe14..c1dfc97893ba 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -303,11 +303,12 @@ static ssize_t raid_bytes_show(struct kobject *kobj, return snprintf(buf, PAGE_SIZE, "%llu\n", val); } -static struct attribute *raid_attributes[] = { +static struct attribute *raid_attrs[] = { BTRFS_ATTR_PTR(raid, total_bytes), BTRFS_ATTR_PTR(raid, used_bytes), NULL }; +ATTRIBUTE_GROUPS(raid); static void release_raid_kobj(struct kobject *kobj) { @@ -317,7 +318,7 @@ static void release_raid_kobj(struct kobject *kobj) struct kobj_type btrfs_raid_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = release_raid_kobj, - .default_attrs = raid_attributes, + .default_groups = raid_groups, }; #define SPACE_INFO_ATTR(field) \ @@ -364,6 +365,7 @@ static struct attribute *space_info_attrs[] = { BTRFS_ATTR_PTR(space_info, total_bytes_pinned), NULL, }; +ATTRIBUTE_GROUPS(space_info); static void space_info_release(struct kobject *kobj) { @@ -375,7 +377,7 @@ static void space_info_release(struct kobject *kobj) struct kobj_type space_info_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = space_info_release, - .default_attrs = space_info_attrs, + .default_groups = space_info_groups, }; static const struct attribute *allocation_attrs[] = { @@ -910,12 +912,10 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group); } 
-static int btrfs_init_debugfs(void) +static void btrfs_init_debugfs(void) { #ifdef CONFIG_DEBUG_FS btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); - if (!btrfs_debugfs_root_dentry) - return -ENOMEM; /* * Example code, how to export data through debugfs. @@ -929,7 +929,6 @@ static int btrfs_init_debugfs(void) #endif #endif - return 0; } int __init btrfs_init_sysfs(void) @@ -940,9 +939,7 @@ int __init btrfs_init_sysfs(void) if (!btrfs_kset) return -ENOMEM; - ret = btrfs_init_debugfs(); - if (ret) - goto out1; + btrfs_init_debugfs(); init_feature_attrs(); ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); @@ -959,7 +956,6 @@ out_remove_group: sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); out2: debugfs_remove_recursive(btrfs_debugfs_root_dentry); -out1: kset_unregister(btrfs_kset); return ret; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 78b6ba2029e8..95d9aebff2c4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -213,6 +213,9 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, } out: btrfs_free_path(path); + if (!ret) + set_bit(BTRFS_INODE_COPY_EVERYTHING, + &BTRFS_I(inode)->runtime_flags); return ret; } @@ -236,7 +239,6 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); out: @@ -388,8 +390,6 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, if (!ret) { inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); } diff --git a/fs/buffer.c b/fs/buffer.c index e450c55f6434..49a871570092 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2086,38 +2086,6 @@ int block_write_begin(struct address_space 
*mapping, loff_t pos, unsigned len, } EXPORT_SYMBOL(block_write_begin); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page) -{ - loff_t old_size = inode->i_size; - bool i_size_changed = false; - - /* - * No need to use i_size_read() here, the i_size cannot change under us - * because we hold i_rwsem. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos + copied > inode->i_size) { - i_size_write(inode, pos + copied); - i_size_changed = true; - } - - unlock_page(page); - - if (old_size < pos) - pagecache_isize_extended(inode, old_size, pos); - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - if (i_size_changed) - mark_inode_dirty(inode); -} - int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -2158,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; + bool i_size_changed = false; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - __generic_write_end(mapping->host, pos, copied, page); + + /* + * No need to use i_size_read() here, the i_size cannot change under us + * because we hold i_rwsem. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. 
+ */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = true; + } + + unlock_page(page); put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + mark_inode_dirty(inode); return copied; } EXPORT_SYMBOL(generic_write_end); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index b3fc5fe26a1a..83cd41fa2b01 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -245,21 +245,17 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_mdsc); } -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); - BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, fsc->client->debugfs_dir, fsc, &congestion_kb_fops); - if (!fsc->debugfs_congestion_kb) - goto out; snprintf(name, sizeof(name), "../../bdi/%s", dev_name(fsc->sb->s_bdi->dev)); @@ -267,52 +263,36 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) debugfs_create_symlink("bdi", fsc->client->debugfs_dir, name); - if (!fsc->debugfs_bdi) - goto out; fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 0400, fsc->client->debugfs_dir, fsc, &mdsmap_show_fops); - if (!fsc->debugfs_mdsmap) - goto out; fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", 0400, fsc->client->debugfs_dir, fsc, &mds_sessions_show_fops); - if (!fsc->debugfs_mds_sessions) - goto out; fsc->debugfs_mdsc = debugfs_create_file("mdsc", 0400, fsc->client->debugfs_dir, fsc, &mdsc_show_fops); - if (!fsc->debugfs_mdsc) - goto out; fsc->debugfs_caps = debugfs_create_file("caps", 0400, 
fsc->client->debugfs_dir, fsc, &caps_show_fops); - if (!fsc->debugfs_caps) - goto out; - - return 0; - -out: - ceph_fs_debugfs_cleanup(fsc); - return err; } #else /* CONFIG_DEBUG_FS */ -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { return 0; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 183c37c0a8fc..c5517ffeb11c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1889,9 +1889,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, return 0; } -static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) +static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) { struct inode *src_inode = file_inode(src_file); struct inode *dst_inode = file_inode(dst_file); @@ -1909,6 +1909,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, if (src_inode == dst_inode) return -EINVAL; + if (src_inode->i_sb != dst_inode->i_sb) + return -EXDEV; if (ceph_snap(dst_inode) != CEPH_NOSNAP) return -EROFS; @@ -2100,6 +2102,21 @@ out: return ret; } +static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6af2d0d4a87a..c8a9b89b922d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2121,9 +2121,10 @@ retry: if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: 
%p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d57fa60dcd43..ed1b65a6c2c3 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -937,9 +937,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) dout("mount opening path %s\n", path); } - err = ceph_fs_debugfs_init(fsc); - if (err < 0) - goto out; + ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); if (IS_ERR(root)) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 5f27e1f7f2d6..fbe6869a3f95 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1102,7 +1102,7 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, int num_fcntl_locks, int num_flock_locks); /* debugfs.c */ -extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); +extern void ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); /* quota.c */ diff --git a/fs/char_dev.c b/fs/char_dev.c index d18cad28c1c3..00dfe17871ac 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -98,7 +98,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, int minorct, const char *name) { struct char_device_struct *cd, *curr, *prev = NULL; - int ret = -EBUSY; + int ret; int i; if (major >= CHRDEV_MAJOR_MAX) { @@ -129,6 +129,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, major = ret; } + ret = -EBUSY; i = major_to_index(major); for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) { if (curr->major < major) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index aae2b8b2adf5..523e9ea78a28 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -10,7 +10,7 @@ config CIFS select CRYPTO_SHA512 select CRYPTO_CMAC select CRYPTO_HMAC - 
select CRYPTO_ARC4 + select CRYPTO_LIB_ARC4 select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_ECB diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index d2a05e46d6f5..97b7497c13ef 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -33,7 +33,8 @@ #include <linux/ctype.h> #include <linux/random.h> #include <linux/highmem.h> -#include <crypto/skcipher.h> +#include <linux/fips.h> +#include <crypto/arc4.h> #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, @@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret: int calc_seckey(struct cifs_ses *ses) { - int rc; - struct crypto_skcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct skcipher_request *req; - unsigned char *sec_key; + unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */ + struct arc4_ctx *ctx_arc4; - sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL); - if (sec_key == NULL) - return -ENOMEM; + if (fips_enabled) + return -ENODEV; get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); - tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm_arc4)) { - rc = PTR_ERR(tfm_arc4); - cifs_dbg(VFS, "could not allocate crypto API arc4\n"); - goto out; - } - - rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response, - CIFS_SESS_KEY_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set response as a key\n", - __func__); - goto out_free_cipher; - } - - req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - cifs_dbg(VFS, "could not allocate crypto API arc4 request\n"); - goto out_free_cipher; + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); + if (!ctx_arc4) { + cifs_dbg(VFS, "could not allocate arc4 context\n"); + return -ENOMEM; } - sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); - sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL); - - rc = crypto_skcipher_encrypt(req); - 
skcipher_request_free(req); - if (rc) { - cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc); - goto out_free_cipher; - } + arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, + CIFS_CPHTXT_SIZE); /* make secondary_key/nonce as session key */ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE); /* and make len as that of session key only */ ses->auth_key.len = CIFS_SESS_KEY_SIZE; -out_free_cipher: - crypto_free_skcipher(tfm_arc4); -out: - kfree(sec_key); - return rc; + memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE); + kzfree(ctx_arc4); + return 0; } void diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5fcd6360056..24635b65effa 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -303,6 +303,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; + spin_lock_init(&cifs_inode->open_file_lock); generate_random_uuid(cifs_inode->lease_key); /* @@ -1148,6 +1149,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); free_xid(xid); + + if (rc == -EOPNOTSUPP || rc == -EXDEV) + rc = generic_copy_file_range(src_file, off, dst_file, + destoff, len, flags); return rc; } @@ -1590,7 +1595,6 @@ MODULE_DESCRIPTION ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); -MODULE_SOFTDEP("pre: arc4"); MODULE_SOFTDEP("pre: des"); MODULE_SOFTDEP("pre: ecb"); MODULE_SOFTDEP("pre: hmac"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 334ff5f9c3f3..4777b3c4a92c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1377,6 +1377,7 @@ struct cifsInodeInfo { struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. 
write caching info for oplock */ struct list_head openFileList; + spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ unsigned int epoch; /* used to track lease state changes */ @@ -1780,10 +1781,14 @@ require use of the stronger protocol */ * tcp_ses_lock protects: * list operations on tcp and SMB session lists * tcon->open_file_lock protects the list of open files hanging off the tcon + * inode->open_file_lock protects the openFileList hanging off the inode * cfile->file_info_lock protects counters and fields in cifs file struct * f_owner.lock protects certain per file struct operations * mapping->page_lock protects certain per page operations * + * Note that the cifs_tcon.open_file_lock should be taken before + * not after the cifsInodeInfo.open_file_lock + * * Semaphores * ---------- * sesSem operations on smb session diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8c4121da624e..714a359c7c8d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -476,6 +476,7 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); server->nr_targets = 1; #ifdef CONFIG_CIFS_DFS_UPCALL + spin_unlock(&GlobalMid_Lock); cifs_sb = find_super_by_tcp(server); if (IS_ERR(cifs_sb)) { rc = PTR_ERR(cifs_sb); @@ -493,6 +494,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__, server->nr_targets); + spin_lock(&GlobalMid_Lock); #endif if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally @@ -2629,7 +2631,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1e21b2528cfb..534cbba72789 100644 --- a/fs/cifs/dns_resolve.c +++ 
b/fs/cifs/dns_resolve.c @@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); + rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, + NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 06e27ac6d82c..97090693d182 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -338,10 +338,12 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, atomic_inc(&tcon->num_local_opens); /* if readable file instance put first in list*/ + spin_lock(&cinode->open_file_lock); if (file->f_mode & FMODE_READ) list_add(&cfile->flist, &cinode->openFileList); else list_add_tail(&cfile->flist, &cinode->openFileList); + spin_unlock(&cinode->open_file_lock); spin_unlock(&tcon->open_file_lock); if (fid->purge_cache) @@ -413,7 +415,9 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); /* remove it from the lists */ + spin_lock(&cifsi->open_file_lock); list_del(&cifs_file->flist); + spin_unlock(&cifsi->open_file_lock); list_del(&cifs_file->tlist); atomic_dec(&tcon->num_local_opens); @@ -1950,9 +1954,9 @@ refind_writable: return 0; } - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); list_move_tail(&inv_file->flist, &cifs_inode->openFileList); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); cifsFileInfo_put(inv_file); ++refind; inv_file = NULL; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c4e75afa3258..9e430ae9314f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * SMB1 (CIFS) version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free 
software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e32c264e3adb..82ade16c9501 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -457,7 +457,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, "STATUS_ALLOTTED_SPACE_EXCEEDED"}, - {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, + {STATUS_INSUFFICIENT_RESOURCES, -EAGAIN, "STATUS_INSUFFICIENT_RESOURCES"}, {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e921e6511728..9fd56b0acd7e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3,19 +3,6 @@ * SMB2 version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> @@ -2385,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, kfree(dfs_rsp); return rc; } + +static int +parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int sub_len; + unsigned int sub_offset; + + /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */ + if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) { + cifs_dbg(VFS, "srv returned invalid symlink buffer\n"); + return -EIO; + } + + sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); + sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + if (sub_offset + 20 > plen || + sub_offset + sub_len + 20 > plen) { + cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); + return -EIO; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2414,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct kvec close_iov[1]; struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; - char *ioctl_buf; + struct reparse_data_buffer *reparse_buf; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + *target_path = NULL; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2496,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == 0) && (is_reparse_point)) { /* See MS-FSCC 
2.3.23 */ - ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); plen = le32_to_cpu(ioctl_rsp->OutputCount); if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > rsp_iov[1].iov_len) { - cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); + cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n", + plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < 8) { + cifs_dbg(VFS, "reparse buffer is too small. Must be " + "at least 8 bytes but was %d\n", plen); + rc = -EIO; + goto querty_exit; + } + + if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) { + cifs_dbg(VFS, "srv returned invalid reparse buf " + "length: %d\n", plen); rc = -EIO; goto querty_exit; } - /* Do stuff with ioctl_buf/plen */ + rc = parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)reparse_buf, + plen, target_path, cifs_sb); goto querty_exit; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 29b699d532ef..75311a8a68bf 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3114,9 +3114,14 @@ void smb2_reconnect_server(struct work_struct *work) tcon_exist = true; } } + /* + * IPC has the same lifetime as its session and uses its + * refcount. 
+ */ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); tcon_exist = true; + ses->ses_count++; } } /* @@ -3135,7 +3140,10 @@ void smb2_reconnect_server(struct work_struct *work) else resched = true; list_del_init(&tcon->rlist); - cifs_put_tcon(tcon); + if (tcon->ipc) + cifs_put_smb_ses(tcon->ses); + else + cifs_put_tcon(tcon); } cifs_dbg(FYI, "Reconnecting tcons finished\n"); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7d5813bebd8..858353d20c39 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer { __u8 PathBuffer[0]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + +struct reparse_symlink_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; /* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ diff --git a/fs/compat.c b/fs/compat.c index 4a0aaaf53217..436d228cf71c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/compat.c * @@ -9,10 +10,6 @@ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/compat.h> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d2ca5287762d..92112915de8e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -13,6 +13,7 @@ #undef DEBUG #include <linux/fs.h> +#include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> @@ -1788,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(parent), dentry); d_delete(dentry); inode_unlock(d_inode(parent)); @@ -1916,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); d_delete(dentry); diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 24ed99e2eca0..5fdf24877c17 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -7,7 +7,6 @@ config FS_ENCRYPTION select CRYPTO_ECB select CRYPTO_XTS select CRYPTO_CTS - select CRYPTO_SHA256 select KEYS help Enable encryption of files and directories. 
This diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b46021ebde85..82da2510721f 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -33,9 +33,8 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); - + int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, + bv->bv_offset); if (ret) SetPageError(page); else if (done) @@ -53,9 +52,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio); static void completion_pages(struct work_struct *work) { - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; + struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work); + struct bio *bio = ctx->bio; __fscrypt_decrypt_bio(bio, true); fscrypt_release_ctx(ctx); @@ -64,57 +62,29 @@ static void completion_pages(struct work_struct *work) void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) { - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - fscrypt_enqueue_decrypt_work(&ctx->r.work); + INIT_WORK(&ctx->work, completion_pages); + ctx->bio = bio; + fscrypt_enqueue_decrypt_work(&ctx->work); } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); -void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - struct fscrypt_ctx *ctx; - struct page *bounce_page; - - /* The bounce data pages are unmapped. */ - if ((*page)->mapping) - return; - - /* The bounce data page is unmapped. 
*/ - bounce_page = *page; - ctx = (struct fscrypt_ctx *)page_private(bounce_page); - - /* restore control page */ - *page = ctx->w.control_page; - - if (restore) - fscrypt_restore_control_page(bounce_page); -} -EXPORT_SYMBOL(fscrypt_pullback_bio_page); - int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = NULL; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + struct page *ciphertext_page; struct bio *bio; int ret, err = 0; - BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - - ctx = fscrypt_get_ctx(GFP_NOFS); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT); - if (IS_ERR(ciphertext_page)) { - err = PTR_ERR(ciphertext_page); - goto errout; - } + ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); + if (!ciphertext_page) + return -ENOMEM; while (len--) { - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - PAGE_SIZE, 0, GFP_NOFS); + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page, + blocksize, 0, GFP_NOFS); if (err) goto errout; @@ -124,14 +94,11 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } bio_set_dev(bio, inode->i_sb->s_bdev); - bio->bi_iter.bi_sector = - pblk << (inode->i_sb->s_blocksize_bits - 9); + bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, - inode->i_sb->s_blocksize, 0); - if (ret != inode->i_sb->s_blocksize) { + ret = bio_add_page(bio, ciphertext_page, blocksize, 0); + if (WARN_ON(ret != blocksize)) { /* should never happen! 
*/ - WARN_ON(1); bio_put(bio); err = -EIO; goto errout; @@ -147,7 +114,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, } err = 0; errout: - fscrypt_release_ctx(ctx); + fscrypt_free_bounce_page(ciphertext_page); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 335a362ee446..45c3d0427fb2 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -59,23 +59,16 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work) EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); /** - * fscrypt_release_ctx() - Releases an encryption context - * @ctx: The encryption context to release. + * fscrypt_release_ctx() - Release a decryption context + * @ctx: The decryption context to release. * - * If the encryption context was allocated from the pre-allocated pool, returns - * it to that pool. Else, frees it. - * - * If there's a bounce page in the context, this frees that. + * If the decryption context was allocated from the pre-allocated pool, return + * it to that pool. Else, free it. */ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) { unsigned long flags; - if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) { - mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool); - ctx->w.bounce_page = NULL; - } - ctx->w.control_page = NULL; if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { kmem_cache_free(fscrypt_ctx_cachep, ctx); } else { @@ -87,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) EXPORT_SYMBOL(fscrypt_release_ctx); /** - * fscrypt_get_ctx() - Gets an encryption context + * fscrypt_get_ctx() - Get a decryption context * @gfp_flags: The gfp flag for memory allocation * - * Allocates and initializes an encryption context. + * Allocate and initialize a decryption context. * - * Return: A new encryption context on success; an ERR_PTR() otherwise. + * Return: A new decryption context on success; an ERR_PTR() otherwise. 
*/ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) { @@ -100,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) unsigned long flags; /* - * We first try getting the ctx from a free list because in - * the common case the ctx will have an allocated and - * initialized crypto tfm, so it's probably a worthwhile - * optimization. For the bounce page, we first try getting it - * from the kernel allocator because that's just about as fast - * as getting it from a list and because a cache of free pages - * should generally be a "last resort" option for a filesystem - * to be able to do its job. + * First try getting a ctx from the free list so that we don't have to + * call into the slab allocator. */ spin_lock_irqsave(&fscrypt_ctx_lock, flags); ctx = list_first_entry_or_null(&fscrypt_free_ctxs, @@ -123,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) } else { ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } - ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL; return ctx; } EXPORT_SYMBOL(fscrypt_get_ctx); +struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) +{ + return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); +} + +/** + * fscrypt_free_bounce_page() - free a ciphertext bounce page + * + * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(), + * or by fscrypt_alloc_bounce_page() directly. 
+ */ +void fscrypt_free_bounce_page(struct page *bounce_page) +{ + if (!bounce_page) + return; + set_page_private(bounce_page, (unsigned long)NULL); + ClearPagePrivate(bounce_page); + mempool_free(bounce_page, fscrypt_bounce_page_pool); +} +EXPORT_SYMBOL(fscrypt_free_bounce_page); + void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { @@ -141,10 +148,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); } -int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, - u64 lblk_num, struct page *src_page, - struct page *dest_page, unsigned int len, - unsigned int offs, gfp_t gfp_flags) +/* Encrypt or decrypt a single filesystem block of file contents */ +int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, + u64 lblk_num, struct page *src_page, + struct page *dest_page, unsigned int len, + unsigned int offs, gfp_t gfp_flags) { union fscrypt_iv iv; struct skcipher_request *req = NULL; @@ -154,7 +162,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; - BUG_ON(len == 0); + if (WARN_ON_ONCE(len <= 0)) + return -EINVAL; + if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0)) + return -EINVAL; fscrypt_generate_iv(&iv, lblk_num, ci); @@ -186,126 +197,158 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, return 0; } -struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags) -{ - ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); - if (ctx->w.bounce_page == NULL) - return ERR_PTR(-ENOMEM); - ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL; - return ctx->w.bounce_page; -} - /** - * fscypt_encrypt_page() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @page: The page to encrypt. Must be locked for bounce-page - * encryption. 
- * @len: Length of data to encrypt in @page and encrypted - * data in returned page. - * @offs: Offset of data within @page and returned - * page holding encrypted data. - * @lblk_num: Logical block number. This must be unique for multiple - * calls with same inode, except when overwriting - * previously written data. - * @gfp_flags: The gfp flag for memory allocation - * - * Encrypts @page using the ctx encryption context. Performs encryption - * either in-place or into a newly allocated bounce page. - * Called on the page write path. + * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page + * @page: The locked pagecache page containing the block(s) to encrypt + * @len: Total size of the block(s) to encrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to encrypt. Must be + * a multiple of the filesystem's block size. + * @gfp_flags: Memory allocation flags * - * Bounce page allocation is the default. - * In this case, the contents of @page are encrypted and stored in an - * allocated bounce page. @page has to be locked and the caller must call - * fscrypt_restore_control_page() on the returned ciphertext page to - * release the bounce buffer and the encryption context. + * A new bounce page is allocated, and the specified block(s) are encrypted into + * it. In the bounce page, the ciphertext block(s) will be located at the same + * offsets at which the plaintext block(s) were located in the source page; any + * other parts of the bounce page will be left uninitialized. However, normally + * blocksize == PAGE_SIZE and the whole page is encrypted at once. * - * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in - * fscrypt_operations. Here, the input-page is returned with its content - * encrypted. + * This is for use by the filesystem's ->writepages() method. * - * Return: A page with the encrypted content on success. 
Else, an - * error value or NULL. + * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ -struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) +struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs, + gfp_t gfp_flags) { - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = page; + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + struct page *ciphertext_page; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; int err; - BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0); + if (WARN_ON_ONCE(!PageLocked(page))) + return ERR_PTR(-EINVAL); - if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) { - /* with inplace-encryption we just encrypt the page */ - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page, - ciphertext_page, len, offs, - gfp_flags); - if (err) - return ERR_PTR(err); + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return ERR_PTR(-EINVAL); - return ciphertext_page; - } - - BUG_ON(!PageLocked(page)); - - ctx = fscrypt_get_ctx(gfp_flags); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* The encryption operation will require a bounce page. 
*/ - ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags); - if (IS_ERR(ciphertext_page)) - goto errout; + ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); + if (!ciphertext_page) + return ERR_PTR(-ENOMEM); - ctx->w.control_page = page; - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, - page, ciphertext_page, len, offs, - gfp_flags); - if (err) { - ciphertext_page = ERR_PTR(err); - goto errout; + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, + page, ciphertext_page, + blocksize, i, gfp_flags); + if (err) { + fscrypt_free_bounce_page(ciphertext_page); + return ERR_PTR(err); + } } SetPagePrivate(ciphertext_page); - set_page_private(ciphertext_page, (unsigned long)ctx); - lock_page(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)page); return ciphertext_page; +} +EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks); -errout: - fscrypt_release_ctx(ctx); - return ciphertext_page; +/** + * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to encrypt + * @len: Size of block to encrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to encrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * @gfp_flags: Memory allocation flags + * + * Encrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. 
+ * + * Return: 0 on success; -errno on failure + */ +int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page, + len, offs, gfp_flags); } -EXPORT_SYMBOL(fscrypt_encrypt_page); +EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** - * fscrypt_decrypt_page() - Decrypts a page in-place - * @inode: The corresponding inode for the page to decrypt. - * @page: The page to decrypt. Must be locked in case - * it is a writeback page (FS_CFLG_OWN_PAGES unset). - * @len: Number of bytes in @page to be decrypted. - * @offs: Start of data in @page. - * @lblk_num: Logical block number. + * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page + * @page: The locked pagecache page containing the block(s) to decrypt + * @len: Total size of the block(s) to decrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to decrypt. Must be + * a multiple of the filesystem's block size. * - * Decrypts page in-place using the ctx encryption context. + * The specified block(s) are decrypted in-place within the pagecache page, + * which must still be locked and not uptodate. Normally, blocksize == + * PAGE_SIZE and the whole page is decrypted at once. * - * Called from the read completion callback. + * This is for use by the filesystem's ->readpages() method. * - * Return: Zero on success, non-zero otherwise. 
+ * Return: 0 on success; -errno on failure */ -int fscrypt_decrypt_page(const struct inode *inode, struct page *page, - unsigned int len, unsigned int offs, u64 lblk_num) +int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, + unsigned int offs) { - if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES)) - BUG_ON(!PageLocked(page)); + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; + int err; + + if (WARN_ON_ONCE(!PageLocked(page))) + return -EINVAL; + + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return -EINVAL; + + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, + page, blocksize, i, GFP_NOFS); + if (err) + return err; + } + return 0; +} +EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks); - return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, - len, offs, GFP_NOFS); +/** + * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to decrypt + * @len: Size of block to decrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to decrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * + * Decrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. 
+ * + * Return: 0 on success; -errno on failure + */ +int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, + len, offs, GFP_NOFS); } -EXPORT_SYMBOL(fscrypt_decrypt_page); +EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); /* * Validate dentries in encrypted directories to make sure we aren't potentially @@ -355,18 +398,6 @@ const struct dentry_operations fscrypt_d_ops = { .d_revalidate = fscrypt_d_revalidate, }; -void fscrypt_restore_control_page(struct page *page) -{ - struct fscrypt_ctx *ctx; - - ctx = (struct fscrypt_ctx *)page_private(page); - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); - unlock_page(page); - fscrypt_release_ctx(ctx); -} -EXPORT_SYMBOL(fscrypt_restore_control_page); - static void fscrypt_destroy(void) { struct fscrypt_ctx *pos, *n; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index eccea3d8f923..00d150ff3033 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -12,7 +12,6 @@ */ #include <linux/scatterlist.h> -#include <linux/ratelimit.h> #include <crypto/skcipher.h> #include "fscrypt_private.h" diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7da276159593..8978eec9d766 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -94,7 +94,6 @@ typedef enum { } fscrypt_direction_t; #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 static inline bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) @@ -117,14 +116,12 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); -extern int fscrypt_do_page_crypto(const struct inode *inode, - fscrypt_direction_t rw, u64 lblk_num, - struct page *src_page, - struct page *dest_page, - 
unsigned int len, unsigned int offs, - gfp_t gfp_flags); -extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags); +extern int fscrypt_crypt_block(const struct inode *inode, + fscrypt_direction_t rw, u64 lblk_num, + struct page *src_page, struct page *dest_page, + unsigned int len, unsigned int offs, + gfp_t gfp_flags); +extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index bd525f7573a4..c1d6715d88e9 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -5,7 +5,6 @@ * Encryption hooks for higher-level filesystem operations. */ -#include <linux/ratelimit.h> #include "fscrypt_private.h" /** diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index dcd91a3fbe49..207ebed918c1 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -12,7 +12,6 @@ #include <keys/user-type.h> #include <linux/hashtable.h> #include <linux/scatterlist.h> -#include <linux/ratelimit.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/sha.h> diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index d536889ac31b..4941fe8471ce 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) if (ret == -ENODATA) { if (!S_ISDIR(inode->i_mode)) ret = -ENOTDIR; + else if (IS_DEADDIR(inode)) + ret = -ENOENT; else if (!inode->i_sb->s_cop->empty_dir(inode)) ret = -ENOTEMPTY; else @@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + void *old; + dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); - } - - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * 
Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas, * existing entry is a PMD, we will just leave the PMD in the * tree and dirty it if necessary. */ - void *old = dax_lock_entry(xas, new_entry); + old = dax_lock_entry(xas, new_entry); WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | DAX_LOCKED)); entry = new_entry; @@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned flags = 0; if (iov_iter_rw(iter) == WRITE) { - lockdep_assert_held_exclusive(&inode->i_rwsem); + lockdep_assert_held_write(&inode->i_rwsem); flags |= IOMAP_WRITE; } else { lockdep_assert_held(&inode->i_rwsem); diff --git a/fs/dcache.c b/fs/dcache.c index c435398f2c81..f41121e5d1ec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; - int isdir = d_is_dir(dentry); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); @@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } - fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ddd708b09fa1..93e4ca6b2ad7 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -997,25 +997,19 @@ static const struct file_operations u32_array_fops = { * @array as data. If the @mode variable is so set it can be read from. * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. - * - * The function returns a pointer to dentry on success. If an error occurs, - * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in - * the kernel, the value %ERR_PTR(-ENODEV) will be returned. 
*/ -struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements) +void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, u32 *array, u32 elements) { struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) - return NULL; + return; data->array = array; data->elements = elements; - return debugfs_create_file_unsafe(name, mode, parent, data, - &u32_array_fops); + debugfs_create_file_unsafe(name, mode, parent, data, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index acef14ad53db..042b688ed124 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -2,13 +2,16 @@ /* * inode.c - part of debugfs, a tiny little debug file system * - * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. + * Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org> * * debugfs is for people to use instead of /proc or /sys. * See ./Documentation/core-api/kernel-api.rst for more details. */ +#define pr_fmt(fmt) "debugfs: " fmt + #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> @@ -285,15 +288,17 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; - pr_debug("debugfs: creating file '%s'\n",name); + pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) return parent; error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); - if (error) + if (error) { + pr_err("Unable to pin filesystem for file '%s'\n", name); return ERR_PTR(error); + } /* If the parent is not specified, we create it in the root. 
* We need the root dentry to do this, which is in the super @@ -306,6 +311,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) inode_lock(d_inode(parent)); dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { + if (d_is_dir(dentry)) + pr_err("Directory '%s' with parent '%s' already present!\n", + name, parent->d_name.name); + else + pr_err("File '%s' in directory '%s' already present!\n", + name, parent->d_name.name); dput(dentry); dentry = ERR_PTR(-EEXIST); } @@ -349,8 +360,11 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create file '%s'\n", + name); return failed_creating(dentry); + } inode->i_mode = mode; inode->i_private = data; @@ -511,8 +525,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create directory '%s'\n", + name); return failed_creating(dentry); + } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &simple_dir_inode_operations; @@ -550,8 +567,11 @@ struct dentry *debugfs_create_automount(const char *name, return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create automount '%s'\n", + name); return failed_creating(dentry); + } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; @@ -606,6 +626,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { + pr_err("out of free dentries, can not create symlink '%s'\n", + name); kfree(link); return failed_creating(dentry); } @@ -617,13 +639,10 @@ struct dentry 
*debugfs_create_symlink(const char *name, struct dentry *parent, } EXPORT_SYMBOL_GPL(debugfs_create_symlink); -static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent) +static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; - simple_unlink(d_inode(parent), dentry); - d_delete(dentry); - /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either @@ -644,16 +663,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (simple_positive(dentry)) { dget(dentry); - if (!d_is_reg(dentry)) { - if (d_is_dir(dentry)) - ret = simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); + if (d_is_dir(dentry)) { + ret = simple_rmdir(d_inode(parent), dentry); if (!ret) - d_delete(dentry); + fsnotify_rmdir(d_inode(parent), dentry); } else { - __debugfs_remove_file(dentry, parent); + simple_unlink(d_inode(parent), dentry); + fsnotify_unlink(d_inode(parent), dentry); } + if (!ret) + d_delete(dentry); + if (d_is_reg(dentry)) + __debugfs_file_removed(dentry); dput(dentry); } return ret; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 2c14ae044dce..beeadca23b05 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_delete(dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/direct-io.c b/fs/direct-io.c index ac7fb19b6ade..ae196784f487 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -538,8 +538,8 @@ static struct bio *dio_await_one(struct dio *dio) */ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { - struct bio_vec *bvec; blk_status_t err = bio->bi_status; + bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; if (err) { if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) @@ 
-548,19 +548,10 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) dio->io_error = -EIO; } - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { + if (dio->is_async && should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - if (dio->op == REQ_OP_READ && !PageCompound(page) && - dio->should_dirty) - set_page_dirty_lock(page); - put_page(page); - } + bio_release_pages(bio, should_dirty); bio_put(bio); } return err; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 4c2c85a223ac..afb8340918b8 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -158,6 +158,7 @@ static struct attribute *dlm_attrs[] = { &dlm_attr_recover_nodeid.attr, NULL, }; +ATTRIBUTE_GROUPS(dlm); static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -187,7 +188,7 @@ static const struct sysfs_ops dlm_attr_ops = { }; static struct kobj_type dlm_ktype = { - .default_attrs = dlm_attrs, + .default_groups = dlm_groups, .sysfs_ops = &dlm_attr_ops, .release = lockspace_kobj_release, }; diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index a3cc10b1bfe1..e9e27a271af0 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 8c6ab6c95727..96c0c86f3fff 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. 
* Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b4505188e799..30ae44cb7453 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef EFIVAR_FS_INTERNAL_H #define EFIVAR_FS_INTERNAL_H diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 5b68e4294faa..5bc3c4a4c563 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #include <linux/ctype.h> diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c6f513100cc9..4c74c768ae43 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, error = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, error == -EINTR); return error; } @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, err = do_epoll_wait(epfd, events, maxevents, timeout); - restore_user_sigmask(sigmask, &sigsaved); + restore_user_sigmask(sigmask, &sigsaved, err == -EINTR); return err; } diff --git a/fs/exec.c b/fs/exec.c index 89a500bb897a..c71cbfe6826a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1663,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return retval; } if (retval != -ENOEXEC || !bprm->file) { diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 33db13365c5e..547c165299c0 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) /* * Returns 1 if the passed-in block region is valid; 0 if some part overlaps - * with filesystem metadata blocksi. + * with filesystem metadata blocks. 
*/ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, unsigned int count) @@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, (start_blk + count >= sbi->s_sb_block)) return 0; - return 1; } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a0c5ea91fcd4..fda7d3f5b4be 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode) struct backing_dev_info *bdi; bdi = inode_to_bdi(inode); - if (bdi_read_congested(bdi)) - return; - if (bdi_write_congested(bdi)) + if (bdi_rw_congested(bdi)) return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); @@ -511,6 +509,7 @@ repeat_in_this_group: /* * Scanned all blockgroups. */ + brelse(bitmap_bh); err = -ENOSPC; goto fail; got: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e474127dd255..7004ce581a32 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode) struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct ext2_inode *raw_inode; struct inode *inode; long ret = -EIO; @@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) */ if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ - brelse (bh); ret = -ESTALE; goto bad_inode; } @@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) { ext2_error(sb, "ext2_iget", "bad extended attribute block %u", ei->i_file_acl); - brelse(bh); ret = -EFSCORRUPTED; goto bad_inode; } @@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) return inode; bad_inode: + brelse(bh); iget_failed(inode); return ERR_PTR(ret); } @@ -1640,7 +1639,7 @@ int ext2_write_inode(struct 
inode *inode, struct writeback_control *wbc) } int ext2_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_falgs) + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct ext2_inode_info *ei = EXT2_I(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 1d7ab73b1014..44eb6e7eb492 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -303,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_XIP) + if (test_opt(sb, XIP)) seq_puts(seq, ",xip"); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) + if (test_opt(sb, DAX)) seq_puts(seq, ",dax"); if (!test_opt(sb, RESERVATION)) @@ -935,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resgid = opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -967,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { + if (test_opt(sb, DAX)) { if (!bdev_dax_supported(sb->s_bdev, blocksize)) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); - sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; + clear_opt(sbi->s_mount_opt, DAX); } } @@ -1404,7 +1403,7 @@ out_set: sbi->s_resuid = new_opts.s_resuid; sbi->s_resgid = new_opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); return 0; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 1e33e0ac8cf1..79369c13cc55 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index) return handler; } +static bool +ext2_xattr_header_valid(struct ext2_xattr_header *header) +{ + if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + header->h_blocks != cpu_to_le32(1)) + return false; + + return true; +} + +static bool +ext2_xattr_entry_valid(struct ext2_xattr_entry *entry, + char *end, size_t end_offs) +{ + struct ext2_xattr_entry *next; + size_t size; + + next = EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + return false; + + if (entry->e_value_block != 0) + return false; + + size = le32_to_cpu(entry->e_value_size); + if (size > end_offs || + le16_to_cpu(entry->e_value_offs) + size > end_offs) + return false; + + return true; +} + +static int +ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name, + struct ext2_xattr_entry *entry) +{ + int cmp; + + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + + return cmp; +} + /* * ext2_xattr_get() * @@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, struct ext2_xattr_entry *entry; size_t name_len, size; char *end; - int error; + int error, not_found; struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", @@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", + if 
(!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_get", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = - EXT2_XATTR_NEXT(entry); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - if (name_index == entry->e_name_index && - name_len == entry->e_name_len && - memcmp(name, entry->e_name, name_len) == 0) + + not_found = ext2_xattr_cmp_entry(name_index, name_len, name, + entry); + if (!not_found) goto found; - entry = next; + if (not_found < 0) + break; + + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; found: - /* check the buffer size */ - if (entry->e_value_block != 0) - goto bad_block; size = le32_to_cpu(entry->e_value_size); - if (size > inode->i_sb->s_blocksize || - le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) - goto bad_block; - if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); if (buffer) { @@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_list", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", /* check the on-disk data structure */ entry = 
FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); - - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - entry = next; + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); @@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct ext2_xattr_header *header = NULL; - struct ext2_xattr_entry *here, *last; + struct ext2_xattr_entry *here = NULL, *last = NULL; size_t name_len, free, min_offs = sb->s_blocksize; int not_found = 1, error; char *end; @@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, le32_to_cpu(HDR(bh)->h_refcount)); header = HDR(bh); end = bh->b_data + bh->b_size; - if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - header->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(sb, "ext2_xattr_set", + if (!ext2_xattr_header_valid(header)) { +bad_block: + ext2_error(sb, "ext2_xattr_set", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - /* Find the named attribute. */ - here = FIRST_ENTRY(bh); - while (!IS_LAST_ENTRY(here)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); - if ((char *)next >= end) - goto bad_block; - if (!here->e_value_block && here->e_value_size) { - size_t offs = le16_to_cpu(here->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - not_found = name_index - here->e_name_index; - if (!not_found) - not_found = name_len - here->e_name_len; - if (!not_found) - not_found = memcmp(name, here->e_name,name_len); - if (not_found <= 0) - break; - here = next; - } - last = here; - /* We still need to compute min_offs and last. */ + /* + * Find the named attribute. 
If not found, 'here' will point + * to entry where the new attribute should be inserted to + * maintain sorting. + */ + last = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(last)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize)) goto bad_block; - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } - last = next; + if (not_found > 0) { + not_found = ext2_xattr_cmp_entry(name_index, + name_len, + name, last); + if (not_found <= 0) + here = last; + } + last = EXT2_XATTR_NEXT(last); } + if (not_found > 0) + here = last; /* Check whether we have enough space left. */ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); @@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* We will use a new extended attribute block. */ free = sb->s_blocksize - sizeof(struct ext2_xattr_header) - sizeof(__u32); - here = last = NULL; /* avoid gcc uninitialized warning. 
*/ } if (not_found) { @@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; - if (!here->e_value_block && here->e_value_size) { - size_t size = le32_to_cpu(here->e_value_size); - - if (le16_to_cpu(here->e_value_offs) + size > - sb->s_blocksize || size > sb->s_blocksize) - goto bad_block; - free += EXT2_XATTR_SIZE(size); - } + free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size)); free += EXT2_XATTR_LEN(name_len); } error = -ENOSPC; @@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set", unlock_buffer(bh); ea_bdebug(bh, "cloning"); - header = kmalloc(bh->b_size, GFP_KERNEL); + header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); error = -ENOMEM; if (header == NULL) goto cleanup; - memcpy(header, HDR(bh), bh->b_size); header->h_refcount = cpu_to_le32(1); offset = (char *)here - bh->b_data; @@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - if (!here->e_value_block && here->e_value_size) { + if (here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; @@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && o < offs) + if (o < offs) last->e_value_offs = cpu_to_le16(o + size); last = EXT2_XATTR_NEXT(last); @@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode) goto cleanup; } ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { + if (!ext2_xattr_header_valid(HDR(bh))) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 
e5d6ee61ff48..0b202e00d93f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, } /** - * ext4_should_retry_alloc() + * ext4_should_retry_alloc() - check if a block allocation should be retried * @sb: super block - * @retries number of attemps has been made + * @retries: number of attemps has been made * * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c7843b149a1e..86054f31fe4d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -33,6 +33,9 @@ static int ext4_dx_readdir(struct file *, struct dir_context *); /** + * is_dx_dir() - check if a directory is using htree indexing + * @inode: directory inode + * * Check if the given dir-inode refers to an htree-indexed directory * (or a directory which could potentially get converted to use htree * indexing). @@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; - int dir_has_error = 0; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (IS_ENCRYPTED(inode)) { @@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - offset = ctx->pos & (sb->s_blocksize - 1); - while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; @@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto errout; } cond_resched(); + offset = ctx->pos & (sb->s_blocksize - 1); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); + if (err == 0) { + /* m_len should never be zero but let's avoid + * an infinite loop if it somehow is */ + if (map.m_len == 0) + map.m_len = 1; + ctx->pos += map.m_len * sb->s_blocksize; + continue; + } if (err > 0) { pgoff_t index = 
map.m_pblk >> (PAGE_SHIFT - inode->i_blkbits); @@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if (!bh) { - if (!dir_has_error) { - EXT4_ERROR_FILE(file, 0, - "directory contains a " - "hole at offset %llu", - (unsigned long long) ctx->pos); - dir_has_error = 1; - } /* corrupt size? Maybe no more blocks to read */ if (ctx->pos > inode->i_blocks << 9) break; @@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) /* Check the checksum */ if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(inode, bh)) { EXT4_ERROR_FILE(file, 0, "directory fails checksum " "at offset %llu", (unsigned long long)ctx->pos); @@ -674,7 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, return memcmp(str, name->name, len); } - return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr); + return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); } static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1cb67859e051..bf660aa7a9e0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -421,7 +421,8 @@ struct flex_groups { EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL)) +#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ + EXT4_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. 
*/ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) @@ -2077,6 +2078,9 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif +#ifdef CONFIG_UNICODE + struct fscrypt_str cf_name; +#endif }; #define fname_name(p) ((p)->disk_name.name) @@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); +#ifdef CONFIG_UNICODE +extern void ext4_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct fscrypt_str *fname); +#endif + #ifdef CONFIG_FS_ENCRYPTION static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, const struct fscrypt_name *src) @@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif return 0; } @@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name); +#endif return 0; } @@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->crypto_buf.name = NULL; fname->usr_fname = NULL; fname->disk_name.name = NULL; + +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif } #else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, @@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif + return 0; } @@ -2376,7 +2404,13 @@ static inline int 
ext4_fname_prepare_lookup(struct inode *dir, return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +static inline void ext4_fname_free_filename(struct ext4_filename *fname) +{ +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} #endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ @@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *); extern int ext4_ind_migrate(struct inode *inode); /* namei.c */ -extern int ext4_dirent_csum_verify(struct inode *inode, - struct ext4_dir_entry *dirent); +extern int ext4_dirblock_csum_verify(struct inode *inode, + struct buffer_head *bh); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, @@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle, extern int ext4_read_inline_dir(struct file *filp, struct dir_context *ctx, int *has_inline_data); -extern int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data); +extern int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, @@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, struct ext4_dir_entry_2 *de, int blocksize, int csum_size, unsigned int parent_ino, int dotdot_real_len); -extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize); -extern int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct 
buffer_head *bh); +extern void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize); +extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, + struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry); + const struct qstr *fname, + const struct qstr *entry, bool quick); #define S_SHIFT 12 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 75a5309f2231..ef8fcf7d0d3b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal) } static inline int ext4_jbd2_inode_add_write(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_write(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_write(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_wait(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_wait(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d40ed940001e..92266a2da7d6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5676,8 +5676,8 @@ out_mutex: } /** - * ext4_swap_extents - Swap extents between two inodes - * + * ext4_swap_extents() - Swap extents between two inodes + * @handle: handle for this transaction * @inode1: First inode * @inode2: Second inode * @lblk1: Start block for first inode diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 023a3eb3afa3..7521de2dcf3a 100644 --- a/fs/ext4/extents_status.c +++ 
b/fs/ext4/extents_status.c @@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); if (!es) goto out_wrap; - node = &es->rb_node; while (*nr_to_scan > 0) { if (es->es_lblk > end) { ei->i_es_shrink_lblk = end + 1; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2c5baa5e8291..f4a24a46245e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret <= 0) return ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2024d3fa5504..36699a131168 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, } /** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. + * ext4_alloc_branch() - allocate and set up a chain of blocks + * @handle: handle for this transaction + * @ar: structure describing the allocation request + * @indirect_blks: number of allocated indirect blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. * * This function allocates blocks, zeroes out all but the last one, * links them into chain and (if we are synchronous) writes them to disk. @@ -396,15 +394,11 @@ failed: } /** - * ext4_splice_branch - splice the allocated branch onto inode. 
+ * ext4_splice_branch() - splice the allocated branch onto inode. * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) + * @ar: structure describing the allocation request * @where: location of missing link * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding * * This function fills the missing link and does all housekeeping needed in * inode (->i_blocks, etc.). In case of success we end up with the full diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f73bc3925282..88cdf3c90bd1 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, { int err, csum_size = 0, header_size = 0; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; void *target = dir_block->b_data; /* @@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, inode->i_sb->s_blocksize - csum_size); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, - inode->i_sb->s_blocksize); - initialize_dirent_tail(t, inode->i_sb->s_blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, + inode->i_sb->s_blocksize); set_buffer_uptodate(dir_block); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) return err; set_buffer_verified(dir_block); @@ -1327,11 +1324,11 @@ out: * inlined dir. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. 
*/ -int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data) +int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data) { int err = 0, count = 0; unsigned int parent_ino; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7f77c643008..420fe3deed39 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -731,10 +731,16 @@ out_sem: !(flags & EXT4_GET_BLOCKS_ZERO) && !ext4_is_quota_file(inode) && ext4_should_order_data(inode)) { + loff_t start_byte = + (loff_t)map->m_lblk << inode->i_blkbits; + loff_t length = (loff_t)map->m_len << inode->i_blkbits; + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) - ret = ext4_jbd2_inode_add_wait(handle, inode); + ret = ext4_jbd2_inode_add_wait(handle, inode, + start_byte, length); else - ret = ext4_jbd2_inode_add_write(handle, inode); + ret = ext4_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret) return ret; } @@ -1164,8 +1170,9 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, int err = 0; unsigned blocksize = inode->i_sb->s_blocksize; unsigned bbits; - struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; - bool decrypt = false; + struct buffer_head *bh, *head, *wait[2]; + int nr_wait = 0; + int i; BUG_ON(!PageLocked(page)); BUG_ON(from > PAGE_SIZE); @@ -1217,23 +1224,32 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, !buffer_unwritten(bh) && (block_start < from || block_end > to)) { ll_rw_block(REQ_OP_READ, 0, 1, &bh); - *wait_bh++ = bh; - decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); + wait[nr_wait++] = bh; } } /* * If we issued read requests, let them complete. 
*/ - while (wait_bh > wait) { - wait_on_buffer(*--wait_bh); - if (!buffer_uptodate(*wait_bh)) + for (i = 0; i < nr_wait; i++) { + wait_on_buffer(wait[i]); + if (!buffer_uptodate(wait[i])) err = -EIO; } - if (unlikely(err)) + if (unlikely(err)) { page_zero_new_buffers(page, from, to); - else if (decrypt) - err = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); + } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) { + for (i = 0; i < nr_wait; i++) { + int err2; + + err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize, + bh_offset(wait[i])); + if (err2) { + clear_buffer_uptodate(wait[i]); + err = err2; + } + } + } + return err; } #endif @@ -4065,9 +4081,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) { /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); - BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, - page, PAGE_SIZE, 0, page->index)); + WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks( + page, blocksize, bh_offset(bh))); } } if (ext4_should_journal_data(inode)) { @@ -4085,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, err = 0; mark_buffer_dirty(bh); if (ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode); + err = ext4_jbd2_inode_add_write(handle, inode, from, + length); } unlock: @@ -4570,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct buffer_head *bh; struct super_block *sb = inode->i_sb; ext4_fsblk_t block; + struct blk_plug plug; int inodes_per_block, inode_offset; iloc->bh = NULL; @@ -4658,6 +4675,7 @@ make_io: * If we need to do any I/O, try to pre-readahead extra * blocks from the inode table. 
*/ + blk_start_plug(&plug); if (EXT4_SB(sb)->s_inode_readahead_blks) { ext4_fsblk_t b, end, table; unsigned num; @@ -4688,6 +4706,7 @@ make_io: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, @@ -5520,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -5571,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; - int shrink = (attr->ia_size <= inode->i_size); + int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -5585,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) inode_inc_iversion(inode); - if (ext4_should_order_data(inode) && - (attr->ia_size < inode->i_size)) { - error = ext4_begin_ordered_truncate(inode, + if (shrink) { + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) - goto err_out; + if (error) + goto err_out; + } + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. 
+ */ + inode_dio_wait(inode); + } + + down_write(&EXT4_I(inode)->i_mmap_sem); + + rc = ext4_break_layouts(inode); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + return rc; } + if (attr->ia_size != inode->i_size) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); - goto err_out; + goto out_mmap_sem; } if (ext4_handle_valid(handle) && shrink) { error = ext4_orphan_add(handle, inode); @@ -5624,42 +5666,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) i_size_write(inode, attr->ia_size); up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); - if (error) { - if (orphan && inode->i_nlink) - ext4_orphan_del(NULL, inode); - goto err_out; + if (error) + goto out_mmap_sem; + if (!shrink) { + pagecache_isize_extended(inode, oldsize, + inode->i_size); + } else if (ext4_should_journal_data(inode)) { + ext4_wait_for_tail_page_commit(inode); } } - if (!shrink) { - pagecache_isize_extended(inode, oldsize, inode->i_size); - } else { - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. - */ - inode_dio_wait(inode); - } - if (orphan && ext4_should_journal_data(inode)) - ext4_wait_for_tail_page_commit(inode); - down_write(&EXT4_I(inode)->i_mmap_sem); - - rc = ext4_break_layouts(inode); - if (rc) { - up_write(&EXT4_I(inode)->i_mmap_sem); - error = rc; - goto err_out; - } /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. */ truncate_pagecache(inode, inode->i_size); - if (shrink) { + /* + * Call ext4_truncate() even if i_size didn't change to + * truncate possible preallocated blocks. 
+ */ + if (attr->ia_size <= oldsize) { rc = ext4_truncate(inode); if (rc) error = rc; } +out_mmap_sem: up_write(&EXT4_I(inode)->i_mmap_sem); } @@ -6190,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) get_block_t *get_block; int retries = 0; + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 566dfac28b3f..442f7ef873fc 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16]) } #endif +/* + * If immutable is set and we are not clearing it, we're not allowed to change + * anything else in the inode. Don't error out if we're only trying to set + * immutable on an immutable file. + */ +static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, + unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int oldflags = ei->i_flags; + + if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) + return 0; + + if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) + return -EPERM; + if (ext4_has_feature_project(inode->i_sb) && + __kprojid_val(ei->i_projid) != new_projid) + return -EPERM; + + return 0; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -333,6 +356,20 @@ static int ext4_ioctl_setflags(struct inode *inode, } } + /* + * Wait for all pending directio and then flush all the dirty pages + * for this file. The flush marks all the pages readonly, so any + * subsequent attempt to write to the file (particularly mmap pages) + * will come through the filesystem and fail. 
+ */ + if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && + (flags & EXT4_IMMUTABLE_FL)) { + inode_dio_wait(inode); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto flags_out; + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -722,6 +759,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETFLAGS: flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (S_ISREG(inode->i_mode)) + flags &= ~EXT4_PROJINHERIT_FL; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { int err; @@ -749,7 +788,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; inode_lock(inode); - err = ext4_ioctl_setflags(inode, flags); + err = ext4_ioctl_check_immutable(inode, + from_kprojid(&init_user_ns, ei->i_projid), + flags); + if (!err) + err = ext4_ioctl_setflags(inode, flags); inode_unlock(inode); mnt_drop_write_file(filp); return err; @@ -1114,6 +1157,9 @@ resizefs_out: goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); + err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); + if (err) + goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99ba720dbb7a..a3e2767bdf2f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * ext4_free_blocks() -- Free given blocks and update quota * @handle: handle for this transaction * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, diff --git 
a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1083a9f3f16a..30ce3dc69378 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -13,11 +13,10 @@ #include "ext4_extents.h" /** - * get_ext_path - Find an extent path for designated logical block number. - * - * @inode: an inode which is searched + * get_ext_path() - Find an extent path for designated logical block number. + * @inode: inode to be searched * @lblock: logical block number to find an extent path - * @path: pointer to an extent path pointer (for output) + * @ppath: pointer to an extent path pointer (for output) * * ext4_find_extent wrapper. Return 0 on success, or a negative error value * on failure. @@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, } /** - * ext4_double_down_write_data_sem - Acquire two inodes' write lock - * of i_data_sem + * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem + * @first: inode to be locked + * @second: inode to be locked * * Acquire write lock of i_data_sem of the two inodes */ @@ -390,7 +390,8 @@ data_copy: /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ - *err = ext4_jbd2_inode_add_write(handle, orig_inode); + *err = ext4_jbd2_inode_add_write(handle, orig_inode, + (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); unlock_pages: unlock_page(pagep[0]); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cd01c4a67ffb..129029534075 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle, static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). 
This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + if (!bh && (type == INDEX || type == DIRENT_HTREE)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block", + (type == INDEX) ? "index" : "leaf"); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -150,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, } } if (!is_dx_block) { - if (ext4_dirent_csum_verify(inode, dirent)) + if (ext4_dirblock_csum_verify(inode, bh)) set_buffer_verified(bh); else { ext4_error_inode(inode, func, line, block, @@ -280,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); /* checksumming functions */ -void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize) +void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize) { + struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); t->det_rec_len = ext4_rec_len_to_disk( sizeof(struct ext4_dir_entry_tail), blocksize); @@ -291,17 +306,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, /* Walk through a dirent block to find a checksum "dirent" at the tail */ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, - 
struct ext4_dir_entry *de) + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; #ifdef PARANOID struct ext4_dir_entry *d, *top; - d = de; - top = (struct ext4_dir_entry *)(((void *)de) + + d = (struct ext4_dir_entry *)bh->b_data; + top = (struct ext4_dir_entry *)(bh->b_data + (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); + sizeof(struct ext4_dir_entry_tail))); while (d < top && d->rec_len) d = (struct ext4_dir_entry *)(((void *)d) + le16_to_cpu(d->rec_len)); @@ -311,7 +326,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, t = (struct ext4_dir_entry_tail *)d; #else - t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); + t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb)); #endif if (t->det_reserved_zero1 || @@ -323,8 +338,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, return t; } -static __le32 ext4_dirent_csum(struct inode *inode, - struct ext4_dir_entry *dirent, int size) +static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); @@ -344,49 +358,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func, "No space for directory leaf checksum. 
Please run e2fsck -D."); } -int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) +int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return 0; } - if (t->det_checksum != ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent)) + if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data)) return 0; return 1; } -static void ext4_dirent_csum_set(struct inode *inode, - struct ext4_dir_entry *dirent) +static void ext4_dirblock_csum_set(struct inode *inode, + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return; } - t->det_checksum = ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent); + t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data); } -int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +int ext4_handle_dirty_dirblock(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) { - ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + ext4_dirblock_csum_set(inode, bh); return ext4_handle_dirty_metadata(handle, inode, bh); } @@ -980,7 +994,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1090,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (ext4_has_inline_data(dir)) { int has_inline_data = 
1; - count = htree_inlinedir_to_tree(dir_file, dir, 0, - &hinfo, start_hash, - start_minor_hash, - &has_inline_data); + count = ext4_inlinedir_to_tree(dir_file, dir, 0, + &hinfo, start_hash, + start_minor_hash, + &has_inline_data); if (has_inline_data) { *next_hash = ~0; return count; @@ -1259,19 +1273,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) #ifdef CONFIG_UNICODE /* * Test whether a case-insensitive directory entry matches the filename - * being searched for. + * being searched for. If quick is set, assume the name being looked up + * is already in the casefolded form. * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry) + const struct qstr *entry, bool quick) { const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); const struct unicode_map *um = sbi->s_encoding; int ret; - ret = utf8_strncasecmp(um, name, entry); + if (quick) + ret = utf8_strncasecmp_folded(um, name, entry); + else + ret = utf8_strncasecmp(um, name, entry); + if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. 
@@ -1287,6 +1306,32 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, return ret; } + +void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, + struct fscrypt_str *cf_name) +{ + int len; + + if (!IS_CASEFOLDED(dir)) { + cf_name->name = NULL; + return; + } + + cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!cf_name->name) + return; + + len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + iname, cf_name->name, + EXT4_NAME_LEN); + if (len <= 0) { + kfree(cf_name->name); + cf_name->name = NULL; + return; + } + cf_name->len = (unsigned) len; + +} #endif /* @@ -1313,8 +1358,15 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) - return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0); + if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (fname->cf_name.name) { + struct qstr cf = {.name = fname->cf_name.name, + .len = fname->cf_name.len}; + return !ext4_ci_compare(parent, &cf, &entry, true); + } + return !ext4_ci_compare(parent, fname->usr_fname, &entry, + false); + } #endif return fscrypt_match_name(&f, de->name, de->name_len); @@ -1484,8 +1536,7 @@ restart: if (!buffer_verified(bh) && !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(dir, bh)) { EXT4_ERROR_INODE(dir, "checksumming directory " "block %lu", (unsigned long)block); brelse(bh); @@ -1586,7 +1637,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; @@ -1769,7 +1820,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, 
*data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - struct ext4_dir_entry_tail *t; int csum_size = 0; int err = 0, i; @@ -1830,11 +1880,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, (char *) de2, blocksize); if (csum_size) { - t = EXT4_DIRENT_TAIL(data2, blocksize); - initialize_dirent_tail(t, blocksize); - - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); + ext4_initialize_dirent_tail(*bh, blocksize); + ext4_initialize_dirent_tail(bh2, blocksize); } dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, @@ -1848,7 +1895,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); + err = ext4_handle_dirty_dirblock(handle, dir, bh2); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); @@ -1976,7 +2023,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, inode_inc_iversion(dir); ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return 0; @@ -1995,8 +2042,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; - struct ext4_dir_entry_tail *t; - char *data1, *top; + char *data2, *top; unsigned len; int retval; unsigned blocksize; @@ -2036,21 +2082,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); - data1 = bh2->b_data; + data2 = bh2->b_data; - memcpy (data1, de, len); - de = (struct ext4_dir_entry_2 *) data1; - top = data1 + len; + 
memcpy(data2, de, len); + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, - blocksize); + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh2, blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); @@ -2080,7 +2123,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) goto out_frames; - retval = ext4_handle_dirty_dirent_node(handle, dir, bh2); + retval = ext4_handle_dirty_dirblock(handle, dir, bh2); if (retval) goto out_frames; @@ -2120,7 +2163,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; struct super_block *sb; struct ext4_sb_info *sbi; struct ext4_filename fname; @@ -2170,6 +2212,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2190,6 +2237,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2199,10 +2247,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de->inode = 0; de->rec_len = 
ext4_rec_len_to_disk(blocksize - csum_size, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh, blocksize); retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: @@ -2234,7 +2280,7 @@ again: return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2460,7 +2506,7 @@ static int ext4_delete_entry(handle_t *handle, goto out; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (unlikely(err)) goto out; @@ -2662,7 +2708,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, { struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; ext4_lblk_t block = 0; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; @@ -2686,13 +2731,11 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, de = (struct ext4_dir_entry_2 *)dir_block->b_data; ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); set_nlink(inode, 2); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, blocksize); BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) goto out; set_buffer_verified(dir_block); @@ -2782,7 +2825,10 @@ bool ext4_empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "invalid size"); return true; } - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, 
+ * so treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; @@ -2804,6 +2850,10 @@ bool ext4_empty_dir(struct inode *inode) brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; @@ -3369,7 +3419,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; @@ -3430,9 +3483,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, ent->inode, ent->dir_bh); } else { - retval = ext4_handle_dirty_dirent_node(handle, - ent->inode, - ent->dir_bh); + retval = ext4_handle_dirty_dirblock(handle, ent->inode, + ent->dir_bh); } } else { retval = ext4_mark_inode_dirty(handle, ent->inode); @@ -3462,8 +3514,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { - retval = ext4_handle_dirty_dirent_node(handle, - ent->dir, ent->bh); + retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); if (unlikely(retval)) { ext4_std_error(ent->dir->i_sb, retval); return retval; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4690618a92e9..a18a47a2a1d1 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; -#ifdef CONFIG_FS_ENCRYPTION - struct page *data_page = NULL; -#endif + struct page *bounce_page = NULL; struct 
buffer_head *bh, *head; unsigned bio_start = bvec->bv_offset; unsigned bio_end = bio_start + bvec->bv_len; @@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio) if (!page) continue; -#ifdef CONFIG_FS_ENCRYPTION - if (!page->mapping) { - /* The bounce data pages are unmapped. */ - data_page = page; - fscrypt_pullback_bio_page(&page, false); + if (fscrypt_is_bounce_page(page)) { + bounce_page = page; + page = fscrypt_pagecache_page(bounce_page); } -#endif if (bio->bi_status) { SetPageError(page); @@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio) bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); if (!under_io) { -#ifdef CONFIG_FS_ENCRYPTION - if (data_page) - fscrypt_restore_control_page(data_page); -#endif + fscrypt_free_bounce_page(bounce_page); end_page_writeback(page); } } @@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct writeback_control *wbc, bool keep_towrite) { - struct page *data_page = NULL; + struct page *bounce_page = NULL; struct inode *inode = page->mapping->host; unsigned block_start; struct buffer_head *bh, *head; @@ -475,14 +467,22 @@ int ext4_bio_write_page(struct ext4_io_submit *io, bh = head = page_buffers(page); + /* + * If any blocks are being written to an encrypted file, encrypt them + * into a bounce page. For simplicity, just encrypt until the last + * block which might be needed. This may cause some unneeded blocks + * (e.g. holes) to be unnecessarily encrypted, but this is rare and + * can't happen in the common case of blocksize == PAGE_SIZE. 
+ */ if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; + unsigned int enc_bytes = round_up(len, i_blocksize(inode)); retry_encrypt: - data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, - page->index, gfp_flags); - if (IS_ERR(data_page)) { - ret = PTR_ERR(data_page); + bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, + 0, gfp_flags); + if (IS_ERR(bounce_page)) { + ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { if (io->io_bio) { ext4_io_submit(io); @@ -491,7 +491,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } - data_page = NULL; + bounce_page = NULL; goto out; } } @@ -500,8 +500,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - ret = io_submit_add_bh(io, inode, - data_page ? data_page : page, bh); + ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh); if (ret) { /* * We only get here on ENOMEM. Not much else @@ -517,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, /* Error stopped previous loop? Clean up buffers... 
*/ if (ret) { out: - if (data_page) - fscrypt_restore_control_page(data_page); + fscrypt_free_bounce_page(bounce_page); printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); redirty_page_for_writepage(wbc, page); do { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 04b4f53f0659..b3cd7655a6ff 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(journal_task), NULL, }; +ATTRIBUTE_GROUPS(ext4); /* Features this copy of ext4 supports */ EXT4_ATTR_FEATURE(lazy_itable_init); @@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(metadata_csum_seed), NULL, }; +ATTRIBUTE_GROUPS(ext4_feat); static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) { @@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = { }; static struct kobj_type ext4_sb_ktype = { - .default_attrs = ext4_attrs, + .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, + .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = (void (*)(struct kobject *))kfree, }; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eda4181d2092..a546ac8685ea 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -185,7 +185,7 @@ static void f2fs_write_end_io(struct bio *bio) continue; } - fscrypt_pullback_bio_page(&page, true); + fscrypt_finalize_bounce_page(&page); if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); @@ -362,10 +362,9 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, bio_for_each_segment_all(bvec, io->bio, iter_all) { - if (bvec->bv_page->mapping) - target = bvec->bv_page; - else - target = fscrypt_control_page(bvec->bv_page); + target = bvec->bv_page; + if (fscrypt_is_bounce_page(target)) + target = fscrypt_pagecache_page(target); if (inode && inode == target->mapping->host) return true; @@ -1727,8 
+1726,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, fio->page->index, gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, + PAGE_SIZE, 0, + gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { @@ -1900,8 +1900,7 @@ got_it: err = f2fs_inplace_write_data(fio); if (err) { if (f2fs_encrypted_file(inode)) - fscrypt_pullback_bio_page(&fio->encrypted_page, - true); + fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); } else { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 729f46a3c9ee..5c85166677d4 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -501,6 +501,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(current_reserved_blocks), NULL, }; +ATTRIBUTE_GROUPS(f2fs); static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION @@ -520,6 +521,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(sb_checksum), NULL, }; +ATTRIBUTE_GROUPS(f2fs_feat); static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, @@ -527,7 +529,7 @@ static const struct sysfs_ops f2fs_attr_ops = { }; static struct kobj_type f2fs_sb_ktype = { - .default_attrs = f2fs_attrs, + .default_groups = f2fs_groups, .sysfs_ops = &f2fs_attr_ops, .release = f2fs_sb_release, }; @@ -541,7 +543,7 @@ static struct kset f2fs_kset = { }; static struct kobj_type f2fs_feat_ktype = { - .default_attrs = f2fs_feat_attrs, + .default_groups = f2fs_feat_groups, .sysfs_ops = &f2fs_attr_ops, }; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e41cbe8e81b9..9ebfb1b28430 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -715,6 +715,7 @@ void wbc_detach_inode(struct writeback_control *wbc) void wbc_account_io(struct 
writeback_control *wbc, struct page *page, size_t bytes) { + struct cgroup_subsys_state *css; int id; /* @@ -726,7 +727,12 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, if (!wbc->wb) return; - id = mem_cgroup_css_from_page(page)->id; + css = mem_cgroup_css_from_page(page); + /* dead cgroups shouldn't contribute to inode ownership arbitration */ + if (!(css->flags & CSS_ONLINE)) + return; + + id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 24ea19cfe07e..ea8237513dfa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,16 +1317,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, unsigned reqsize; unsigned int hash; - /* - * Require sane minimum read buffer - that has capacity for fixed part - * of any request header + negotated max_write room for data. If the - * requirement is not satisfied return EINVAL to the filesystem server - * to indicate that it is not following FUSE server/client contract. - * Don't dequeue / abort any request. 
- */ - if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write)) - return -EINVAL; - restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b8f9c83835d5..5ae2828beb00 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3112,9 +3112,9 @@ out: return err; } -static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) +static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) { struct fuse_file *ff_in = file_in->private_data; struct fuse_file *ff_out = file_out->private_data; @@ -3142,6 +3142,9 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (fc->no_copy_file_range) return -EOPNOTSUPP; + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + if (fc->writeback_cache) { inode_lock(inode_in); err = fuse_writeback_range(inode_in, pos_in, pos_in + len); @@ -3152,6 +3155,10 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, inode_lock(inode_out); + err = file_modified(file_out); + if (err) + goto out; + if (fc->writeback_cache) { err = fuse_writeback_range(inode_out, pos_out, pos_out + len); if (err) @@ -3190,10 +3197,26 @@ out: clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); inode_unlock(inode_out); + file_accessed(file_in); return err; } +static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = 
fuse_file_read_iter, diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index abeac61cfed3..f42048cc5454 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -82,15 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, } /** - * gfs2_writepage_common - Common bits of writepage - * @page: The page to be written + * gfs2_writepage - Write page for writeback mappings + * @page: The page * @wbc: The writeback control - * - * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. */ - -static int gfs2_writepage_common(struct page *page, - struct writeback_control *wbc) +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); @@ -109,7 +105,9 @@ static int gfs2_writepage_common(struct page *page, page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); goto out; } - return 1; + + return nobh_writepage(page, gfs2_get_block_noalloc, wbc); + redirty: redirty_page_for_writepage(wbc, page); out: @@ -117,24 +115,6 @@ out: return 0; } -/** - * gfs2_writepage - Write page for writeback mappings - * @page: The page - * @wbc: The writeback control - * - */ - -static int gfs2_writepage(struct page *page, struct writeback_control *wbc) -{ - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - return nobh_writepage(page, gfs2_get_block_noalloc, wbc); -} - /* This is the same as calling block_write_full_page, but it also * writes pages outside of i_size */ @@ -454,8 +434,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, * * Returns: errno */ - -int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) { struct buffer_head *dibh; u64 dsize = i_size_read(&ip->i_inode); @@ -518,7 +497,7 @@ static int __gfs2_readpage(void *file, struct page *page) error = mpage_readpage(page, gfs2_block_map); } - if 
(unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; return error; @@ -635,7 +614,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) ret = -EIO; return ret; } @@ -686,47 +665,6 @@ out: } /** - * gfs2_stuffed_write_end - Write end for stuffed files - * @inode: The inode - * @dibh: The buffer_head containing the on-disk inode - * @pos: The file position - * @copied: How much was actually copied by the VFS - * @page: The page - * - * This copies the data from the page into the inode block after - * the inode data structure itself. - * - * Returns: copied bytes or errno - */ -int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page) -{ - struct gfs2_inode *ip = GFS2_I(inode); - u64 to = pos + copied; - void *kaddr; - unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); - - BUG_ON(pos + copied > gfs2_max_stuffed_size(ip)); - - kaddr = kmap_atomic(page); - memcpy(buf + pos, kaddr + pos, copied); - flush_dcache_page(page); - kunmap_atomic(kaddr); - - WARN_ON(!PageUptodate(page)); - unlock_page(page); - put_page(page); - - if (copied) { - if (inode->i_size < to) - i_size_write(inode, to); - mark_inode_dirty(inode); - } - return copied; -} - -/** * jdata_set_page_dirty - Page dirtying function * @page: The page to dirty * @@ -759,7 +697,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) return 0; if (!gfs2_is_stuffed(ip)) - dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); + dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops); gfs2_glock_dq_uninit(&i_gh); @@ -888,7 +826,7 @@ cannot_release: return 0; } -static const struct address_space_operations gfs2_writeback_aops = { +static const struct 
address_space_operations gfs2_aops = { .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, @@ -902,21 +840,6 @@ static const struct address_space_operations gfs2_writeback_aops = { .error_remove_page = generic_error_remove_page, }; -static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_writepage, - .writepages = gfs2_writepages, - .readpage = gfs2_readpage, - .readpages = gfs2_readpages, - .set_page_dirty = __set_page_dirty_buffers, - .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, - .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; - static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, .writepages = gfs2_jdata_writepages, @@ -932,15 +855,8 @@ static const struct address_space_operations gfs2_jdata_aops = { void gfs2_set_aops(struct inode *inode) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - - if (gfs2_is_jdata(ip)) + if (gfs2_is_jdata(GFS2_I(inode))) inode->i_mapping->a_ops = &gfs2_jdata_aops; - else if (gfs2_is_writeback(sdp)) - inode->i_mapping->a_ops = &gfs2_writeback_aops; - else if (gfs2_is_ordered(sdp)) - inode->i_mapping->a_ops = &gfs2_ordered_aops; else - BUG(); + inode->i_mapping->a_ops = &gfs2_aops; } diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index fa8e5d0144dd..ff9877a68780 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -8,10 +8,6 @@ #include "incore.h" -extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); -extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page); extern void adjust_fs_space(struct inode *inode); extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, unsigned int from, unsigned int len); diff 
--git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c78ccaf83ef8..79581b9bdebb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -595,7 +595,6 @@ enum alloc_state { * gfs2_iomap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode * @iomap: The iomap structure - * @flags: iomap flags * @mp: The metapath, with proper height information calculated * * In this routine we may have to alloc: @@ -622,7 +621,7 @@ enum alloc_state { */ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, - unsigned flags, struct metapath *mp) + struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -991,9 +990,12 @@ static void gfs2_write_unlock(struct inode *inode) static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len, struct iomap *iomap) { + unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int blocks; - return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0); + blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; + return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); } static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, @@ -1085,7 +1087,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } if (iomap->type == IOMAP_HOLE) { - ret = gfs2_iomap_alloc(inode, iomap, flags, mp); + ret = gfs2_iomap_alloc(inode, iomap, mp); if (ret) { gfs2_trans_end(sdp); gfs2_inplace_release(ip); @@ -1179,6 +1181,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); + if (iomap->flags & IOMAP_F_SIZE_CHANGED) + mark_inode_dirty(inode); gfs2_write_unlock(inode); out: @@ -1229,7 +1233,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, if (create) { ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp); if (!ret && iomap.type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, 
&iomap, IOMAP_WRITE, &mp); + ret = gfs2_iomap_alloc(inode, &iomap, &mp); release_metapath(&mp); } else { ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); @@ -1459,7 +1463,7 @@ int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp); + ret = gfs2_iomap_alloc(inode, iomap, &mp); release_metapath(&mp); return ret; } @@ -1859,9 +1863,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { - printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, " - "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n", - sdp->sd_fsname, + fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u," + "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 88e4f955c518..6f35d19eec25 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -750,7 +750,7 @@ static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode, struct gfs2_dirent *dent; dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_find_offset, name, ptr); - if (!dent || IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) return dent; return do_init_dirent(inode, dent, name, bh, (unsigned)(ptr - (void *)dent)); @@ -854,7 +854,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, return ERR_PTR(error); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); got_dent: - if (unlikely(dent == NULL || IS_ERR(dent))) { + if (IS_ERR_OR_NULL(dent)) { brelse(bh); bh = NULL; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 1cb0c3afd3dc..52fa1ef8400b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -379,31 +379,30 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) } /** - * gfs2_allocate_page_backing - Use bmap to 
allocate blocks + * gfs2_allocate_page_backing - Allocate blocks for a write fault * @page: The (locked) page to allocate backing for * - * We try to allocate all the blocks required for the page in - * one go. This might fail for various reasons, so we keep - * trying until all the blocks to back this page are allocated. - * If some of the blocks are already allocated, thats ok too. + * We try to allocate all the blocks required for the page in one go. This + * might fail for various reasons, so we keep trying until all the blocks to + * back this page are allocated. If some of the blocks are already allocated, + * that is ok too. */ - static int gfs2_allocate_page_backing(struct page *page) { - struct inode *inode = page->mapping->host; - struct buffer_head bh; - unsigned long size = PAGE_SIZE; - u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits); + u64 pos = page_offset(page); + u64 size = PAGE_SIZE; do { - bh.b_state = 0; - bh.b_size = size; - gfs2_block_map(inode, lblock, &bh, 1); - if (!buffer_mapped(&bh)) + struct iomap iomap = { }; + + if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap)) return -EIO; - size -= bh.b_size; - lblock += (bh.b_size >> inode->i_blkbits); - } while(size > 0); + + iomap.length = min(iomap.length, size); + size -= iomap.length; + pos += iomap.length; + } while (size > 0); + return 0; } @@ -424,7 +423,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; - u64 pos = page->index << PAGE_SHIFT; + u64 pos = page_offset(page); unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; loff_t size; @@ -1182,7 +1181,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) { if (fl->fl_type == F_UNLCK) 
locks_lock_file_wait(file, fl); return -EIO; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f1ebcb42cbf5..e23fb8b7b020 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -544,7 +544,7 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) && target != LM_ST_UNLOCKED) return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | @@ -581,7 +581,7 @@ __acquires(&gl->gl_lockref.lock) } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN, + GLOCK_BUG_ON(gl, !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } } else { /* lock_nolock */ @@ -681,7 +681,7 @@ static void delete_work_func(struct work_struct *work) goto out; inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); - if (inode && !IS_ERR(inode)) { + if (!IS_ERR_OR_NULL(inode)) { d_prune_aliases(inode); iput(inode); } @@ -1075,7 +1075,7 @@ trap_recursive: fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid)); fs_err(sdp, "lock type: %d req lock state : %d\n", gh->gh_gl->gl_name.ln_type, gh->gh_state); - gfs2_dump_glock(NULL, gl); + gfs2_dump_glock(NULL, gl, true); BUG(); } @@ -1094,7 +1094,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; if (test_bit(GLF_LRU, &gl->gl_flags)) @@ -1610,16 +1610,16 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) glock_hash_walk(thaw_glock, sdp); } -static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) { spin_lock(&gl->gl_lockref.lock); - gfs2_dump_glock(seq, gl); + gfs2_dump_glock(seq, gl, fsid); spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) { 
- dump_glock(NULL, gl); + dump_glock(NULL, gl, true); } /** @@ -1704,10 +1704,12 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) * dump_holder - print information about a glock holder * @seq: the seq_file struct * @gh: the glock holder + * @fs_id_buf: pointer to file system id (if requested) * */ -static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) +static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, + const char *fs_id_buf) { struct task_struct *gh_owner = NULL; char flags_buf[32]; @@ -1715,8 +1717,8 @@ static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) rcu_read_lock(); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", - state2str(gh->gh_state), + gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", + fs_id_buf, state2str(gh->gh_state), hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), gh->gh_error, gh->gh_owner_pid ? 
(long)pid_nr(gh->gh_owner_pid) : -1, @@ -1766,6 +1768,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * gfs2_dump_glock - print information about a glock * @seq: The seq_file struct * @gl: the glock + * @fsid: If true, also dump the file system id * * The file format is as follows: * One line per object, capital letters are used to indicate objects @@ -1779,19 +1782,24 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * */ -void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; const struct gfs2_holder *gh; char gflags_buf[32]; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + memset(fs_id_buf, 0, sizeof(fs_id_buf)); + if (fsid && sdp) /* safety precaution */ + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); dtime = jiffies - gl->gl_demote_time; dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", - state2str(gl->gl_state), + gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " + "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, gflags2str(gflags_buf, gl), @@ -1802,10 +1810,10 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl) (int)gl->gl_lockref.count, gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) - dump_holder(seq, gh); + dump_holder(seq, gh, fs_id_buf); if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) - glops->go_dump(seq, gl); + glops->go_dump(seq, gl, fs_id_buf); } static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) @@ -2006,7 +2014,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - dump_glock(seq, iter_ptr); + dump_glock(seq, iter_ptr, false); return 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 149d7f6af085..e4e0bed5257c 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -199,8 +199,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, struct gfs2_holder *gh); extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl); -#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) +extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, + bool fsid); +#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ + gfs2_dump_glock(NULL, gl, true); \ + BUG(); } } while(0) extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); @@ -266,7 +269,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object) { spin_lock(&gl->gl_lockref.lock); if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) - gfs2_dump_glock(NULL, gl); + gfs2_dump_glock(NULL, gl, true); gl->gl_object = object; spin_unlock(&gl->gl_lockref.lock); } @@ -278,7 +281,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object) * * I'd love to similarly add this: * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) - * gfs2_dump_glock(NULL, gl); + * gfs2_dump_glock(NULL, gl, true); * Unfortunately, that's not possible because as soon as gfs2_delete_inode * frees the block in the rgrp, another process can reassign it for an I_NEW * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index cf4c767005b1..ff213690e364 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -461,10 +461,12 @@ static int inode_go_lock(struct gfs2_holder *gh) * inode_go_dump - print information about an inode * @seq: The iterator * @ip: the inode + * @fs_id_buf: file system id (may be empty) * */ -static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl) +static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf) { struct gfs2_inode *ip = gl->gl_object; struct inode *inode = &ip->i_inode; @@ -477,7 +479,8 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl) nrpages = inode->i_data.nrpages; xa_unlock_irq(&inode->i_data.i_pages); - gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n", + gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu " + "p:%lu\n", fs_id_buf, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, IF2DT(ip->i_inode.i_mode), ip->i_flags, @@ -503,7 +506,8 @@ static void freeze_go_sync(struct gfs2_glock *gl) atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { - printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error); + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", + error); gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); @@ -536,7 +540,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) gfs2_consist(sdp); /* Initialize some head of the log stuff */ - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { sdp->sd_log_sequence = head.lh_sequence + 1; gfs2_log_pointers_init(sdp, head.lh_blkno); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c9af93ac6c73..7a993d7c022e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -240,7 +240,8 @@ struct gfs2_glock_operations { int (*go_demote_ok) (const 
struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl); + void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); const int go_type; const unsigned long go_flags; @@ -504,7 +505,6 @@ struct gfs2_trans { unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; - unsigned int tr_num_revoke_rm; struct list_head tr_list; struct list_head tr_databuf; @@ -609,7 +609,7 @@ struct gfs2_tune { enum { SDF_JOURNAL_CHECKED = 0, SDF_JOURNAL_LIVE = 1, - SDF_SHUTDOWN = 2, + SDF_WITHDRAWN = 2, SDF_NOBARRIERS = 3, SDF_NORECOVERY = 4, SDF_DEMOTE = 5, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b296c59832a7..2e2a8a2fb51d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -793,7 +793,7 @@ fail_free_acls: fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); - if (inode && !IS_ERR(inode)) { + if (!IS_ERR_OR_NULL(inode)) { clear_nlink(inode); if (!free_vfs_inode) mark_inode_dirty(inode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index c4c9700c366e..58e237fba565 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -882,7 +882,6 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) old->tr_num_buf_rm += new->tr_num_buf_rm; old->tr_num_databuf_rm += new->tr_num_databuf_rm; old->tr_num_revoke += new->tr_num_revoke; - old->tr_num_revoke_rm += new->tr_num_revoke_rm; list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); @@ -904,7 +903,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) set_bit(TR_ATTACHED, &tr->tr_flags); } - sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; + sdp->sd_log_commited_revoke += tr->tr_num_revoke; reserved = calc_reserved(sdp); maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; 
gfs2_assert_withdraw(sdp, maxres >= reserved); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 1921cda034fd..5b17979af539 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -759,9 +759,27 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start, if (gfs2_meta_check(sdp, bh_ip)) error = -EIO; - else + else { + struct gfs2_meta_header *mh = + (struct gfs2_meta_header *)bh_ip->b_data; + + if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) { + struct gfs2_rgrpd *rgd; + + rgd = gfs2_blk2rgrpd(sdp, blkno, false); + if (rgd && rgd->rd_addr == blkno && + rgd->rd_bits && rgd->rd_bits->bi_bh) { + fs_info(sdp, "Replaying 0x%llx but we " + "already have a bh!\n", + (unsigned long long)blkno); + fs_info(sdp, "busy:%d, pinned:%d\n", + buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0, + buffer_pinned(rgd->rd_bits->bi_bh)); + gfs2_dump_glock(NULL, rgd->rd_gl, true); + } + } mark_buffer_dirty(bh_ip); - + } brelse(bh_log); brelse(bh_ip); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 456763e18def..662ef36c1874 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -251,7 +251,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) { *bhp = NULL; return -EIO; } @@ -309,7 +309,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) { - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; wait_on_buffer(bh); @@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) gfs2_io_error_bh_wd(sdp, bh); return -EIO; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; return 0; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 
08823bb3b2d0..4a8e5a7310f0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -61,6 +61,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_complain_secs = 10; } +void free_sbd(struct gfs2_sbd *sdp) +{ + if (sdp->sd_lkstats) + free_percpu(sdp->sd_lkstats); + kfree(sdp); +} + static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; @@ -72,10 +79,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) sdp->sd_vfs = sb; sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats); - if (!sdp->sd_lkstats) { - kfree(sdp); - return NULL; - } + if (!sdp->sd_lkstats) + goto fail; sb->s_fs_info = sdp; set_bit(SDF_NOJOURNALID, &sdp->sd_flags); @@ -134,8 +139,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) mutex_init(&sdp->sd_freeze_mutex); return sdp; -} +fail: + free_sbd(sdp); + return NULL; +} /** * gfs2_check_sb - Check superblock @@ -568,7 +576,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) INIT_WORK(&jd->jd_work, gfs2_recover_func); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); - if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { + if (IS_ERR_OR_NULL(jd->jd_inode)) { if (!jd->jd_inode) error = -ENOENT; else @@ -996,7 +1004,7 @@ hostdata_error: void gfs2_lm_unmount(struct gfs2_sbd *sdp) { const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + if (likely(!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) && lm->lm_unmount) lm->lm_unmount(sdp); } @@ -1086,8 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) { /* In this case, we haven't initialized sysfs, so we have to manually free the sdp. 
*/ - free_percpu(sdp->sd_lkstats); - kfree(sdp); + free_sbd(sdp); sb->s_fs_info = NULL; return error; } @@ -1190,7 +1197,6 @@ fail_lm: gfs2_lm_unmount(sdp); fail_debug: gfs2_delete_debugfs_file(sdp); - free_percpu(sdp->sd_lkstats); /* gfs2_sys_fs_del must be the last thing we do, since it causes * sysfs to call function gfs2_sbd_release, which frees sdp. */ gfs2_sys_fs_del(sdp); @@ -1370,7 +1376,6 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); - free_percpu(sdp->sd_lkstats); kill_block_super(sb); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8189b581236d..69c4b77f127b 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1475,7 +1475,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) { if (error == 0 || error == -EROFS) return; - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); sdp->sd_log_error = error; wake_up(&sdp->sd_logd_waitq); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2299a3fa1911..c529f8749a89 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -388,7 +388,8 @@ void gfs2_recover_func(struct work_struct *work) } t_tlck = ktime_get(); - fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); + fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n", + jd->jd_jid, head.lh_tail, head.lh_blkno); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 36f20a89d0c2..49ac0a5e74ea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -610,11 +610,12 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip) return gfs2_qa_alloc(ip); } -static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, + const char *fs_id_buf) { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, 
i_res); - gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf, (unsigned long long)ip->i_no_addr, (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); @@ -1111,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + struct gfs2_sbd *sdp = rgd->rd_sbd; int valid = 1; if (rgl->rl_flags != str->rg_flags) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u", - (unsigned long long)rgd->rd_addr, + fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u", + (unsigned long long)rgd->rd_addr, be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags)); valid = 0; } if (rgl->rl_free != str->rg_free) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); + fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); valid = 0; } if (rgl->rl_dinodes != str->rg_dinodes) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_dinodes), - be32_to_cpu(str->rg_dinodes)); + fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_dinodes), + be32_to_cpu(str->rg_dinodes)); valid = 0; } if (rgl->rl_igeneration != str->rg_igeneration) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch " - "%llu/%llu", (unsigned long long)rgd->rd_addr, - (unsigned long long)be64_to_cpu(rgl->rl_igeneration), - (unsigned long long)be64_to_cpu(str->rg_igeneration)); + fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu", + (unsigned long long)rgd->rd_addr, + (unsigned long long)be64_to_cpu(rgl->rl_igeneration), + (unsigned long 
long)be64_to_cpu(str->rg_igeneration)); valid = 0; } return valid; @@ -2246,10 +2248,12 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, * gfs2_rgrp_dump - print out an rgrp * @seq: The iterator * @gl: The glock in question + * @fs_id_buf: pointer to file system id (if requested) * */ -void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf) { struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; @@ -2257,14 +2261,15 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) if (rgd == NULL) return; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; - gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n", + gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf, be32_to_cpu(rgl->rl_flags), be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); @@ -2272,7 +2277,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); - dump_rs(seq, trs); + dump_rs(seq, trs, fs_id_buf); } spin_unlock(&rgd->rd_rsspin); } @@ -2280,10 +2285,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", (unsigned long long)rgd->rd_addr); fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); - gfs2_rgrp_dump(NULL, rgd->rd_gl); + 
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); + gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); rgd->rd_flags |= GFS2_RDF_ERROR; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 6a3adf0ee0b7..c14a673ae36f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -69,7 +69,8 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl); +extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf); extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b70cea5c8c59..0acc5834f653 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -394,6 +394,7 @@ static int init_threads(struct gfs2_sbd *sdp) fail: kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; return error; } @@ -451,8 +452,12 @@ fail: freeze_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&freeze_gh); fail_threads: - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); + if (sdp->sd_quotad_process) + kthread_stop(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + if (sdp->sd_logd_process) + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; return error; } @@ -800,7 +805,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) if (!(flags & I_DIRTY_INODE)) return; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); @@ -849,12 +854,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) error = 
gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, &freeze_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + if (error && !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return error; flush_workqueue(gfs2_delete_workqueue); - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); + if (sdp->sd_quotad_process) + kthread_stop(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + if (sdp->sd_logd_process) + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); @@ -969,14 +978,14 @@ void gfs2_freeze_func(struct work_struct *work) error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, &freeze_gh); if (error) { - printk(KERN_INFO "GFS2: couldn't get freeze lock : %d\n", error); + fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error); gfs2_assert_withdraw(sdp, 0); } else { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); error = thaw_super(sb); if (error) { - printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n", - error); + fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", + error); gfs2_assert_withdraw(sdp, 0); } if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) @@ -1004,7 +1013,7 @@ static int gfs2_freeze(struct super_block *sb) if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) goto out; - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { error = -EINVAL; goto out; } @@ -1014,20 +1023,14 @@ static int gfs2_freeze(struct super_block *sb) if (!error) break; - switch (error) { - case -EBUSY: + if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); - break; - - default: + else fs_err(sdp, "error freezing FS: %d\n", error); - break; - } fs_err(sdp, "retrying...\n"); msleep(1000); } - error = 0; set_bit(SDF_FS_FROZEN, &sdp->sd_flags); out: mutex_unlock(&sdp->sd_freeze_mutex); @@ -1273,8 +1276,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, 
char *data) error = gfs2_make_fs_ro(sdp); else error = gfs2_make_fs_rw(sdp); - if (error) - return error; } sdp->sd_args = args; @@ -1300,7 +1301,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_unlock(&gt->gt_spin); gfs2_online_uevent(sdp); - return 0; + return error; } /** diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index c5f42f0c503b..9d49eaadb9d9 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -44,6 +44,8 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, extern int gfs2_statfs_sync(struct super_block *sb, int type); extern void gfs2_freeze_func(struct work_struct *work); +extern void free_sbd(struct gfs2_sbd *sdp); + extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; extern const struct export_operations gfs2_export_ops; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 159aedf63c2a..dd15b8e4af2c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -118,7 +118,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) { - unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags); + unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags); return snprintf(buf, PAGE_SIZE, "%u\n", b); } @@ -296,17 +296,18 @@ static struct attribute *gfs2_attrs[] = { &gfs2_attr_demote_rq.attr, NULL, }; +ATTRIBUTE_GROUPS(gfs2); static void gfs2_sbd_release(struct kobject *kobj) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - kfree(sdp); + free_sbd(sdp); } static struct kobj_type gfs2_ktype = { .release = gfs2_sbd_release, - .default_attrs = gfs2_attrs, + .default_groups = gfs2_groups, .sysfs_ops = &gfs2_attr_ops, }; @@ -679,7 +680,6 @@ fail_lock_module: fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_reg: - free_percpu(sdp->sd_lkstats); fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); sb->s_fs_info = NULL; diff --git 
a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 6f67ef7aa412..35e3059255fe 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -77,10 +77,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + tr->tr_num_revoke); } void gfs2_trans_end(struct gfs2_sbd *sdp) @@ -263,7 +263,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; kmem_cache_free(gfs2_bufdata_cachep, bd); - tr->tr_num_revoke_rm++; + tr->tr_num_revoke--; if (--n == 0) break; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index a7e55234211f..83f6c582773a 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -41,7 +41,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) 
struct va_format vaf; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && - test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return 0; if (fmt) { @@ -178,9 +178,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = rgd->rd_sbd; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; int rv; - gfs2_rgrp_dump(NULL, rgd->rd_gl); + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); + gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); rv = gfs2_lm_withdraw(sdp, "fatal: filesystem consistency error\n" " RG = %llu\n" @@ -256,7 +258,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line, bool withdraw) { - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) fs_err(sdp, "fatal: I/O error\n" " block = %llu\n" diff --git a/fs/inode.c b/fs/inode.c index 446d05e25f39..0f1e3b563c47 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); @@ -1899,6 +1899,26 @@ int file_update_time(struct file *file) } EXPORT_SYMBOL(file_update_time); +/* Caller must hold the file's inode lock */ +int file_modified(struct file *file) +{ + int err; + + /* + * Clear the security bits if the process is not being run by root. + * This keeps people from modifying setuid and setgid binaries. 
+ */ + err = file_remove_privs(file); + if (err) + return err; + + if (unlikely(file->f_mode & FMODE_NOCMTIME)) + return 0; + + return file_update_time(file); +} +EXPORT_SYMBOL(file_modified); + int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) diff --git a/fs/internal.h b/fs/internal.h index a48ef81be37d..2f3c3de51fad 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -40,8 +40,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) extern void guard_bio_eod(int rw, struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); /* * char_dev.c diff --git a/fs/io_uring.c b/fs/io_uring.c index 0fbb486a320e..4ed4b110a154 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -579,6 +579,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ @@ -997,9 +998,6 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); - - /* don't drop a reference to these pages */ - iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } @@ -1801,10 +1799,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || @@ -2201,11 +2197,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - if (ret == -ERESTARTSYS) - ret = -EINTR; if (sig) - restore_user_sigmask(sig, &sigsaved); + restore_user_sigmask(sig, 
&sigsaved, ret == -ERESTARTSYS); + + if (ret == -ERESTARTSYS) + ret = -EINTR; return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } @@ -2777,8 +2774,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); #if defined(CONFIG_UNIX) - if (ctx->ring_sock) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ sock_release(ctx->ring_sock); + } #endif io_mem_free(ctx->sq_ring); diff --git a/fs/iomap.c b/fs/iomap.c index 23ef63fd1669..217c3e5a13d6 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -287,7 +287,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap_readpage_ctx *ctx = data; struct page *page = ctx->cur_page; struct iomap_page *iop = iomap_page_create(inode, page); - bool is_contig = false; + bool same_page = false, is_contig = false; loff_t orig_pos = pos; unsigned poff, plen; sector_t sector; @@ -315,10 +315,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * Try to merge into a previous segment if we can. 
*/ sector = iomap_sector(iomap, pos); - if (ctx->bio && bio_end_sector(ctx->bio) == sector) { - if (__bio_try_merge_page(ctx->bio, page, plen, poff, true)) - goto done; + if (ctx->bio && bio_end_sector(ctx->bio) == sector) is_contig = true; + + if (is_contig && + __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) { + if (!same_page && iop) + atomic_inc(&iop->read_count); + goto done; } /* @@ -329,7 +333,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (iop) atomic_inc(&iop->read_count); - if (!ctx->bio || !is_contig || bio_full(ctx->bio)) { + if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -773,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page, struct iomap *iomap) { const struct iomap_page_ops *page_ops = iomap->page_ops; + loff_t old_size = inode->i_size; int ret; if (iomap->type == IOMAP_INLINE) { @@ -784,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, ret = __iomap_write_end(inode, pos, len, copied, page, iomap); } - __generic_write_end(inode, pos, ret, page); + /* + * Update the in-memory inode size after copying the data into the page + * cache. It's up to the file system to write the updated size to disk, + * preferably after I/O completion so that no stale data is exposed. 
+ */ + if (pos + ret > old_size) { + i_size_write(inode, pos + ret); + iomap->flags |= IOMAP_F_SIZE_CHANGED; + } + unlock_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, copied, page, iomap); + page_ops->page_done(inode, pos, ret, page, iomap); put_page(page); if (ret < len) @@ -1595,13 +1612,7 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index efd0ce9489ae..132fb92098c7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal, /* * write the filemap data using writepage() address_space_operations. * We don't do block allocation here even for delalloc. We don't - * use writepages() because with dealyed allocation we may be doing + * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). 
*/ -static int journal_submit_inode_data_buffers(struct address_space *mapping) +static int journal_submit_inode_data_buffers(struct address_space *mapping, + loff_t dirty_start, loff_t dirty_end) { int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = 0, - .range_end = i_size_read(mapping->host), + .range_start = dirty_start, + .range_end = dirty_end, }; ret = generic_writepages(mapping, &wbc); @@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WRITE_DATA)) continue; mapping = jinode->i_vfs_inode->i_mapping; @@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping); + err = journal_submit_inode_data_buffers(mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_keep_errors( - jinode->i_vfs_inode->i_mapping); + err = filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, 
&jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; } } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 43df0c943229..953990eb70a9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access); EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif EXPORT_SYMBOL(jbd2_journal_flush); EXPORT_SYMBOL(jbd2_journal_revoke); @@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); EXPORT_SYMBOL(jbd2_journal_inode_add_write); EXPORT_SYMBOL(jbd2_journal_inode_add_wait); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); @@ -203,7 +202,7 @@ loop: if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + jbd_debug(1, "commit_sequence=%u, commit_request=%u\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { @@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal) * IO is in progress. do_get_write_access() handles this. * * The function returns a pointer to the buffer_head to be used for IO. 
- * + * * * Return value: * <0: Error @@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) */ journal->j_commit_request = target; - jbd_debug(1, "JBD2: requesting commit %d/%d\n", + jbd_debug(1, "JBD2: requesting commit %u/%u\n", journal->j_commit_request, journal->j_commit_sequence); journal->j_running_transaction->t_requested = jiffies; @@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", journal->j_commit_request, journal->j_commit_sequence, - target, journal->j_running_transaction ? + target, journal->j_running_transaction ? journal->j_running_transaction->t_tid : 0); return 0; } @@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR - "%s: error: j_commit_request=%d, tid=%d\n", + "%s: error: j_commit_request=%u, tid=%u\n", __func__, journal->j_commit_request, tid); } #endif while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", + jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", tid, journal->j_commit_sequence); read_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_commit); @@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) trace_jbd2_update_log_tail(journal, tid, block, freed); jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " + "Cleaning journal tail from %u to %u (offset %lu), " "freeing %lu\n", journal->j_tail_sequence, tid, block, freed); @@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal) */ if (sb->s_start == 0) { jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", + "(start %ld, seq %u, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_flags |= JBD2_FLUSHED; @@ -1453,7 +1452,7 @@ static void 
jbd2_mark_journal_empty(journal_t *journal, int write_op) return; } - jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", + jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n", journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); @@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) jinode->i_next_transaction = NULL; jinode->i_vfs_inode = inode; jinode->i_flags = 0; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; INIT_LIST_HEAD(&jinode->i_list); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8ca4fddc705f..990e7b5062e7 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) * File inode in the inode list of the handle's transaction */ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, - unsigned long flags) + unsigned long flags, loff_t start_byte, loff_t end_byte) { transaction_t *transaction = handle->h_transaction; journal_t *journal; @@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); - /* - * First check whether inode isn't already on the transaction's - * lists without taking the lock. Note that this check is safe - * without the lock as we cannot race with somebody removing inode - * from the transaction. The reason is that we remove inode from the - * transaction only in journal_release_jbd_inode() and when we commit - * the transaction. We are guarded from the first case by holding - * a reference to the inode. 
We are safe against the second case - * because if jinode->i_transaction == transaction, commit code - * cannot touch the transaction because we hold reference to it, - * and if jinode->i_next_transaction == transaction, commit code - * will only file the inode where we want it. - */ - if ((jinode->i_transaction == transaction || - jinode->i_next_transaction == transaction) && - (jinode->i_flags & flags) == flags) - return 0; - spin_lock(&journal->j_list_lock); jinode->i_flags |= flags; + + if (jinode->i_dirty_end) { + jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); + jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); + } else { + jinode->i_dirty_start = start_byte; + jinode->i_dirty_end = end_byte; + } + /* Is inode already attached where we need it? */ if (jinode->i_transaction == transaction || jinode->i_next_transaction == transaction) @@ -2631,12 +2622,28 @@ done: int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) { return jbd2_journal_file_inode(handle, jinode, - JI_WRITE_DATA | JI_WAIT_DATA); + JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); } int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) { - return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, + LLONG_MAX); +} + +int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *jinode, loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, + JI_WRITE_DATA | JI_WAIT_DATA, start_byte, + start_byte + length - 1); +} + +int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, + loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, + start_byte, start_byte + length - 1); } /* diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 7d8654a1472e..f8fb89b10227 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -109,9 +109,9 @@ static int 
jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) return ret; } -int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) +int jffs2_do_readpage_unlock(void *data, struct page *pg) { - int ret = jffs2_do_readpage_nolock(inode, pg); + int ret = jffs2_do_readpage_nolock(data, pg); unlock_page(pg); return ret; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 112d85849db1..8a20ddd25f2d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct page *pg; pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT, - (void *)jffs2_do_readpage_unlock, inode); + jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index a2dbbb3f4c74..bd3d5f0ddc34 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations; extern const struct inode_operations jffs2_file_inode_operations; extern const struct address_space_operations jffs2_file_address_operations; int jffs2_fsync(struct file *, loff_t, loff_t, int); -int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); +int jffs2_do_readpage_unlock(void *data, struct page *pg); /* ioctl.c */ long jffs2_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 62f98225abb3..b11f2afa84f1 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c) c->len=4; } -static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) +static struct nlm_lockowner * +nlmclnt_get_lockowner(struct nlm_lockowner *lockowner) { refcount_inc(&lockowner->count); return lockowner; } -static void nlm_put_lockowner(struct nlm_lockowner *lockowner) +static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, 
&lockowner->host->h_lock)) return; @@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host) return res; } -static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *lockowner; list_for_each_entry(lockowner, &host->h_lockowners, list) { if (lockowner->owner != owner) continue; - return nlm_get_lockowner(lockowner); + return nlmclnt_get_lockowner(lockowner); } return NULL; } -static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *res, *new = NULL; spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL) { spin_unlock(&host->h_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL && new != NULL) { res = new; refcount_set(&new->count, 1); @@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) { spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; - new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); + new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner); list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); } @@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); list_del(&fl->fl_u.nfs_fl.list); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); - nlm_put_lockowner(fl->fl_u.nfs_fl.owner); + nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner); } static const struct 
file_lock_operations nlmclnt_lock_ops = { @@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = { static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host) { fl->fl_u.nfs_fl.state = 0; - fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner); INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list); fl->fl_ops = &nlmclnt_lock_ops; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1bddf70d9656..e4d3f783e06a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst 
*rqstp, struct nlm_res *resp) resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp) resp->status = nlmsvc_share_file(host, file, argp); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp) resp->status = nlmsvc_unshare_file(host, file, argp); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ea719cdd6a36..61d3cc2283dc 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -332,6 +332,93 @@ restart: mutex_unlock(&file->f_mutex); } +static struct nlm_lockowner * +nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) +{ + refcount_inc(&lockowner->count); + return lockowner; +} + +static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +{ + if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) + return; + list_del(&lockowner->list); + spin_unlock(&lockowner->host->h_lock); + nlmsvc_release_host(lockowner->host); + kfree(lockowner); +} + +static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *lockowner; + list_for_each_entry(lockowner, &host->h_lockowners, list) { + if (lockowner->pid != pid) + continue; + return nlmsvc_get_lockowner(lockowner); + } + return NULL; +} + +static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *res, *new = NULL; + + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + + if (res 
== NULL) { + spin_unlock(&host->h_lock); + new = kmalloc(sizeof(*res), GFP_KERNEL); + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + if (res == NULL && new != NULL) { + res = new; + /* fs/locks.c will manage the refcount through lock_ops */ + refcount_set(&new->count, 1); + new->pid = pid; + new->host = nlm_get_host(host); + list_add(&new->list, &host->h_lockowners); + new = NULL; + } + } + + spin_unlock(&host->h_lock); + kfree(new); + return res; +} + +void +nlmsvc_release_lockowner(struct nlm_lock *lock) +{ + if (lock->fl.fl_owner) + nlmsvc_put_lockowner(lock->fl.fl_owner); +} + +static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner; + new->fl_owner = nlmsvc_get_lockowner(nlm_lo); +} + +static void nlmsvc_locks_release_private(struct file_lock *fl) +{ + nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner); +} + +static const struct file_lock_operations nlmsvc_lock_ops = { + .fl_copy_lock = nlmsvc_locks_copy_lock, + .fl_release_private = nlmsvc_locks_release_private, +}; + +void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host, + pid_t pid) +{ + fl->fl_owner = nlmsvc_find_lockowner(host, pid); + if (fl->fl_owner != NULL) + fl->fl_ops = &nlmsvc_lock_ops; +} + /* * Initialize arguments for GRANTED call. The nlm_rqst structure * has been cleared already. 
@@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) /* set default data area */ call->a_args.lock.oh.data = call->a_owner; - call->a_args.lock.svid = lock->fl.fl_pid; + call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); @@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, { int error; __be32 ret; + struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", locks_inode(file->f_file)->i_sb->s_id, @@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + /* If there's a conflicting lock, remember to clean up the test lock */ + test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; + error = vfs_test_lock(file->f_file, &lock->fl); if (error) { /* We can't currently deal with deferred test requests */ @@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = lock->fl.fl_pid; + conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; conflock->fl.fl_type = lock->fl.fl_type; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); + + /* Clean up the test lock */ + lock->fl.fl_owner = NULL; + nlmsvc_put_lockowner(test_owner); + ret = nlm_lck_denied; out: return ret; @@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl) printk(KERN_WARNING "lockd: notification for unknown block!\n"); } -static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) -{ - return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; -} - -/* - * Since NLM uses two "keys" for tracking locks, we need to hash them down - * to one for the blocked_hash. 
Here, we're just xor'ing the host address - * with the pid in order to create a key value for picking a hash bucket. - */ -static unsigned long -nlmsvc_owner_key(struct file_lock *fl) -{ - return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; -} - const struct lock_manager_operations nlmsvc_lock_operations = { - .lm_compare_owner = nlmsvc_same_owner, - .lm_owner_key = nlmsvc_owner_key, .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, }; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index ea77c66d3cc3..d0bb7a6bf005 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock)); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -250,6 +259,7 @@ 
__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock)); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_share_file(host, file, argp)); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0e610f422406..028fc152da22 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -180,7 +180,7 @@ again: /* update current lock count */ file->f_locks++; - lockhost = (struct nlm_host *) fl->fl_owner; + lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host; if (match(lockhost, host)) { struct file_lock lock = *fl; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 7147e4aebecc..982629f7b120 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 7ed9edf9aed4..5fa9f48a9dba 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/locks.c b/fs/locks.c index ec1e4a5df629..686eae21daf6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) */ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { - if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) - return fl2->fl_lmops == fl1->fl_lmops && - fl1->fl_lmops->lm_compare_owner(fl1, fl2); return fl1->fl_owner == fl2->fl_owner; } @@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl) static unsigned long posix_owner_key(struct file_lock *fl) { - if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) - return fl->fl_lmops->lm_owner_key(fl); return (unsigned long)fl->fl_owner; } @@ -1534,11 +1529,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { - if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) - return false; - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); + bool rc; + + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { + rc = false; + goto trace; + } + if 
((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) { + rc = false; + goto trace; + } + + rc = locks_conflict(breaker, lease); +trace: + trace_leases_conflict(rc, lease, breaker); + return rc; } static bool @@ -1753,10 +1758,10 @@ int fcntl_getlease(struct file *filp) } /** - * check_conflicting_open - see if the given dentry points to a file that has + * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. - * @dentry: dentry to check + * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * @@ -1764,30 +1769,42 @@ int fcntl_getlease(struct file *filp) * conflict with the lease we're trying to set. */ static int -check_conflicting_open(const struct dentry *dentry, const long arg, int flags) +check_conflicting_open(struct file *filp, const long arg, int flags) { - int ret = 0; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); + int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) - return -EAGAIN; + if (arg == F_RDLCK) + return inode_is_open_for_write(inode) ? -EAGAIN : 0; + else if (arg != F_WRLCK) + return 0; - if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || - (atomic_read(&inode->i_count) > 1))) - ret = -EAGAIN; + /* + * Make sure that only read/write count is from lease requestor. + * Note that this will result in denying write leases when i_writecount + * is negative, which is what we want. (We shouldn't grant write leases + * on files open for execution.) 
+ */ + if (filp->f_mode & FMODE_WRITE) + self_wcount = 1; + else if (filp->f_mode & FMODE_READ) + self_rcount = 1; - return ret; + if (atomic_read(&inode->i_writecount) != self_wcount || + atomic_read(&inode->i_readcount) != self_rcount) + return -EAGAIN; + + return 0; } static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1822,7 +1839,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) goto out; @@ -1879,7 +1896,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * precedes these checks. 
*/ smp_mb(); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) { locks_unlink_lock_ctx(lease); goto out; diff --git a/fs/namei.c b/fs/namei.c index 20831c2fbb34..209c51a5226c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); + fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); @@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!error) { dont_mount(dentry); detach_mounts(dentry); + fsnotify_unlink(dir, dentry); } } } diff --git a/fs/namespace.c b/fs/namespace.c index b26778bdc236..6fbc9126367a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2105,6 +2105,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; commit_tree(child); } put_mountpoint(smp); @@ -2595,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path) if (!check_mnt(p)) goto out; - /* The thing moved should be either ours or completely unattached. */ - if (attached && !check_mnt(old)) + /* The thing moved must be mounted... */ + if (!is_mounted(&old->mnt)) goto out; - if (!attached && !(ns && is_anon_ns(ns))) + /* ... and either ours or the root of anon namespace */ + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) @@ -3445,6 +3447,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, ns->root = mnt; ns->mounts = 1; list_add(&mnt->mnt_list, &ns->list); + mntget(newmount.mnt); /* Attach to an apparent O_PATH fd with a note that we need to unmount * it, not just simply put it. 
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index e6a700f01452..aec769a500a1 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); + ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL, + false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..19f856f45689 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index cf42a8b939e3..f4157eb1f69d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -129,10 +129,13 @@ nfs4_file_flush(struct file *file, fl_owner_t id) } #ifdef CONFIG_NFS_V4_2 -static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t count, unsigned int flags) +static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) { + /* Only offload copy if superblock is the same */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) @@ -140,6 +143,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } +static 
ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) +{ + ssize_t ret; + + ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, + flags); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(file_in, pos_in, file_out, + pos_out, count, flags); + return ret; +} + static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) { loff_t ret; diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 4884fdae28fb..1e7296395d71 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -291,7 +291,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, - desc, "", 0, idmap); + desc, NULL, "", 0, idmap); mutex_unlock(&idmap->idmap_mutex); } if (!IS_ERR(rkey)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e38f4af20950..6418cb6c079b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1256,10 +1256,20 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - p->o_arg.umask = current_umask(); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); + if (flags & O_CREAT) { + p->o_arg.umask = current_umask(); + p->o_arg.label = nfs4_label_copy(p->a_label, label); + if (c->sattr != NULL && c->sattr->ia_valid != 0) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); + + memcpy(p->o_arg.u.verifier.data, c->verf, + sizeof(p->o_arg.u.verifier.data)); + } + } /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS * will return permission denied for all bits until close */ if (!(flags & O_EXCL)) { @@ -1283,7 +1293,6 @@ static struct 
nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = nfs4_label_copy(p->a_label, label); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: @@ -1296,13 +1305,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { - p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - - memcpy(p->o_arg.u.verifier.data, c->verf, - sizeof(p->o_arg.u.verifier.data)); - } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 52d533967485..0effeee28352 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) nfs_cancel_async_unlink(dentry); return; } - - /* - * vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. 
- */ - fsnotify_nameremove(dentry, 0); } #define SILLYNAME_PREFIX ".nfs" diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 4fb1f72a25fb..66d4c55eb48e 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; - struct timespec ts; int error; - ts = timespec64_to_timespec(inode->i_mtime); if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec_compare(&lcp->lc_mtime, &ts) < 0) - lcp->lc_mtime = timespec64_to_timespec(current_time(inode)); + timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) + lcp->lc_mtime = current_time(inode); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; - iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime); + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; if (new_size > i_size_read(inode)) { iattr.ia_valid |= ATTR_SIZE; diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 4a98537efb0f..10ec5ecdf117 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -10,6 +10,7 @@ #define NFSCACHE_H #include <linux/sunrpc/svc.h> +#include "netns.h" /* * Representation of a reply cache entry. 
@@ -77,8 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) -int nfsd_reply_cache_init(void); -void nfsd_reply_cache_shutdown(void); +int nfsd_reply_cache_init(struct nfsd_net *); +void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); int nfsd_reply_cache_stats_open(struct inode *, struct file *); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 84831253203d..76bee0a0d308 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -127,24 +127,16 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, }; -int nfsd_fault_inject_init(void) +void nfsd_fault_inject_init(void) { unsigned int i; struct nfsd_fault_inject_op *op; umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; debug_dir = debugfs_create_dir("nfsd", NULL); - if (!debug_dir) - goto fail; for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { op = &inject_ops[i]; - if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) - goto fail; + debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd); } - return 0; - -fail: - nfsd_fault_inject_cleanup(); - return -ENOMEM; } diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 7c686a270d60..bdfe5bcb3dcd 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -42,6 +42,11 @@ struct nfsd_net { bool grace_ended; time_t boot_time; + /* internal mount of the "nfsd" pseudofilesystem: */ + struct vfsmount *nfsd_mnt; + + struct dentry *nfsd_client_dir; + /* * reclaim_str_hashtbl[] holds known client info from previous reset/reboot * used in reboot/reset lease grace period processing @@ -106,6 +111,7 @@ struct nfsd_net { */ unsigned int max_connections; + u32 clientid_base; u32 clientid_counter; u32 clverifier_counter; @@ -127,6 +133,44 @@ struct nfsd_net { */ bool *nfsd_versions; bool *nfsd4_minorversions; + + /* + * Duplicate reply cache + */ + struct nfsd_drc_bucket *drc_hashtbl; + struct kmem_cache *drc_slab; + + 
/* max number of entries allowed in the cache */ + unsigned int max_drc_entries; + + /* number of significant bits in the hash value */ + unsigned int maskbits; + unsigned int drc_hashsize; + + /* + * Stats and other tracking of on the duplicate reply cache. + * These fields and the "rc" fields in nfsdstats are modified + * with only the per-bucket cache lock, which isn't really safe + * and should be fixed if we want the statistics to be + * completely accurate. + */ + + /* total number of entries */ + atomic_t num_drc_entries; + + /* cache misses due only to checksum comparison failures */ + unsigned int payload_misses; + + /* amount of memory (in bytes) currently consumed by the DRC */ + unsigned int drc_mem_usage; + + /* longest hash chain seen */ + unsigned int longest_chain; + + /* size of cache when we saw the longest hash chain */ + unsigned int longest_chain_cachesize; + + struct shrinker nfsd_reply_cache_shrinker; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 2961016097ac..d1f285245af8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->type = itm->type; strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->authname)); } static void diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618e66078ee5..7857942c5ca6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> #include <linux/jhash.h> +#include <linux/string_helpers.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass { */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); +/* + * A waitqueue where a writer to clients/#/ctl destroying a client can + * wait for cl_rpc_users 
to drop to 0 and then for the client to be + * unhashed. + */ +static DECLARE_WAIT_QUEUE_HEAD(expiry_wq); + static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; @@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); return nfs_ok; } @@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp) lockdep_assert_held(&nn->client_lock); - if (!atomic_dec_and_test(&clp->cl_refcount)) + if (!atomic_dec_and_test(&clp->cl_rpc_users)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); } static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); spin_unlock(&nn->client_lock); } @@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla idr_preload(GFP_KERNEL); spin_lock(&cl->cl_lock); - new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + /* Reserving 0 for start of file in nfsdfs "states" file: */ + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT); spin_unlock(&cl->cl_lock); idr_preload_end(); if (new_id < 0) @@ -1563,7 +1576,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) * Never use more than a third of the remaining memory, * unless it's the only way to give this client a slot: */ - avail = clamp_t(int, avail, slotsize, total_avail/3); + avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -1844,7 +1857,7 @@ 
static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); + xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL); if (clp->cl_name.data == NULL) goto err_no_name; clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE, @@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) goto err_no_hashtbl; for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); - clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); + atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -1879,6 +1891,25 @@ err_no_name: return NULL; } +static void __free_client(struct kref *k) +{ + struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref); + struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); + + free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); + kfree(clp->cl_name.data); + kfree(clp->cl_nii_domain.data); + kfree(clp->cl_nii_name.data); + idr_destroy(&clp->cl_stateids); + kmem_cache_free(client_slab, clp); +} + +static void drop_client(struct nfs4_client *clp) +{ + kref_put(&clp->cl_nfsdfs.cl_ref, __free_client); +} + static void free_client(struct nfs4_client *clp) { @@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp) free_session(ses); } rpc_destroy_wait_queue(&clp->cl_cb_waitq); - free_svc_cred(&clp->cl_cred); - kfree(clp->cl_ownerstr_hashtbl); - kfree(clp->cl_name.data); - idr_destroy(&clp->cl_stateids); - kmem_cache_free(client_slab, clp); + if (clp->cl_nfsd_dentry) { + nfsd_client_rmdir(clp->cl_nfsd_dentry); + clp->cl_nfsd_dentry = NULL; + wake_up_all(&expiry_wq); + } + drop_client(clp); } /* must be called under the client_lock */ @@ -1936,7 +1968,7 @@ 
unhash_client(struct nfs4_client *clp) static __be32 mark_client_expired_locked(struct nfs4_client *clp) { - if (atomic_read(&clp->cl_refcount)) + if (atomic_read(&clp->cl_rpc_users)) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; @@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); free_client(clp); + wake_up_all(&expiry_wq); } static void @@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) return s; } +static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) +{ + struct nfsdfs_client *nc; + nc = get_nfsdfs_client(inode); + if (!nc) + return NULL; + return container_of(nc, struct nfs4_client, cl_nfsdfs); +} + +static void seq_quote_mem(struct seq_file *m, char *data, int len) +{ + seq_printf(m, "\""); + seq_escape_mem_ascii(m, data, len); + seq_printf(m, "\""); +} + +static int client_info_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct nfs4_client *clp; + u64 clid; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + memcpy(&clid, &clp->cl_clientid, sizeof(clid)); + seq_printf(m, "clientid: 0x%llx\n", clid); + seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + seq_printf(m, "name: "); + seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); + seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); + if (clp->cl_nii_domain.data) { + seq_printf(m, "Implementation domain: "); + seq_quote_mem(m, clp->cl_nii_domain.data, + clp->cl_nii_domain.len); + seq_printf(m, "\nImplementation name: "); + seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); + seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); + } + drop_client(clp); + + return 0; +} + +static int client_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, client_info_show, inode); +} + +static const 
struct file_operations client_info_fops = { + .open = client_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void *states_start(struct seq_file *s, loff_t *pos) + __acquires(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + spin_lock(&clp->cl_lock); + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void *states_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + id = *pos; + id++; + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void states_stop(struct seq_file *s, void *v) + __releases(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + + spin_unlock(&clp->cl_lock); +} + +static void nfs4_show_superblock(struct seq_file *s, struct file *f) +{ + struct inode *inode = file_inode(f); + + seq_printf(s, "superblock: \"%02x:%02x:%ld\"", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), + inode->i_ino); +} + +static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) +{ + seq_printf(s, "owner: "); + seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); +} + +static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + unsigned int access, deny; + + if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID) + return 0; /* XXX: or SEQ_SKIP? */ + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + + access = bmap_to_share_mode(ols->st_access_bmap); + deny = bmap_to_share_mode(ols->st_deny_bmap); + + seq_printf(s, "access: \%s\%s, ", + access & NFS4_SHARE_ACCESS_READ ? "r" : "-", + access & NFS4_SHARE_ACCESS_WRITE ? 
"w" : "-"); + seq_printf(s, "deny: \%s\%s, ", + deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", + deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); + + nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + + /* + * Note: a lock stateid isn't really the same thing as a lock, + * it's the locking state held by one owner on a file, and there + * may be multiple (or no) lock ranges associated with it. + * (Same for the matter is true of open stateids.) + */ + + nfs4_show_superblock(s, file); + /* XXX: open stateid? */ + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_delegation *ds; + struct nfs4_file *nf; + struct file *file; + + ds = delegstateid(st); + nf = st->sc_file; + file = nf->fi_deleg_file; + + seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + + /* Kinda dead code as long as we only support read delegs: */ + seq_printf(s, "access: %s, ", + ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + + /* XXX: lease time, whether it's being recalled. */ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_layout_stateid *ls; + struct file *file; + + ls = container_of(st, struct nfs4_layout_stateid, ls_stid); + file = ls->ls_file; + + seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + + /* XXX: What else would be useful? 
*/ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int states_show(struct seq_file *s, void *v) +{ + struct nfs4_stid *st = v; + + switch (st->sc_type) { + case NFS4_OPEN_STID: + return nfs4_show_open(s, st); + case NFS4_LOCK_STID: + return nfs4_show_lock(s, st); + case NFS4_DELEG_STID: + return nfs4_show_deleg(s, st); + case NFS4_LAYOUT_STID: + return nfs4_show_layout(s, st); + default: + return 0; /* XXX: or SEQ_SKIP? */ + } + /* XXX: copy stateids? */ +} + +static struct seq_operations states_seq_ops = { + .start = states_start, + .next = states_next, + .stop = states_stop, + .show = states_show +}; + +static int client_states_open(struct inode *inode, struct file *file) +{ + struct seq_file *s; + struct nfs4_client *clp; + int ret; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + + ret = seq_open(file, &states_seq_ops); + if (ret) + return ret; + s = file->private_data; + s->private = clp; + return 0; +} + +static int client_opens_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct nfs4_client *clp = m->private; + + /* XXX: alternatively, we could get/drop in seq start/stop */ + drop_client(clp); + return 0; +} + +static const struct file_operations client_states_fops = { + .open = client_states_open, + .read = seq_read, + .llseek = seq_lseek, + .release = client_opens_release, +}; + +/* + * Normally we refuse to destroy clients that are in use, but here the + * administrator is telling us to just do it. 
We also want to wait + * so the caller has a guarantee that the client's locks are gone by + * the time the write returns: + */ +static void force_expire_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + bool already_expired; + + spin_lock(&clp->cl_lock); + clp->cl_time = 0; + spin_unlock(&clp->cl_lock); + + wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); + spin_lock(&nn->client_lock); + already_expired = list_empty(&clp->cl_lru); + if (!already_expired) + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); + + if (!already_expired) + expire_client(clp); + else + wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL); +} + +static ssize_t client_ctl_write(struct file *file, const char __user *buf, + size_t size, loff_t *pos) +{ + char *data; + struct nfs4_client *clp; + + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + if (size != 7 || 0 != memcmp(data, "expire\n", 7)) + return -EINVAL; + clp = get_nfsdfs_clp(file_inode(file)); + if (!clp) + return -ENXIO; + force_expire_client(clp); + drop_client(clp); + return 7; +} + +static const struct file_operations client_ctl_fops = { + .write = client_ctl_write, + .release = simple_transaction_release, +}; + +static const struct tree_descr client_files[] = { + [0] = {"info", &client_info_fops, S_IRUSR}, + [1] = {"states", &client_states_fops, S_IRUSR}, + [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, + [3] = {""}, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } 
+ gen_clid(clp, nn); + kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); - rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; + clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files); + if (!clp->cl_nfsd_dentry) { + free_client(clp); + return NULL; + } return clp; } @@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp) || !list_empty(&clp->async_copies); } +static __be32 copy_impl_id(struct nfs4_client *clp, + struct nfsd4_exchange_id *exid) +{ + if (!exid->nii_domain.data) + return 0; + xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL); + if (!clp->cl_nii_domain.data) + return nfserr_jukebox; + xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); + if (!clp->cl_nii_name.data) + return nfserr_jukebox; + clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; + clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + return 0; +} + __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = create_client(exid->clname, rqstp, &verf); if (new == NULL) return nfserr_jukebox; + status = copy_impl_id(new, exid); + if (status) + goto out_nolock; switch (exid->spa_how) { case SP4_MACH_CRED: @@ -2667,7 +3065,6 @@ out_new: new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1]; - gen_clid(new, nn); add_to_unconfirmed(new); swap(new, conf); out_copy: @@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy_clid(new, conf); gen_confirm(new, nn); } else /* case 4 
(new client) or cases 2, 3 (client reboot): */ - gen_clid(new, nn); + ; new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj if (!sop) return NULL; - sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL); if (!sop->so_owner.data) { kmem_cache_free(slab, sop); return NULL; } - sop->so_owner.len = owner->len; INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; @@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid, spin_unlock(&nn->client_lock); return nfserr_expired; } - atomic_inc(&found->cl_refcount); + atomic_inc(&found->cl_rpc_users); spin_unlock(&nn->client_lock); /* Cache the nfs4_client in cstate! */ @@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) if (fl->fl_lmops == &nfsd_posix_mng_ops) { lo = (struct nfs4_lockowner *) fl->fl_owner; - deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, - lo->lo_owner.so_owner.len, GFP_KERNEL); + xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, + GFP_KERNEL); if (!deny->ld_owner.data) /* We just don't care that much */ goto nevermind; - deny->ld_owner.len = lo->lo_owner.so_owner.len; deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { nevermind: @@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid, static inline void put_client(struct nfs4_client *clp) { - atomic_dec(&clp->cl_refcount); + atomic_dec(&clp->cl_rpc_users); } static struct nfs4_client * @@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, return; lockdep_assert_held(&nn->client_lock); - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&lst->st_locks, collect); } @@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, * Despite the fact that these functions deal * with 
64-bit integers for "count", we must * ensure that it doesn't blow up the - * clp->cl_refcount. Throw a warning if we + * clp->cl_rpc_users. Throw a warning if we * start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, if (func) { func(oop); if (collect) { - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&oop->oo_perclient, collect); } } @@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, if (dp->dl_time != 0) continue; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } @@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 52c4f6daa649..442811809f3d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } -/* - * We require the high 32 bits of 'seconds' to be 0, and - * we ignore all 32 bits of 'nseconds'. 
- */ static __be32 -nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) +nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { DECODE_HEAD; - u64 sec; READ_BUF(12); - p = xdr_decode_hyper(p, &sec); - tv->tv_sec = sec; + p = xdr_decode_hyper(p, &tv->tv_sec); tv->tv_nsec = be32_to_cpup(p++); if (tv->tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, struct xdr_netobj *label, int *umask) { - struct timespec ts; int expected_len, len = 0; u32 dummy32; char *buf; @@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_atime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_mtime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; } - /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ dummy = be32_to_cpup(p++); @@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; if (dummy == 1) { - /* nii_domain */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = nfsd4_decode_opaque(argp, &exid->nii_domain); + if (status) + goto xdr_error; /* nii_name */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = 
nfsd4_decode_opaque(argp, &exid->nii_name); + if (status) + goto xdr_error; /* nii_date */ - READ_BUF(12); - p += 3; + status = nfsd4_decode_time(argp, &exid->nii_time); + if (status) + goto xdr_error; } DECODE_TAIL; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index da52b594362a..26ad75ae2be0 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,6 +9,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/sunrpc/svc_xprt.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sunrpc/addr.h> @@ -35,48 +36,12 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; -static struct nfsd_drc_bucket *drc_hashtbl; -static struct kmem_cache *drc_slab; - -/* max number of entries allowed in the cache */ -static unsigned int max_drc_entries; - -/* number of significant bits in the hash value */ -static unsigned int maskbits; -static unsigned int drc_hashsize; - -/* - * Stats and other tracking of on the duplicate reply cache. All of these and - * the "rc" fields in nfsdstats are protected by the cache_lock - */ - -/* total number of entries */ -static atomic_t num_drc_entries; - -/* cache misses due only to checksum comparison failures */ -static unsigned int payload_misses; - -/* amount of memory (in bytes) currently consumed by the DRC */ -static unsigned int drc_mem_usage; - -/* longest hash chain seen */ -static unsigned int longest_chain; - -/* size of cache when we saw the longest hash chain */ -static unsigned int longest_chain_cachesize; - static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfsd_reply_cache_shrinker = { - .scan_objects = nfsd_reply_cache_scan, - .count_objects = nfsd_reply_cache_count, - .seeks = 1, -}; - /* * Put a cap on the size of the DRC based on the 
amount of available * low memory in the machine. @@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = { * ...with a hard cap of 256k entries. In the worst case, each entry will be * ~1k, so the above numbers should give a rough max of the amount of memory * used in k. + * + * XXX: these limits are per-container, so memory used will increase + * linearly with number of containers. Maybe that's OK. */ static unsigned int nfsd_cache_size_limit(void) @@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit) } static u32 -nfsd_cache_hash(__be32 xid) +nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) { - return hash_32(be32_to_cpu(xid), maskbits); + return hash_32(be32_to_cpu(xid), nn->maskbits); } static struct svc_cacherep * -nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) +nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, + struct nfsd_net *nn) { struct svc_cacherep *rp; - rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) } static void -nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, + struct nfsd_net *nn) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - drc_mem_usage -= rp->c_replvec.iov_len; + nn->drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); - atomic_dec(&num_drc_entries); - drc_mem_usage -= sizeof(*rp); + atomic_dec(&nn->num_drc_entries); + nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(drc_slab, rp); + kmem_cache_free(nn->drc_slab, rp); } static void -nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep 
*rp, + struct nfsd_net *nn) { spin_lock(&b->cache_lock); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); spin_unlock(&b->cache_lock); } -int nfsd_reply_cache_init(void) +int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; int status = 0; - max_drc_entries = nfsd_cache_size_limit(); - atomic_set(&num_drc_entries, 0); - hashsize = nfsd_hashsize(max_drc_entries); - maskbits = ilog2(hashsize); + nn->max_drc_entries = nfsd_cache_size_limit(); + atomic_set(&nn->num_drc_entries, 0); + hashsize = nfsd_hashsize(nn->max_drc_entries); + nn->maskbits = ilog2(hashsize); - status = register_shrinker(&nfsd_reply_cache_shrinker); + nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker.seeks = 1; + status = register_shrinker(&nn->nfsd_reply_cache_shrinker); if (status) - return status; - - drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), - 0, 0, NULL); - if (!drc_slab) goto out_nomem; - drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); - if (!drc_hashtbl) { - drc_hashtbl = vzalloc(array_size(hashsize, - sizeof(*drc_hashtbl))); - if (!drc_hashtbl) - goto out_nomem; + nn->drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + if (!nn->drc_slab) + goto out_shrinker; + + nn->drc_hashtbl = kcalloc(hashsize, + sizeof(*nn->drc_hashtbl), GFP_KERNEL); + if (!nn->drc_hashtbl) { + nn->drc_hashtbl = vzalloc(array_size(hashsize, + sizeof(*nn->drc_hashtbl))); + if (!nn->drc_hashtbl) + goto out_slab; } for (i = 0; i < hashsize; i++) { - INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); - spin_lock_init(&drc_hashtbl[i].cache_lock); + INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); + spin_lock_init(&nn->drc_hashtbl[i].cache_lock); } - drc_hashsize = hashsize; + nn->drc_hashsize = hashsize; return 0; +out_slab: + kmem_cache_destroy(nn->drc_slab); 
+out_shrinker: + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); - nfsd_reply_cache_shutdown(); return -ENOMEM; } -void nfsd_reply_cache_shutdown(void) +void nfsd_reply_cache_shutdown(struct nfsd_net *nn) { struct svc_cacherep *rp; unsigned int i; - unregister_shrinker(&nfsd_reply_cache_shrinker); + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); - for (i = 0; i < drc_hashsize; i++) { - struct list_head *head = &drc_hashtbl[i].lru_head; + for (i = 0; i < nn->drc_hashsize; i++) { + struct list_head *head = &nn->drc_hashtbl[i].lru_head; while (!list_empty(head)) { rp = list_first_entry(head, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp); + nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i], + rp, nn); } } - kvfree(drc_hashtbl); - drc_hashtbl = NULL; - drc_hashsize = 0; + kvfree(nn->drc_hashtbl); + nn->drc_hashtbl = NULL; + nn->drc_hashsize = 0; - kmem_cache_destroy(drc_slab); - drc_slab = NULL; + kmem_cache_destroy(nn->drc_slab); + nn->drc_slab = NULL; } /* @@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) } static long -prune_bucket(struct nfsd_drc_bucket *b) +prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b) */ if (rp->c_state == RC_INPROG) continue; - if (atomic_read(&num_drc_entries) <= max_drc_entries && + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); freed++; } return freed; @@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b) * Also prune the oldest ones when the total exceeds the max number of entries. 
*/ static long -prune_cache_entries(void) +prune_cache_entries(struct nfsd_net *nn) { unsigned int i; long freed = 0; - for (i = 0; i < drc_hashsize; i++) { - struct nfsd_drc_bucket *b = &drc_hashtbl[i]; + for (i = 0; i < nn->drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i]; if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b); + freed += prune_bucket(b, nn); spin_unlock(&b->cache_lock); } return freed; @@ -292,13 +271,19 @@ prune_cache_entries(void) static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - return atomic_read(&num_drc_entries); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return atomic_read(&nn->num_drc_entries); } static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - return prune_cache_entries(); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return prune_cache_entries(nn); } /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes @@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp) } static int -nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp) +nfsd_cache_key_cmp(const struct svc_cacherep *key, + const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && key->c_key.k_csum != rp->c_key.k_csum) - ++payload_misses; + ++nn->payload_misses; return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp * inserts an empty key on failure. 
*/ static struct svc_cacherep * -nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, + struct nfsd_net *nn) { struct svc_cacherep *rp, *ret = key; struct rb_node **p = &b->rb_head.rb_node, @@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) parent = *p; rp = rb_entry(parent, struct svc_cacherep, c_node); - cmp = nfsd_cache_key_cmp(key, rp); + cmp = nfsd_cache_key_cmp(key, rp, nn); if (cmp < 0) p = &parent->rb_left; else if (cmp > 0) @@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) rb_insert_color(&key->c_node, &b->rb_head); out: /* tally hash chain length stats */ - if (entries > longest_chain) { - longest_chain = entries; - longest_chain_cachesize = atomic_read(&num_drc_entries); - } else if (entries == longest_chain) { + if (entries > nn->longest_chain) { + nn->longest_chain = entries; + nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries); + } else if (entries == nn->longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min_t(unsigned int, - longest_chain_cachesize, - atomic_read(&num_drc_entries)); + nn->longest_chain_cachesize = min_t(unsigned int, + nn->longest_chain_cachesize, + atomic_read(&nn->num_drc_entries)); } lru_put_end(b, ret); @@ -400,11 +387,12 @@ out: int nfsd_cache_lookup(struct svc_rqst *rqstp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid); - struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; + u32 hash = nfsd_cache_hash(xid, nn); + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. 
*/ - rp = nfsd_reply_cache_alloc(rqstp, csum); + rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) { dprintk("nfsd: unable to allocate DRC entry!\n"); return rtn; } spin_lock(&b->cache_lock); - found = nfsd_cache_insert(b, rp); + found = nfsd_cache_insert(b, rp, nn); if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp); + nfsd_reply_cache_free_locked(NULL, rp, nn); rp = found; goto found_entry; } @@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; - atomic_inc(&num_drc_entries); - drc_mem_usage += sizeof(*rp); + atomic_inc(&nn->num_drc_entries); + nn->drc_mem_usage += sizeof(*rp); /* go ahead and prune the cache */ - prune_bucket(b); + prune_bucket(b, nn); out: spin_unlock(&b->cache_lock); return rtn; @@ -477,7 +465,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); } goto out; @@ -502,6 +490,7 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; u32 hash; @@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid); - b = &drc_hashtbl[hash]; + hash = nfsd_cache_hash(rp->c_key.k_xid, nn); + b = &nn->drc_hashtbl[hash]; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } @@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(b, rp); 
+ nfsd_reply_cache_free(b, rp, nn); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } spin_lock(&b->cache_lock); - drc_mem_usage += bufsize; + nn->drc_mem_usage += bufsize; lru_put_end(b, rp); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; @@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - seq_printf(m, "max entries: %u\n", max_drc_entries); + struct nfsd_net *nn = v; + + seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", - atomic_read(&num_drc_entries)); - seq_printf(m, "hash buckets: %u\n", 1 << maskbits); - seq_printf(m, "mem usage: %u\n", drc_mem_usage); + atomic_read(&nn->num_drc_entries)); + seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); + seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); - seq_printf(m, "payload misses: %u\n", payload_misses); - seq_printf(m, "longest chain len: %u\n", longest_chain); - seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + seq_printf(m, "payload misses: %u\n", nn->payload_misses); + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); + seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) { - return single_open(file, nfsd_reply_cache_stats_show, NULL); + struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, + nfsd_net_id); + + return single_open(file, nfsd_reply_cache_stats_show, nn); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 62c58cfeb8d8..0a9a49ded546 100644 --- a/fs/nfsd/nfsctl.c +++ 
b/fs/nfsd/nfsctl.c @@ -16,6 +16,7 @@ #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> +#include <linux/fsnotify.h> #include "idmap.h" #include "nfsd.h" @@ -53,6 +54,7 @@ enum { NFSD_RecoveryDir, NFSD_V4EndGrace, #endif + NFSD_MaxReserved }; /* @@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) * populating the filesystem. */ +/* Basically copying rpc_get_inode. */ +static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode = new_inode(sb); + if (!inode) + return NULL; + /* Following advice from simple_fill_super documentation: */ + inode->i_ino = iunique(sb, NFSD_MaxReserved); + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + default: + break; + } + return inode; +} + +static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + inode = nfsd_get_inode(dir->i_sb, mode); + if (!inode) + return -ENOMEM; + d_add(dentry, inode); + inc_nlink(dir); + fsnotify_mkdir(dir, dentry); + return 0; +} + +static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int ret = -ENOMEM; + + inode_lock(dir); + dentry = d_alloc_name(parent, name); + if (!dentry) + goto out_err; + ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600); + if (ret) + goto out_err; + if (ncl) { + d_inode(dentry)->i_private = ncl; + kref_get(&ncl->cl_ref); + } +out: + inode_unlock(dir); + return dentry; +out_err: + dentry = ERR_PTR(ret); + goto out; +} + +static void clear_ncl(struct inode *inode) +{ + struct nfsdfs_client *ncl = inode->i_private; + + inode->i_private = NULL; + synchronize_rcu(); + 
kref_put(&ncl->cl_ref, ncl->cl_release); +} + + +static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc = inode->i_private; + + if (nc) + kref_get(&nc->cl_ref); + return nc; +} + +struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc; + + rcu_read_lock(); + nc = __get_nfsdfs_client(inode); + rcu_read_unlock(); + return nc; +} +/* from __rpc_unlink */ +static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) +{ + int ret; + + clear_ncl(d_inode(dentry)); + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + WARN_ON_ONCE(ret); +} + +static void nfsdfs_remove_files(struct dentry *root) +{ + struct dentry *dentry, *tmp; + + list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) { + if (!simple_positive(dentry)) { + WARN_ON_ONCE(1); /* I think this can't happen? */ + continue; + } + nfsdfs_remove_file(d_inode(root), dentry); + } +} + +/* XXX: cut'n'paste from simple_fill_super; figure out if we could share + * code instead. */ +static int nfsdfs_create_files(struct dentry *root, + const struct tree_descr *files) +{ + struct inode *dir = d_inode(root); + struct inode *inode; + struct dentry *dentry; + int i; + + inode_lock(dir); + for (i = 0; files->name && files->name[0]; i++, files++) { + if (!files->name) + continue; + dentry = d_alloc_name(root, files->name); + if (!dentry) + goto out; + inode = nfsd_get_inode(d_inode(root)->i_sb, + S_IFREG | files->mode); + if (!inode) { + dput(dentry); + goto out; + } + inode->i_fop = files->ops; + inode->i_private = __get_nfsdfs_client(dir); + d_add(dentry, inode); + fsnotify_create(dir, dentry); + } + inode_unlock(dir); + return 0; +out: + nfsdfs_remove_files(root); + inode_unlock(dir); + return -ENOMEM; +} + +/* on success, returns positive number unique to that client. 
*/ +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files) +{ + struct dentry *dentry; + char name[11]; + int ret; + + sprintf(name, "%u", id); + + dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); + if (IS_ERR(dentry)) /* XXX: tossing errors? */ + return NULL; + ret = nfsdfs_create_files(dentry, files); + if (ret) { + nfsd_client_rmdir(dentry); + return NULL; + } + return dentry; +} + +/* Taken from __rpc_rmdir: */ +void nfsd_client_rmdir(struct dentry *dentry) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct inode *inode = d_inode(dentry); + int ret; + + inode_lock(dir); + nfsdfs_remove_files(dentry); + clear_ncl(inode); + dget(dentry); + ret = simple_rmdir(dir, dentry); + WARN_ON_ONCE(ret); + d_delete(dentry); + inode_unlock(dir); +} + static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + struct dentry *dentry; + int ret; + static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", @@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) /* last one */ {""} }; get_net(sb->s_fs_info); - return simple_fill_super(sb, 0x6e667364, nfsd_files); + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + nn->nfsd_client_dir = dentry; + return 0; + } static struct dentry *nfsd_mount(struct file_system_type *fs_type, @@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; + struct vfsmount *mnt; struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); @@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net) goto 
out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + retval = nfsd_reply_cache_init(nn); + if (retval) + goto out_drc_error; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; nn->track_reclaim_completes = false; nn->clverifier_counter = prandom_u32(); - nn->clientid_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->ntf_refcnt, 0); init_waitqueue_head(&nn->ntf_wq); + + mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); + if (IS_ERR(mnt)) { + retval = PTR_ERR(mnt); + goto out_mount_err; + } + nn->nfsd_mnt = mnt; return 0; +out_mount_err: + nfsd_reply_cache_shutdown(nn); +out_drc_error: + nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1262,6 +1481,10 @@ out_export_error: static __net_exit void nfsd_exit_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + mntput(nn->nfsd_mnt); + nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); @@ -1291,13 +1514,8 @@ static int __init init_nfsd(void) retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ - if (retval) - goto out_exit_pnfs; + nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ - retval = nfsd_reply_cache_init(); - if (retval) - goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1311,11 +1529,8 @@ out_free_all: remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); - nfsd_reply_cache_shutdown(); -out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); -out_exit_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); @@ -1328,7 
+1543,6 @@ out_unregister_pernet: static void __exit exit_nfsd(void) { - nfsd_reply_cache_shutdown(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 24187b5dd638..af2947551e9c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -22,6 +22,7 @@ #include <uapi/linux/nfsd/debug.h> +#include "netns.h" #include "stats.h" #include "export.h" @@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +struct nfsdfs_client { + struct kref cl_ref; + void (*cl_release)(struct kref *kref); +}; + +struct nfsdfs_client *get_nfsdfs_client(struct inode *); +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); +void nfsd_client_rmdir(struct dentry *dentry); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern const struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0b74d371ed67..5dbd16946e8e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -39,6 +39,7 @@ #include <linux/refcount.h> #include <linux/sunrpc/svc_xprt.h> #include "nfsfh.h" +#include "nfsd.h" typedef struct { u32 cl_boot; @@ -316,6 +317,10 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_minorversion; + /* NFSv4.1 client implementation id: */ + struct xdr_netobj cl_nii_domain; + struct xdr_netobj cl_nii_name; + struct timespec cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; @@ -347,9 +352,13 @@ struct nfs4_client { struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; /* number of rpc's in progress over an associated session: */ - atomic_t cl_refcount; + atomic_t cl_rpc_users; + struct nfsdfs_client cl_nfsdfs; struct nfs4_op_map cl_spo_must_allow; + /* debugging info 
directory under nfsd/clients/ : */ + struct dentry *cl_nfsd_dentry; + /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; @@ -663,7 +672,7 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn); /* nfs fault injection functions */ #ifdef CONFIG_NFSD_FAULT_INJECTION -int nfsd_fault_inject_init(void); +void nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); u64 nfsd_inject_print_clients(void); @@ -684,7 +693,7 @@ u64 nfsd_inject_forget_delegations(u64); u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); u64 nfsd_inject_recall_delegations(u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_init(void) {} static inline void nfsd_fault_inject_cleanup(void) {} #endif /* CONFIG_NFSD_FAULT_INJECTION */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fc24ee47eab5..c85783e536d5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * If utimes(2) and friends are called with times not NULL, we should * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission - * will return EACCESS, when the caller's effective UID does not match + * will return EACCES, when the caller's effective UID does not match * the owner of the file, and the caller is not privileged. In this * situation, we should return EPERM(notify_change will return this). 
*/ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index feeb6d4bdffd..d64c870f998a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -410,6 +410,9 @@ struct nfsd4_exchange_id { int spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; + struct xdr_netobj nii_domain; + struct xdr_netobj nii_name; + struct timespec64 nii_time; }; struct nfsd4_sequence { @@ -472,7 +475,7 @@ struct nfsd4_layoutcommit { u32 lc_reclaim; /* request */ u32 lc_newoffset; /* request */ u64 lc_last_wr; /* request */ - struct timespec lc_mtime; /* request */ + struct timespec64 lc_mtime; /* request */ u32 lc_layout_type; /* request */ u32 lc_up_len; /* layout length */ void *lc_up_layout; /* decoded by callback */ diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e6fde1a5c072..5778d1347b35 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -288,10 +288,13 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, /* * For queues with unlimited length lost events are not expected and * can possibly have security implications. Avoid losing events when - * memory is short. + * memory is short. For the limited size queues, avoid OOM killer in the + * target monitoring memcg as it may have security repercussion. */ if (group->max_events == UINT_MAX) gfp |= __GFP_NOFAIL; + else + gfp |= __GFP_RETRY_MAYFAIL; /* Whoever is interested in the event, pays for the allocation. */ memalloc_use_memcg(group->memcg); @@ -355,6 +358,10 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) /* Mark is just getting destroyed or created? 
*/ if (!conn) continue; + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) + continue; + /* Pairs with smp_wmb() in fsnotify_add_mark_list() */ + smp_rmb(); fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a90bb19dcfa2..91006f47e420 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return 0; } +static int fanotify_events_supported(struct path *path, __u64 mask) +{ + /* + * Some filesystems such as 'proc' acquire unusual locks when opening + * files. For them fanotify permission events have high chances of + * deadlocking the system - open done when reporting fanotify event + * blocks on this "unusual" lock while another process holding the lock + * waits for fanotify permission event to be answered. Just disallow + * permission events for such filesystems. 
+ */ + if (mask & FANOTIFY_PERM_EVENTS && + path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) + return -EINVAL; + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (flags & FAN_MARK_ADD) { + ret = fanotify_events_supported(&path, mask); + if (ret) + goto path_put_and_out; + } + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { ret = fanotify_test_fid(&path, &__fsid); if (ret) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4eb2ebfac468..2ecef6155fc0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -95,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb) } /* - * fsnotify_nameremove - a filename was removed from a directory - * - * This is mostly called under parent vfs inode lock so name and - * dentry->d_parent should be stable. However there are some corner cases where - * inode lock is not held. So to be on the safe side and be reselient to future - * callers and out of tree users of d_delete(), we do not assume that d_parent - * and d_name are stable and we use dget_parent() and - * take_dentry_name_snapshot() to grab stable references. - */ -void fsnotify_nameremove(struct dentry *dentry, int isdir) -{ - struct dentry *parent; - struct name_snapshot name; - __u32 mask = FS_DELETE; - - /* d_delete() of pseudo inode? (e.g. 
__ns_get_path() playing tricks) */ - if (IS_ROOT(dentry)) - return; - - if (isdir) - mask |= FS_ISDIR; - - parent = dget_parent(dentry); - /* Avoid unneeded take_dentry_name_snapshot() */ - if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) && - !(dentry->d_sb->s_fsnotify_mask & FS_DELETE)) - goto out_dput; - - take_dentry_name_snapshot(&name, dentry); - - fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &name.name, 0); - - release_dentry_name_snapshot(&name); - -out_dput: - dput(parent); -} -EXPORT_SYMBOL(fsnotify_nameremove); - -/* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 2fda08b2b885..d510223d302c 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -90,9 +90,13 @@ int inotify_handle_event(struct fsnotify_group *group, i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); - /* Whoever is interested in the event, pays for the allocation. */ + /* + * Whoever is interested in the event, pays for the allocation. Do not + * trigger OOM killer in the target monitoring memcg as it may have + * security repercussion. 
+ */ memalloc_use_memcg(group->memcg); - event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT); + event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); memalloc_unuse_memcg(); if (unlikely(!event)) { diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 25eb247ea85a..99ddd126f6f0 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -482,10 +482,13 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->type = type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ - if (fsid) + if (fsid) { conn->fsid = *fsid; - else + conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID; + } else { conn->fsid.val[0] = conn->fsid.val[1] = 0; + conn->flags = 0; + } if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -560,7 +563,12 @@ restart: if (err) return err; goto restart; - } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) { + conn->fsid = *fsid; + /* Pairs with smp_rmb() in fanotify_get_fsid() */ + smp_wmb(); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID; + } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) && (fsid->val[0] != conn->fsid.val[0] || fsid->val[1] != conn->fsid.val[1])) { /* diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d1348fc4ca6d..0c335b51043d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6191,17 +6191,17 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, if (le16_to_cpu(tl->tl_used)) { trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used)); - *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); + /* + * Assuming the write-out below goes well, this copy will be + * passed back to recovery for processing. + */ + *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL); if (!(*tl_copy)) { status = -ENOMEM; mlog_errno(status); goto bail; } - /* Assuming the write-out below goes well, this copy - * will be passed back to recovery for processing. 
*/ - memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); - /* All we need to do to clear the truncate log is set * tl_used. */ tl->tl_used = 0; diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 005b813a56b6..429e6a8359a5 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -242,57 +242,29 @@ static struct dentry *blockcheck_debugfs_create(const char *name, static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) { if (stats) { - debugfs_remove(stats->b_debug_check); - stats->b_debug_check = NULL; - debugfs_remove(stats->b_debug_failure); - stats->b_debug_failure = NULL; - debugfs_remove(stats->b_debug_recover); - stats->b_debug_recover = NULL; - debugfs_remove(stats->b_debug_dir); + debugfs_remove_recursive(stats->b_debug_dir); stats->b_debug_dir = NULL; } } -static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - int rc = -EINVAL; - - if (!stats) - goto out; - stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); - if (!stats->b_debug_dir) - goto out; - stats->b_debug_check = - blockcheck_debugfs_create("blocks_checked", - stats->b_debug_dir, - &stats->b_check_count); + blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir, + &stats->b_check_count); - stats->b_debug_failure = - blockcheck_debugfs_create("checksums_failed", - stats->b_debug_dir, - &stats->b_failure_count); + blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir, + &stats->b_failure_count); - stats->b_debug_recover = - blockcheck_debugfs_create("ecc_recoveries", - stats->b_debug_dir, - &stats->b_recover_count); - if (stats->b_debug_check && stats->b_debug_failure && - stats->b_debug_recover) - rc = 0; - -out: - if (rc) - ocfs2_blockcheck_debug_remove(stats); - return rc; + blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir, + &stats->b_recover_count); } 
#else -static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - return 0; } static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) @@ -301,10 +273,10 @@ static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats * #endif /* CONFIG_DEBUG_FS */ /* Always-called wrappers for starting and stopping the debugfs files */ -int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - return ocfs2_blockcheck_debug_install(stats, parent); + ocfs2_blockcheck_debug_install(stats, parent); } void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index f2d2689407fa..8f17d2c85f40 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -25,9 +25,6 @@ struct ocfs2_blockcheck_stats { * ocfs2_blockcheck_stats_debugfs_install() */ struct dentry *b_debug_dir; /* Parent of the debugfs files */ - struct dentry *b_debug_check; /* Exposes b_check_count */ - struct dentry *b_debug_failure; /* Exposes b_failure_count */ - struct dentry *b_debug_recover; /* Exposes b_recover_count */ }; @@ -56,8 +53,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, struct ocfs2_blockcheck_stats *stats); /* Debug Initialization */ -int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent); void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats); /* diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 
7a3a096856a8..f1b613327ac8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -92,10 +92,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_REGION_PINNED "pinned" static struct dentry *o2hb_debug_dir; -static struct dentry *o2hb_debug_livenodes; -static struct dentry *o2hb_debug_liveregions; -static struct dentry *o2hb_debug_quorumregions; -static struct dentry *o2hb_debug_failedregions; static LIST_HEAD(o2hb_all_regions); @@ -1184,7 +1180,7 @@ bail: if (atomic_read(&reg->hr_steady_iterations) != 0) { if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) { printk(KERN_NOTICE "o2hb: Unable to stabilize " - "heartbeart on region %s (%s)\n", + "heartbeat on region %s (%s)\n", config_item_name(&reg->hr_item), reg->hr_dev_name); atomic_set(&reg->hr_steady_iterations, 0); @@ -1391,11 +1387,7 @@ static const struct file_operations o2hb_debug_fops = { void o2hb_exit(void) { - debugfs_remove(o2hb_debug_failedregions); - debugfs_remove(o2hb_debug_quorumregions); - debugfs_remove(o2hb_debug_liveregions); - debugfs_remove(o2hb_debug_livenodes); - debugfs_remove(o2hb_debug_dir); + debugfs_remove_recursive(o2hb_debug_dir); kfree(o2hb_db_livenodes); kfree(o2hb_db_liveregions); kfree(o2hb_db_quorumregions); @@ -1419,79 +1411,37 @@ static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, &o2hb_debug_fops); } -static int o2hb_debug_init(void) +static void o2hb_debug_init(void) { - int ret = -ENOMEM; - o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { - mlog_errno(ret); - goto bail; - } - o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, - o2hb_debug_dir, - &o2hb_db_livenodes, - sizeof(*o2hb_db_livenodes), - O2HB_DB_TYPE_LIVENODES, - sizeof(o2hb_live_node_bitmap), - O2NM_MAX_NODES, - o2hb_live_node_bitmap); - if (!o2hb_debug_livenodes) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir, + &o2hb_db_livenodes, 
sizeof(*o2hb_db_livenodes), + O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap), + O2NM_MAX_NODES, o2hb_live_node_bitmap); - o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, - o2hb_debug_dir, - &o2hb_db_liveregions, - sizeof(*o2hb_db_liveregions), - O2HB_DB_TYPE_LIVEREGIONS, - sizeof(o2hb_live_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_live_region_bitmap); - if (!o2hb_debug_liveregions) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir, + &o2hb_db_liveregions, sizeof(*o2hb_db_liveregions), + O2HB_DB_TYPE_LIVEREGIONS, + sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS, + o2hb_live_region_bitmap); - o2hb_debug_quorumregions = - o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, - o2hb_debug_dir, - &o2hb_db_quorumregions, - sizeof(*o2hb_db_quorumregions), - O2HB_DB_TYPE_QUORUMREGIONS, - sizeof(o2hb_quorum_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_quorum_region_bitmap); - if (!o2hb_debug_quorumregions) { - mlog_errno(ret); - goto bail; - } - - o2hb_debug_failedregions = - o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, - o2hb_debug_dir, - &o2hb_db_failedregions, - sizeof(*o2hb_db_failedregions), - O2HB_DB_TYPE_FAILEDREGIONS, - sizeof(o2hb_failed_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_failed_region_bitmap); - if (!o2hb_debug_failedregions) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir, + &o2hb_db_quorumregions, + sizeof(*o2hb_db_quorumregions), + O2HB_DB_TYPE_QUORUMREGIONS, + sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS, + o2hb_quorum_region_bitmap); - ret = 0; -bail: - if (ret) - o2hb_exit(); - - return ret; + o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir, + &o2hb_db_failedregions, + sizeof(*o2hb_db_failedregions), + O2HB_DB_TYPE_FAILEDREGIONS, + sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS, + o2hb_failed_region_bitmap); } -int o2hb_init(void) +void o2hb_init(void) { int i; @@ -1511,7 +1461,7 @@ int o2hb_init(void) 
o2hb_dependent_users = 0; - return o2hb_debug_init(); + o2hb_debug_init(); } /* if we're already in a callback then we're already serialized by the sem */ diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 7f37540ac4ab..beed31ea86cf 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -63,7 +63,7 @@ void o2hb_unregister_callback(const char *region_uuid, void o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_exit(void); -int o2hb_init(void); +void o2hb_init(void); int o2hb_check_node_heartbeating_no_sem(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num); void o2hb_stop_all_regions(void); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 0784575f4c2a..02bf4a1774cc 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -38,10 +38,6 @@ #define SHOW_SOCK_STATS 1 static struct dentry *o2net_dentry; -static struct dentry *sc_dentry; -static struct dentry *nst_dentry; -static struct dentry *stats_dentry; -static struct dentry *nodes_dentry; static DEFINE_SPINLOCK(o2net_debug_lock); @@ -490,36 +486,23 @@ static const struct file_operations nodes_fops = { void o2net_debugfs_exit(void) { - debugfs_remove(nodes_dentry); - debugfs_remove(stats_dentry); - debugfs_remove(sc_dentry); - debugfs_remove(nst_dentry); - debugfs_remove(o2net_dentry); + debugfs_remove_recursive(o2net_dentry); } -int o2net_debugfs_init(void) +void o2net_debugfs_init(void) { umode_t mode = S_IFREG|S_IRUSR; o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); - if (o2net_dentry) - nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode, - o2net_dentry, NULL, &nst_seq_fops); - if (nst_dentry) - sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode, - o2net_dentry, NULL, &sc_seq_fops); - if (sc_dentry) - stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode, - o2net_dentry, NULL, &stats_seq_fops); - if (stats_dentry) - nodes_dentry = 
debugfs_create_file(NODES_DEBUG_NAME, mode, - o2net_dentry, NULL, &nodes_fops); - if (nodes_dentry) - return 0; - - o2net_debugfs_exit(); - mlog_errno(-ENOMEM); - return -ENOMEM; + + debugfs_create_file(NST_DEBUG_NAME, mode, o2net_dentry, NULL, + &nst_seq_fops); + debugfs_create_file(SC_DEBUG_NAME, mode, o2net_dentry, NULL, + &sc_seq_fops); + debugfs_create_file(STATS_DEBUG_NAME, mode, o2net_dentry, NULL, + &stats_seq_fops); + debugfs_create_file(NODES_DEBUG_NAME, mode, o2net_dentry, NULL, + &nodes_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 2234f7fd1f7c..7a7640c59f3c 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -828,9 +828,7 @@ static int __init init_o2nm(void) { int ret = -1; - ret = o2hb_init(); - if (ret) - goto out; + o2hb_init(); ret = o2net_init(); if (ret) diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 3d5d4b2b1356..5c424a099280 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -76,7 +76,7 @@ static void o2quo_fence_self(void) }; } -/* Indicate that a timeout occurred on a hearbeat region write. The +/* Indicate that a timeout occurred on a heartbeat region write. The * other nodes in the cluster may consider us dead at that time so we * want to "fence" ourselves so that we don't scribble on the disk * after they think they've recovered us. This can't solve all diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index c599463d0694..48a3398f0bf5 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1762,7 +1762,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, (msecs_to_jiffies(o2net_reconnect_delay()) + 1); if (node_num != o2nm_this_node()) { - /* believe it or not, accept and node hearbeating testing + /* believe it or not, accept and node heartbeating testing * can succeed for this node before we got here.. 
so * only use set_nn_state to clear the persistent error * if that hasn't already happened */ @@ -2129,8 +2129,7 @@ int o2net_init(void) o2quo_init(); - if (o2net_debugfs_init()) - goto out; + o2net_debugfs_init(); o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index dd4242be3f1f..de87cbffd175 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -109,16 +109,15 @@ struct o2net_send_tracking; struct o2net_sock_container; #ifdef CONFIG_DEBUG_FS -int o2net_debugfs_init(void); +void o2net_debugfs_init(void); void o2net_debugfs_exit(void); void o2net_debug_add_nst(struct o2net_send_tracking *nst); void o2net_debug_del_nst(struct o2net_send_tracking *nst); void o2net_debug_add_sc(struct o2net_sock_container *sc); void o2net_debug_del_sc(struct o2net_sock_container *sc); #else -static inline int o2net_debugfs_init(void) +static inline void o2net_debugfs_init(void) { - return 0; } static inline void o2net_debugfs_exit(void) { diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 2d016937fdda..42a61eecdacd 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, out_attach: spin_lock(&dentry_attach_lock); + if (unlikely(dentry->d_fsdata && !alias)) { + /* d_fsdata is set by a racing thread which is doing + * the same thing as this thread is doing. Leave the racing + * thread going ahead and we return here. 
+ */ + spin_unlock(&dentry_attach_lock); + iput(dl->dl_inode); + ocfs2_lock_res_free(&dl->dl_lockres); + kfree(dl); + return 0; + } + dentry->d_fsdata = dl; dl->dl_count++; spin_unlock(&dentry_attach_lock); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c8af5bc9e980..a4b58ba99927 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -851,7 +851,7 @@ static const struct file_operations debug_state_fops = { /* end - debug state funcs */ /* files in subroot */ -int dlm_debug_init(struct dlm_ctxt *dlm) +void dlm_debug_init(struct dlm_ctxt *dlm) { struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; @@ -860,10 +860,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_state_fops); - if (!dc->debug_state_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping lockres */ dc->debug_lockres_dentry = @@ -871,20 +867,12 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops); - if (!dc->debug_lockres_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping mles */ dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops); - if (!dc->debug_mle_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping lockres on the purge list */ dc->debug_purgelist_dentry = @@ -892,15 +880,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_purgelist_fops); - if (!dc->debug_purgelist_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - return 0; - -bail: - return -ENOMEM; } void dlm_debug_shutdown(struct dlm_ctxt *dlm) @@ -920,24 +899,16 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm) /* subroot - domain dir */ int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { - dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, - dlm_debugfs_root); - if (!dlm->dlm_debugfs_subroot) { - mlog_errno(-ENOMEM); - goto bail; 
- } - dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), GFP_KERNEL); if (!dlm->dlm_debug_ctxt) { mlog_errno(-ENOMEM); - goto bail; + return -ENOMEM; } + dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, + dlm_debugfs_root); return 0; -bail: - dlm_destroy_debugfs_subroot(dlm); - return -ENOMEM; } void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) @@ -946,14 +917,9 @@ void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) } /* debugfs root */ -int dlm_create_debugfs_root(void) +void dlm_create_debugfs_root(void) { dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); - if (!dlm_debugfs_root) { - mlog_errno(-ENOMEM); - return -ENOMEM; - } - return 0; } void dlm_destroy_debugfs_root(void) diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index 74d019694c7e..7d0c7c9013ce 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -28,20 +28,19 @@ struct debug_lockres { struct dlm_lock_resource *dl_res; }; -int dlm_debug_init(struct dlm_ctxt *dlm); +void dlm_debug_init(struct dlm_ctxt *dlm); void dlm_debug_shutdown(struct dlm_ctxt *dlm); int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); -int dlm_create_debugfs_root(void); +void dlm_create_debugfs_root(void); void dlm_destroy_debugfs_root(void); #else -static inline int dlm_debug_init(struct dlm_ctxt *dlm) +static inline void dlm_debug_init(struct dlm_ctxt *dlm) { - return 0; } static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) { @@ -53,9 +52,8 @@ static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { } -static inline int dlm_create_debugfs_root(void) +static inline void dlm_create_debugfs_root(void) { - return 0; } static inline void dlm_destroy_debugfs_root(void) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 9021e72e1f98..7338b5d4647c 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ 
b/fs/ocfs2/dlm/dlmdomain.c @@ -1881,11 +1881,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) goto bail; } - status = dlm_debug_init(dlm); - if (status < 0) { - mlog_errno(status); - goto bail; - } + dlm_debug_init(dlm); snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name); dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0); @@ -2346,9 +2342,7 @@ static int __init dlm_init(void) goto error; } - status = dlm_create_debugfs_root(); - if (status) - goto error; + dlm_create_debugfs_root(); return 0; error: diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 810f841494ef..74b768ca1cd8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2161,7 +2161,7 @@ put: * think that $RECOVERY is currently mastered by a dead node. If so, * we wait a short time to allow that node to get notified by its own * heartbeat stack, then check again. All $RECOVERY lock resources - * mastered by dead nodes are purged when the hearbeat callback is + * mastered by dead nodes are purged when the heartbeat callback is * fired, so we can know for sure that it is safe to continue once * the node returns a live node or no node. 
*/ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e22d6a115220..064ce5bbc3f6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1109,7 +1109,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, { u64 mig_cookie = be64_to_cpu(mres->mig_cookie); int mres_total_locks = be32_to_cpu(mres->total_locks); - int sz, ret = 0, status = 0; + int ret = 0, status = 0; u8 orig_flags = mres->flags, orig_master = mres->master; @@ -1117,9 +1117,6 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, if (!mres->num_locks) return 0; - sz = sizeof(struct dlm_migratable_lockres) + - (mres->num_locks * sizeof(struct dlm_migratable_lock)); - /* add an all-done flag if we reached the last lock */ orig_flags = mres->flags; BUG_ON(total_locks > mres_total_locks); @@ -1133,7 +1130,8 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, /* send it */ ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, - sz, send_to, &status); + struct_size(mres, ml, mres->num_locks), + send_to, &status); if (ret < 0) { /* XXX: negative status is not handled. * this will end up killing this node. 
*/ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index b5fc5d3c7525..14207234fa3d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -426,6 +426,7 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) { res->l_lock_refresh = 0; + res->l_lock_wait = 0; memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); } @@ -460,6 +461,8 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, if (ret) stats->ls_fail++; + + stats->ls_last = ktime_to_us(ktime_get_real()); } static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) @@ -467,6 +470,21 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) lockres->l_lock_refresh++; } +static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_mask_waiter *mw; + + if (list_empty(&lockres->l_mask_waiters)) { + lockres->l_lock_wait = 0; + return; + } + + mw = list_first_entry(&lockres->l_mask_waiters, + struct ocfs2_mask_waiter, mw_item); + lockres->l_lock_wait = + ktime_to_us(ktime_mono_to_real(mw->mw_lock_start)); +} + static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { mw->mw_lock_start = ktime_get(); @@ -482,6 +500,9 @@ static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) { } +static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres) +{ +} static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { } @@ -875,6 +896,7 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres, list_del_init(&mw->mw_item); mw->mw_status = 0; complete(&mw->mw_complete); + ocfs2_track_lock_wait(lockres); } } static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) @@ -1386,6 +1408,7 @@ static void 
lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); mw->mw_mask = mask; mw->mw_goal = goal; + ocfs2_track_lock_wait(lockres); } /* returns 0 if the mw that was removed was already satisfied, -EBUSY @@ -1402,6 +1425,7 @@ static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, list_del_init(&mw->mw_item); init_completion(&mw->mw_complete); + ocfs2_track_lock_wait(lockres); } return ret; @@ -2989,6 +3013,8 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) kref_init(&dlm_debug->d_refcnt); INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); dlm_debug->d_locking_state = NULL; + dlm_debug->d_locking_filter = NULL; + dlm_debug->d_filter_secs = 0; out: return dlm_debug; } @@ -3079,17 +3105,43 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) * - Lock stats printed * New in version 3 * - Max time in lock stats is in usecs (instead of nsecs) + * New in version 4 + * - Add last pr/ex unlock times and first lock wait time in usecs */ -#define OCFS2_DLM_DEBUG_STR_VERSION 3 +#define OCFS2_DLM_DEBUG_STR_VERSION 4 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) { int i; char *lvb; struct ocfs2_lock_res *lockres = v; +#ifdef CONFIG_OCFS2_FS_STATS + u64 now, last; + struct ocfs2_dlm_debug *dlm_debug = + ((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug; +#endif if (!lockres) return -EINVAL; +#ifdef CONFIG_OCFS2_FS_STATS + if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) { + now = ktime_to_us(ktime_get_real()); + if (lockres->l_lock_prmode.ls_last > + lockres->l_lock_exmode.ls_last) + last = lockres->l_lock_prmode.ls_last; + else + last = lockres->l_lock_exmode.ls_last; + /* + * Use d_filter_secs field to filter lock resources dump, + * the default d_filter_secs(0) value filters nothing, + * otherwise, only dump the last N seconds active lock + * resources. 
+ */ + if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs) + return 0; + } +#endif + seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) @@ -3131,6 +3183,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) # define lock_refresh(_l) ((_l)->l_lock_refresh) +# define lock_last_prmode(_l) ((_l)->l_lock_prmode.ls_last) +# define lock_last_exmode(_l) ((_l)->l_lock_exmode.ls_last) +# define lock_wait(_l) ((_l)->l_lock_wait) #else # define lock_num_prmode(_l) (0) # define lock_num_exmode(_l) (0) @@ -3141,6 +3196,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) # define lock_max_prmode(_l) (0) # define lock_max_exmode(_l) (0) # define lock_refresh(_l) (0) +# define lock_last_prmode(_l) (0ULL) +# define lock_last_exmode(_l) (0ULL) +# define lock_wait(_l) (0ULL) #endif /* The following seq_print was added in version 2 of this output */ seq_printf(m, "%u\t" @@ -3151,7 +3209,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) "%llu\t" "%u\t" "%u\t" - "%u\t", + "%u\t" + "%llu\t" + "%llu\t" + "%llu\t", lock_num_prmode(lockres), lock_num_exmode(lockres), lock_num_prmode_failed(lockres), @@ -3160,7 +3221,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) lock_total_exmode(lockres), lock_max_prmode(lockres), lock_max_exmode(lockres), - lock_refresh(lockres)); + lock_refresh(lockres), + lock_last_prmode(lockres), + lock_last_exmode(lockres), + lock_wait(lockres)); /* End the line */ seq_printf(m, "\n"); @@ -3214,9 +3278,8 @@ static const struct file_operations ocfs2_dlm_debug_fops = { .llseek = seq_lseek, }; -static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) +static void ocfs2_dlm_init_debug(struct ocfs2_super *osb) { - int ret = 0; struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; dlm_debug->d_locking_state = debugfs_create_file("locking_state", 
@@ -3224,16 +3287,11 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - if (!dlm_debug->d_locking_state) { - ret = -EINVAL; - mlog(ML_ERROR, - "Unable to create locking state debugfs file.\n"); - goto out; - } - ocfs2_get_dlm_debug(dlm_debug); -out: - return ret; + dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter", + 0600, + osb->osb_debug_root, + &dlm_debug->d_filter_secs); } static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) @@ -3242,6 +3300,7 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) if (dlm_debug) { debugfs_remove(dlm_debug->d_locking_state); + debugfs_remove(dlm_debug->d_locking_filter); ocfs2_put_dlm_debug(dlm_debug); } } @@ -3256,11 +3315,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) goto local; } - status = ocfs2_dlm_init_debug(osb); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_dlm_init_debug(osb); /* launch downconvert thread */ osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s", @@ -4352,7 +4407,6 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) static int ocfs2_downconvert_thread(void *arg) { - int status = 0; struct ocfs2_super *osb = arg; /* only quit once we've been asked to stop and there is no more @@ -4370,7 +4424,7 @@ static int ocfs2_downconvert_thread(void *arg) } osb->dc_task = NULL; - return status; + return 0; } void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index f03674afbd30..158e5af767fd 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -424,12 +424,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; - alloc_copy = kmalloc(bh->b_size, GFP_NOFS); + alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; } - memcpy(alloc_copy, alloc, bh->b_size); 
status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1272,13 +1271,12 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, * local alloc shutdown won't try to double free main bitmap * bits. Make a copy so the sync function knows which bits to * free. */ - alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); + alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; mlog_errno(status); goto bail; } - memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a4647a646f07..fddbbd60f434 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -150,6 +150,7 @@ struct ocfs2_lock_stats { /* Storing max wait in usecs saves 24 bytes per inode */ u32 ls_max; /* Max wait in USEC */ + u64 ls_last; /* Last unlock time in USEC */ }; #endif @@ -191,6 +192,7 @@ struct ocfs2_lock_res { #ifdef CONFIG_OCFS2_FS_STATS struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */ u32 l_lock_refresh; /* Disk refreshes */ + u64 l_lock_wait; /* First lock wait time */ struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -222,6 +224,8 @@ struct ocfs2_orphan_scan { struct ocfs2_dlm_debug { struct kref d_refcnt; struct dentry *d_locking_state; + struct dentry *d_locking_filter; + u32 d_filter_secs; struct list_head d_lockres_tracking; }; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a201f9780b35..8b2f39506648 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1079,33 +1079,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - if (!osb->osb_debug_root) { - status = -EINVAL; - mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); - goto read_super_error; - } osb->osb_ctxt 
= debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (!osb->osb_ctxt) { - status = -EINVAL; - mlog_errno(status); - goto read_super_error; - } - if (ocfs2_meta_ecc(osb)) { - status = ocfs2_blockcheck_stats_debugfs_install( - &osb->osb_ecc_stats, - osb->osb_debug_root); - if (status) { - mlog(ML_ERROR, - "Unable to create blockcheck statistics " - "files\n"); - goto read_super_error; - } - } + if (ocfs2_meta_ecc(osb)) + ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, + osb->osb_debug_root); status = ocfs2_mount_volume(sb); if (status < 0) @@ -1592,11 +1574,6 @@ static int __init ocfs2_init(void) goto out2; ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); - if (!ocfs2_debugfs_root) { - status = -ENOMEM; - mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); - goto out3; - } ocfs2_set_locking_protocol(); diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 87b1a6fce628..25543a966c48 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -64,7 +64,7 @@ struct client_debug_mask { __u64 mask2; }; -static int orangefs_kernel_debug_init(void); +static void orangefs_kernel_debug_init(void); static int orangefs_debug_help_open(struct inode *, struct file *); static void *help_start(struct seq_file *, loff_t *); @@ -99,7 +99,6 @@ static char *debug_help_string; static char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; static char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; -static struct dentry *help_file_dentry; static struct dentry *client_debug_dentry; static struct dentry *debug_dir; @@ -151,10 +150,8 @@ static DEFINE_MUTEX(orangefs_help_file_lock); * initialize kmod debug operations, create orangefs debugfs dir and * ORANGEFS_KMOD_DEBUG_HELP_FILE. 
*/ -int orangefs_debugfs_init(int debug_mask) +void orangefs_debugfs_init(int debug_mask) { - int rc = -ENOMEM; - /* convert input debug mask to a 64-bit unsigned integer */ orangefs_gossip_debug_mask = (unsigned long long)debug_mask; @@ -183,37 +180,21 @@ int orangefs_debugfs_init(int debug_mask) (unsigned long long)orangefs_gossip_debug_mask); debug_dir = debugfs_create_dir("orangefs", NULL); - if (!debug_dir) { - pr_info("%s: debugfs_create_dir failed.\n", __func__); - goto out; - } - help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, - 0444, - debug_dir, - debug_help_string, - &debug_help_fops); - if (!help_file_dentry) { - pr_info("%s: debugfs_create_file failed.\n", __func__); - goto out; - } + debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, 0444, debug_dir, + debug_help_string, &debug_help_fops); orangefs_debug_disabled = 0; - rc = orangefs_kernel_debug_init(); - -out: - - return rc; + orangefs_kernel_debug_init(); } /* * initialize the kernel-debug file. */ -static int orangefs_kernel_debug_init(void) +static void orangefs_kernel_debug_init(void) { int rc = -ENOMEM; - struct dentry *ret; char *k_buffer = NULL; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); @@ -230,24 +211,11 @@ static int orangefs_kernel_debug_init(void) pr_info("%s: overflow 1!\n", __func__); } - ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, - 0444, - debug_dir, - k_buffer, - &kernel_debug_fops); - if (!ret) { - pr_info("%s: failed to create %s.\n", - __func__, - ORANGEFS_KMOD_DEBUG_FILE); - goto out; - } - - rc = 0; + debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, + &kernel_debug_fops); out: - gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); - return rc; } @@ -353,12 +321,6 @@ static int orangefs_client_debug_init(void) debug_dir, c_buffer, &kernel_debug_fops); - if (!client_debug_dentry) { - pr_info("%s: failed to create updated %s.\n", - __func__, - ORANGEFS_CLIENT_DEBUG_FILE); - goto out; - } rc = 0; 
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h index 51147f9ce3d6..502f6dedccde 100644 --- a/fs/orangefs/orangefs-debugfs.h +++ b/fs/orangefs/orangefs-debugfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -int orangefs_debugfs_init(int); +void orangefs_debugfs_init(int); void orangefs_debugfs_cleanup(void); int orangefs_prepare_debugfs_help_string(int); int orangefs_debugfs_new_client_mask(void __user *); diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 4f2d7ee0d2d1..c010c1fddafc 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -129,9 +129,7 @@ static int __init orangefs_init(void) if (ret) goto cleanup_key_table; - ret = orangefs_debugfs_init(module_parm_debug_mask); - if (ret) - goto debugfs_init_failed; + orangefs_debugfs_init(module_parm_debug_mask); ret = orangefs_sysfs_init(); if (ret) @@ -161,8 +159,6 @@ cleanup_device: orangefs_dev_cleanup(); sysfs_init_failed: - -debugfs_init_failed: orangefs_debugfs_cleanup(); cleanup_key_table: diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 56feaa739979..b801c6353100 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
*/ #include <linux/module.h> @@ -37,7 +34,7 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param) } module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); -MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing"); +MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); int ovl_copy_xattr(struct dentry *old, struct dentry *new) { diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93872bb50230..702aa63f6774 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -21,7 +18,7 @@ static unsigned short ovl_redirect_max = 256; module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); -MODULE_PARM_DESC(ovl_redirect_max, +MODULE_PARM_DESC(redirect_max, "Maximum length of absolute redirect xattr value"); static int ovl_set_redirect(struct dentry *dentry, bool samedir); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index cc1c9e5606ba..cb8ec1f65c03 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Overlayfs NFS export support. * * Amir Goldstein <amir73il@gmail.com> * * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 340a6ad45914..e235a635d9ec 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/cred.h> @@ -409,37 +406,16 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd, return ret; } -static unsigned int ovl_get_inode_flags(struct inode *inode) -{ - unsigned int flags = READ_ONCE(inode->i_flags); - unsigned int ovl_iflags = 0; - - if (flags & S_SYNC) - ovl_iflags |= FS_SYNC_FL; - if (flags & S_APPEND) - ovl_iflags |= FS_APPEND_FL; - if (flags & S_IMMUTABLE) - ovl_iflags |= FS_IMMUTABLE_FL; - if (flags & S_NOATIME) - ovl_iflags |= FS_NOATIME_FL; - - return ovl_iflags; -} - static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, - unsigned long arg) + unsigned long arg, unsigned int iflags) { long ret; struct inode *inode = file_inode(file); - unsigned int flags; - unsigned int old_flags; + unsigned int old_iflags; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - ret = mnt_want_write_file(file); if (ret) return ret; @@ -448,8 +424,8 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, /* Check the capability before cred override */ ret = -EPERM; - old_flags = ovl_get_inode_flags(inode); - if (((flags ^ old_flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + old_iflags = READ_ONCE(inode->i_flags); + if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) && !capable(CAP_LINUX_IMMUTABLE)) goto unlock; @@ -469,6 +445,63 @@ unlock: } +static unsigned int ovl_fsflags_to_iflags(unsigned int flags) +{ + unsigned int iflags = 0; + + if (flags & FS_SYNC_FL) + iflags |= S_SYNC; + if (flags & FS_APPEND_FL) + iflags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + iflags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ 
+ unsigned int flags; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsflags_to_iflags(flags)); +} + +static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags) +{ + unsigned int iflags = 0; + + if (xflags & FS_XFLAG_SYNC) + iflags |= S_SYNC; + if (xflags & FS_XFLAG_APPEND) + iflags |= S_APPEND; + if (xflags & FS_XFLAG_IMMUTABLE) + iflags |= S_IMMUTABLE; + if (xflags & FS_XFLAG_NOATIME) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fsxattr fa; + + memset(&fa, 0, sizeof(fa)); + if (copy_from_user(&fa, (void __user *) arg, sizeof(fa))) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsxflags_to_iflags(fa.fsx_xflags)); +} + static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret; @@ -480,8 +513,11 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case FS_IOC_SETFLAGS: + ret = ovl_ioctl_set_fsflags(file, cmd, arg); + break; + case FS_IOC_FSSETXATTR: - ret = ovl_ioctl_set_flags(file, cmd, arg); + ret = ovl_ioctl_set_fsxflags(file, cmd, arg); break; default: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f7eba21effa5..7663aeb85fa3 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
*/ #include <linux/fs.h> @@ -553,15 +550,15 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, int xinobits = ovl_xino_bits(inode->i_sb); /* - * When NFS export is enabled and d_ino is consistent with st_ino - * (samefs or i_ino has enough bits to encode layer), set the same - * value used for d_ino to i_ino, because nfsd readdirplus compares - * d_ino values to i_ino values of child entries. When called from + * When d_ino is consistent with st_ino (samefs or i_ino has enough + * bits to encode layer), set the same value used for st_ino to i_ino, + * so inode number exposed via /proc/locks and the like will be + * consistent with d_ino and st_ino values. An i_ino value inconsistent + * with d_ino also causes nfsd readdirplus to fail. When called from * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). */ - if (inode->i_sb->s_export_op && - (ovl_same_sb(inode->i_sb) || xinobits)) { + if (ovl_same_sb(inode->i_sb) || xinobits) { inode->i_ino = ino; if (xinobits && fsid && !(ino >> (64 - xinobits))) inode->i_ino |= (unsigned long)fsid << (64 - xinobits); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index badf039267a2..e9717c2f7d45 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cec40077b522..6934bcf030f0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 6ed1ace8f8b3..28a2d12a1029 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ struct ovl_config { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cc8303a806b4..47a91c9733a5 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 746ea36f3171..b368e2e102fa 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
*/ #include <uapi/linux/magic.h> @@ -31,29 +28,29 @@ struct ovl_dir_cache; static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_dir_def, +MODULE_PARM_DESC(redirect_dir, "Default to on or off for the redirect_dir feature"); static bool ovl_redirect_always_follow = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); module_param_named(redirect_always_follow, ovl_redirect_always_follow, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_always_follow, +MODULE_PARM_DESC(redirect_always_follow, "Follow redirects even if redirect_dir feature is turned off"); static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); module_param_named(index, ovl_index_def, bool, 0644); -MODULE_PARM_DESC(ovl_index_def, +MODULE_PARM_DESC(index, "Default to on or off for the inodes index feature"); static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); -MODULE_PARM_DESC(ovl_nfs_export_def, +MODULE_PARM_DESC(nfs_export, "Default to on or off for the NFS export feature"); static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); -MODULE_PARM_DESC(ovl_xino_auto_def, +MODULE_PARM_DESC(xino_auto, "Auto enable xino feature"); static void ovl_entry_stack_free(struct ovl_entry *oe) @@ -66,7 +63,7 @@ static void ovl_entry_stack_free(struct ovl_entry *oe) static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); module_param_named(metacopy, ovl_metacopy_def, bool, 0644); -MODULE_PARM_DESC(ovl_metacopy_def, +MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); static void ovl_dentry_release(struct dentry *dentry) @@ -995,8 +992,8 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, int err; trap = ovl_get_trap_inode(sb, dir); - err = PTR_ERR(trap); - 
if (IS_ERR(trap)) { + err = PTR_ERR_OR_ZERO(trap); + if (err) { if (err == -ELOOP) pr_err("overlayfs: conflicting %s path\n", name); return err; @@ -1471,23 +1468,20 @@ out_err: * Check if this layer root is a descendant of: * - another layer of this overlayfs instance * - upper/work dir of any overlayfs instance - * - a disconnected dentry (detached root) */ static int ovl_check_layer(struct super_block *sb, struct dentry *dentry, const char *name) { - struct dentry *next, *parent; - bool is_root = false; + struct dentry *next = dentry, *parent; int err = 0; - if (!dentry || dentry == dentry->d_sb->s_root) + if (!dentry) return 0; - next = dget(dentry); - /* Walk back ancestors to fs root (inclusive) looking for traps */ - do { - parent = dget_parent(next); - is_root = (parent == next); + parent = dget_parent(next); + + /* Walk back ancestors to root (inclusive) looking for traps */ + while (!err && parent != next) { if (ovl_is_inuse(parent)) { err = -EBUSY; pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n", @@ -1496,17 +1490,12 @@ static int ovl_check_layer(struct super_block *sb, struct dentry *dentry, err = -ELOOP; pr_err("overlayfs: overlapping %s path\n", name); } - dput(next); next = parent; - } while (!err && !is_root); - - /* Did we really walk to fs root or found a detached root? */ - if (!err && next != dentry->d_sb->s_root) { - err = -ESTALE; - pr_err("overlayfs: disconnected %s path\n", name); + parent = dget_parent(next); + dput(next); } - dput(next); + dput(parent); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index e135064e87ad..f5678a3f8350 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/pnode.c b/fs/pnode.c index 595857a1883e..49f6d7ff2139 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -261,7 +261,6 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); - child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 62ee41b4bbd0..4c3dcb718961 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -98,3 +98,7 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..46dcb6f0eccf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. 
*/ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..77eb628ecc7f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -532,8 +532,7 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; - points = oom_badness(task, NULL, NULL, totalpages) * - 1000 / totalpages; + points = oom_badness(task, totalpages) * 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; @@ -1962,9 +1961,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { - down_read(&mm->mmap_sem); - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); - up_read(&mm->mmap_sem); + status = down_read_killable(&mm->mmap_sem); + if (!status) { + exact_vma_exists = !!find_exact_vma(mm, vm_start, + vm_end); + up_read(&mm->mmap_sem); + } } mmput(mm); @@ -2010,8 +2012,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = down_read_killable(&mm->mmap_sem); + if (rc) + goto out_mmput; + rc = -ENOENT; - down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = vma->vm_file->f_path; @@ -2107,7 +2112,11 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + result = ERR_PTR(-EINTR); + if (down_read_killable(&mm->mmap_sem)) + goto out_put_mm; + + result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; @@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, out_no_vma: up_read(&mm->mmap_sem); +out_put_mm: mmput(mm); out_put_task: put_task_struct(task); @@ -2160,7 +2170,12 @@ 
proc_map_files_readdir(struct file *file, struct dir_context *ctx) mm = get_task_mm(task); if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + + ret = down_read_killable(&mm->mmap_sem); + if (ret) { + mmput(mm); + goto out_put_task; + } nr_files = 0; @@ -3061,6 +3076,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3077,8 +3095,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); @@ -3449,6 +3466,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 568d90e17c17..465ea0153b2a 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Committed_AS: ", committed); seq_printf(m, "VmallocTotal: %8lu kB\n", (unsigned long)VMALLOC_TOTAL >> 10); - show_val_kb(m, "VmallocUsed: ", 0ul); + show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages()); show_val_kb(m, "VmallocChunk: ", 0ul); show_val_kb(m, "Percpu: ", pcpu_nr_pages()); diff --git a/fs/proc/root.c b/fs/proc/root.c index 8b145e7b9661..522199e9525e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = { .init_fs_context = 
proc_init_fs_context, .parameters = &proc_fs_parameters, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 01d4eb0e6bd1..dedca3da428a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -166,7 +166,11 @@ static void *m_start(struct seq_file *m, loff_t *ppos) if (!mm || !mmget_not_zero(mm)) return NULL; - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + mmput(mm); + return ERR_PTR(-EINTR); + } + hold_task_mempolicy(priv); priv->tail_vma = get_gate_vma(mm); @@ -417,17 +421,53 @@ struct mem_size_stats { unsigned long shared_hugetlb; unsigned long private_hugetlb; u64 pss; + u64 pss_anon; + u64 pss_file; + u64 pss_shmem; u64 pss_locked; u64 swap_pss; bool check_shmem_swap; }; +static void smaps_page_accumulate(struct mem_size_stats *mss, + struct page *page, unsigned long size, unsigned long pss, + bool dirty, bool locked, bool private) +{ + mss->pss += pss; + + if (PageAnon(page)) + mss->pss_anon += pss; + else if (PageSwapBacked(page)) + mss->pss_shmem += pss; + else + mss->pss_file += pss; + + if (locked) + mss->pss_locked += pss; + + if (dirty || PageDirty(page)) { + if (private) + mss->private_dirty += size; + else + mss->shared_dirty += size; + } else { + if (private) + mss->private_clean += size; + else + mss->shared_clean += size; + } +} + static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked) { int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; + /* + * First accumulate quantities that depend only on |size| and the type + * of the compound page. 
+ */ if (PageAnon(page)) { mss->anonymous += size; if (!PageSwapBacked(page) && !dirty && !PageDirty(page)) @@ -440,42 +480,25 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, mss->referenced += size; /* + * Then accumulate quantities that may depend on sharing, or that may + * differ page-by-page. + * * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). */ if (page_count(page) == 1) { - if (dirty || PageDirty(page)) - mss->private_dirty += size; - else - mss->private_clean += size; - mss->pss += (u64)size << PSS_SHIFT; - if (locked) - mss->pss_locked += (u64)size << PSS_SHIFT; + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, + locked, true); return; } - for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); - unsigned long pss = (PAGE_SIZE << PSS_SHIFT); - - if (mapcount >= 2) { - if (dirty || PageDirty(page)) - mss->shared_dirty += PAGE_SIZE; - else - mss->shared_clean += PAGE_SIZE; - mss->pss += pss / mapcount; - if (locked) - mss->pss_locked += pss / mapcount; - } else { - if (dirty || PageDirty(page)) - mss->private_dirty += PAGE_SIZE; - else - mss->private_clean += PAGE_SIZE; - mss->pss += pss; - if (locked) - mss->pss_locked += pss; - } + unsigned long pss = PAGE_SIZE << PSS_SHIFT; + if (mapcount >= 2) + pss /= mapcount; + smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked, + mapcount < 2); } } @@ -754,10 +777,23 @@ static void smap_gather_stats(struct vm_area_struct *vma, seq_put_decimal_ull_width(m, str, (val) >> 10, 8) /* Show the contents common for smaps and smaps_rollup */ -static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss) +static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, + bool rollup_mode) { SEQ_PUT_DEC("Rss: ", mss->resident); SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); + if (rollup_mode) { + /* + * These are 
meaningful only for smaps_rollup, otherwise two of + * them are zero, and the other one is the same as Pss. + */ + SEQ_PUT_DEC(" kB\nPss_Anon: ", + mss->pss_anon >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_File: ", + mss->pss_file >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_Shmem: ", + mss->pss_shmem >> PSS_SHIFT); + } SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); @@ -794,7 +830,7 @@ static int show_smap(struct seq_file *m, void *v) SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, false); seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); @@ -828,7 +864,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); - down_read(&mm->mmap_sem); + ret = down_read_killable(&mm->mmap_sem); + if (ret) + goto out_put_mm; + hold_task_mempolicy(priv); for (vma = priv->mm->mmap; vma; vma = vma->vm_next) { @@ -841,12 +880,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v) seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, true); release_task_mempolicy(priv); up_read(&mm->mmap_sem); - mmput(mm); +out_put_mm: + mmput(mm); out_put_task: put_task_struct(priv->task); priv->task = NULL; @@ -1132,7 +1172,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, goto out_mm; } - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + count = -EINTR; + goto out_mm; + } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { @@ -1539,7 +1582,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* overflow ? 
*/ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; - down_read(&mm->mmap_sem); + ret = down_read_killable(&mm->mmap_sem); + if (ret) + goto out_free; ret = walk_page_range(start_vaddr, end, &pagemap_walk); up_read(&mm->mmap_sem); start_vaddr = end; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 36bf0f2e102e..7907e6419e57 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -211,7 +211,11 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm || !mmget_not_zero(mm)) return NULL; - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + mmput(mm); + return ERR_PTR(-EINTR); + } + /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7bb96fdd38ad..57957c91c6df 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -166,7 +166,7 @@ void __weak elfcorehdr_free(unsigned long long addr) */ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, false); + return read_from_oldmem(buf, count, ppos, 0, sev_active()); } /* @@ -174,7 +174,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) */ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, sme_active()); + return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active()); } /* @@ -374,7 +374,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, buflen); start = m->paddr + *fpos - m->offset; tmp = read_from_oldmem(buffer, tsz, &start, - userbuf, sme_active()); + userbuf, mem_encrypt_active()); if (tmp < 0) return tmp; buflen -= tsz; diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 8e0a17ce3180..bfbfc2698070 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -112,27 +112,13 @@ static struct dentry *pstore_ftrace_dir; void pstore_register_ftrace(void) { - struct dentry *file; - if 
(!psinfo->write) return; pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); - if (!pstore_ftrace_dir) { - pr_err("%s: unable to create pstore directory\n", __func__); - return; - } - - file = debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, - NULL, &pstore_knob_fops); - if (!file) { - pr_err("%s: unable to create record_ftrace file\n", __func__); - goto err_file; - } - return; -err_file: - debugfs_remove(pstore_ftrace_dir); + debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, NULL, + &pstore_knob_fops); } void pstore_unregister_ftrace(void) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 89a80b568a17..7fbe8f058220 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -318,22 +318,21 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) goto fail; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) - goto fail_alloc; - private->record = record; - scnprintf(name, sizeof(name), "%s-%s-%llu%s", pstore_type_to_name(record->type), record->psi->name, record->id, record->compressed ? 
".enc.z" : ""); + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) + goto fail_inode; + dentry = d_alloc_name(root, name); if (!dentry) goto fail_private; + private->record = record; inode->i_size = private->total_size = size; - inode->i_private = private; if (record->time.tv_sec) @@ -349,7 +348,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) fail_private: free_pstore_private(private); -fail_alloc: +fail_inode: iput(inode); fail: diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 5b7709894415..2bb3468fc93a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -655,6 +655,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, struct ramoops_platform_data *pdata) { struct device_node *of_node = pdev->dev.of_node; + struct device_node *parent_node; struct resource *res; u32 value; int ret; @@ -689,6 +690,26 @@ static int ramoops_parse_dt(struct platform_device *pdev, #undef parse_size + /* + * Some old Chromebooks relied on the kernel setting the + * console_size and pmsg_size to the record size since that's + * what the downstream kernel did. These same Chromebooks had + * "ramoops" straight under the root node which isn't + * according to the current upstream bindings (though it was + * arguably acceptable under a prior version of the bindings). + * Let's make those old Chromebooks work by detecting that + * we're not a child of "reserved-memory" and mimicking the + * expected behavior. 
+ */ + parent_node = of_get_parent(of_node); + if (!of_node_name_eq(parent_node, "reserved-memory") && + !pdata->console_size && !pdata->ftrace_size && + !pdata->pmsg_size && !pdata->ecc_info.ecc_size) { + pdata->console_size = pdata->record_size; + pdata->pmsg_size = pdata->record_size; + } + of_node_put(parent_node); + return 0; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9ad72ea7f71f..be9c471cdbc8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt) /* * Dquot List Management: - * The quota code uses three lists for dquot management: the inuse_list, - * free_dquots, and dquot_hash[] array. A single dquot structure may be - * on all three lists, depending on its current state. + * The quota code uses four lists for dquot management: the inuse_list, + * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot + * structure may be on some of those lists, depending on its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. @@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt) * dqstats.free_dquots gives the number of dquots on the list. When * dquot is invalidated it's completely released from memory. * + * Dirty dquots are added to the dqi_dirty_list of quota_info when marked + * dirty, and this list is searched when writing dirty dquots back to + * quota file. Note that some filesystems do dirty dquot tracking on their + * own (e.g. in a journal) and thus don't use dqi_dirty_list. + * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. 
@@ -1996,8 +2001,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) &warn_to[cnt]); if (ret) goto over_quota; - ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, - &warn_to[cnt]); + ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, + DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index fd5dd806f1b9..cb13fb76dbee 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state) return flags; } -static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) +static int quota_getstate(struct super_block *sb, int type, + struct fs_quota_stat *fqs) { - int type; struct qc_state state; int ret; @@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATE quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) return 0; } -static int quota_getxstate(struct super_block *sb, void __user *addr) +static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; - ret = quota_getstate(sb, &fqs); + ret = quota_getstate(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } -static int 
quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) +static int quota_getstatev(struct super_block *sb, int type, + struct fs_quota_statv *fqs) { - int type; struct qc_state state; int ret; @@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATV quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) return 0; } -static int quota_getxstatev(struct super_block *sb, void __user *addr) +static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; @@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) default: return -EINVAL; } - ret = quota_getstatev(sb, &fqs); + ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; @@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: - return quota_getxstate(sb, addr); + return quota_getxstate(sb, type, addr); case Q_XGETQSTATV: - return quota_getxstatev(sb, addr); + return quota_getxstatev(sb, type, addr); case Q_XSETQLIM: return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: diff --git a/fs/read_write.c b/fs/read_write.c index c543d965e288..1f5088dec566 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1565,6 +1565,58 @@ 
COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +/** + * generic_copy_file_range - copy data between two files + * @file_in: file structure to read from + * @pos_in: file offset to read from + * @file_out: file structure to write data to + * @pos_out: file offset to write data to + * @len: amount of data to copy + * @flags: copy flags + * + * This is a generic filesystem helper to copy data from one file to another. + * It has no constraints on the source or destination file owners - the files + * can belong to different superblocks and different filesystem types. Short + * copies are allowed. + * + * This should be called from the @file_out filesystem, as per the + * ->copy_file_range() method. + * + * Returns the number of bytes copied or a negative error indicating the + * failure. + */ + +ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); +} +EXPORT_SYMBOL(generic_copy_file_range); + +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + /* + * Although we now allow filesystems to handle cross sb copy, passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. NFS defines + * several different file_system_type structures, but they all end up + * using the same ->copy_file_range() function pointer. 
+ */ + if (file_out->f_op->copy_file_range && + file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) + return file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1574,17 +1626,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); ssize_t ret; if (flags != 0) return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, + flags); + if (unlikely(ret)) + return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) @@ -1594,15 +1644,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (unlikely(ret)) return ret; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - if (len == 0) return 0; @@ -1612,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). 
*/ - if (file_in->f_op->remap_file_range) { + if (file_in->f_op->remap_file_range && + file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { loff_t cloned; cloned = file_in->f_op->remap_file_range(file_in, pos_in, @@ -1625,16 +1667,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, } } - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); - + ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + WARN_ON_ONCE(ret == -EOPNOTSUPP); done: if (ret > 0) { fsnotify_access(file_in); @@ -1951,25 +1986,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; /* If can't alter the file contents, we're done. */ - if (!(remap_flags & REMAP_FILE_DEDUP)) { - /* Update the timestamps, since we can alter file contents. */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - return ret; - } - - /* - * Clear the security bits if the process is not being run by - * root. This keeps people from modifying setuid and setgid - * binaries. 
- */ - ret = file_remove_privs(file_out); - if (ret) - return ret; - } + if (!(remap_flags & REMAP_FILE_DEDUP)) + ret = file_modified(file_out); - return 0; + return ret; } EXPORT_SYMBOL(generic_remap_file_range_prep); @@ -1977,29 +1997,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - /* * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on * the same mount. Practically, they only need to be on the same file * system. */ - if (inode_in->i_sb != inode_out->i_sb) + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; + ret = generic_file_rw_checks(file_in, file_out); + if (ret < 0) + return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..a4d8f6e8b63c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return ret; ret = core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == 
-EINTR) ret = -ERESTARTNOHAND; @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; diff --git a/fs/seq_file.c b/fs/seq_file.c index abe27ec43176..04f09689cd6d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc) } EXPORT_SYMBOL(seq_escape); +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz) +{ + char *buf; + size_t size = seq_get_buf(m, &buf); + int ret; + + ret = string_escape_mem_ascii(src, isz, buf, size); + seq_commit(m, ret < size ? 
ret : -1); +} +EXPORT_SYMBOL(seq_escape_mem_ascii); + void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index d6008a636479..c181dee235bb 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Minchan Kim <minchan@kernel.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> #include <linux/mutex.h> diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 23a9c28ad8ea..2a2a2d106440 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index a6c75929a00e..550c3e592032 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index a9ba8d96776a..54c17b7c85fd 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. 
See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 80db1b86a27c..a4894cc59447 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index 95da65366548..c4e47e0588c7 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013, 2014 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/buffer_head.h> diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 9b7b1b6a7892..520d323a99ce 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kernel.h> diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 98537eab27e2..2e3073ace009 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef PAGE_ACTOR_H #define PAGE_ACTOR_H /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. 
*/ #ifndef CONFIG_SQUASHFS_FILE_DIRECT diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 57038604d4a8..d41c21fef138 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_create_group); +static int internal_create_groups(struct kobject *kobj, int update, + const struct attribute_group **groups) +{ + int error = 0; + int i; + + if (!groups) + return 0; + + for (i = 0; groups[i]; i++) { + error = internal_create_group(kobj, update, groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(kobj, groups[i]); + break; + } + } + return error; +} + /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on @@ -191,25 +211,29 @@ EXPORT_SYMBOL_GPL(sysfs_create_group); int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { - int error = 0; - int i; - - if (!groups) - return 0; - - for (i = 0; groups[i]; i++) { - error = sysfs_create_group(kobj, groups[i]); - if (error) { - while (--i >= 0) - sysfs_remove_group(kobj, groups[i]); - break; - } - } - return error; + return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); /** + * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups + * @kobj: The kobject to update the group on + * @groups: The attribute groups to update, NULL terminated + * + * This function update a bunch of attribute groups. If an error occurs when + * updating a group, all previously updated groups will be removed together + * with already existing (not updated) attributes. + * + * Returns 0 on success or error code from sysfs_update_group on failure. 
+ */ +int sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups) +{ + return internal_create_groups(kobj, 1, groups); +} +EXPORT_SYMBOL_GPL(sysfs_update_groups); + +/** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on * @grp: The attribute group to update diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 7098c49f3693..eeeae0475da9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * inode.c - part of tracefs, a pseudo file system for activating tracing * @@ -5,12 +6,7 @@ * * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * * tracefs is the file system that is used by the tracing infrastructure. - * */ #include <linux/module.h> @@ -509,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) switch (dentry->d_inode->i_mode & S_IFMT) { case S_IFDIR: ret = simple_rmdir(parent->d_inode, dentry); + if (!ret) + fsnotify_rmdir(parent->d_inode, dentry); break; default: simple_unlink(parent->d_inode, dentry); + fsnotify_unlink(parent->d_inode, dentry); break; } if (!ret) diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 4aaedf2d7f44..22be7aeb96c4 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, { struct ubifs_info *c = inode->i_sb->s_fs_info; void *p = &dn->data; - struct page *ret; unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE); + int err; ubifs_assert(c, pad_len <= *out_len); dn->compr_size = cpu_to_le16(in_len); @@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, if (pad_len != in_len) 
memset(p + in_len, 0, pad_len - in_len); - ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len, - offset_in_page(&dn->data), block, GFP_NOFS); - if (IS_ERR(ret)) { - ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret)); - return PTR_ERR(ret); + err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len, + offset_in_page(p), block, GFP_NOFS); + if (err) { + ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err); + return err; } *out_len = pad_len; @@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, } ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE); - err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen, - offset_in_page(&dn->data), block); + err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data), + dlen, offset_in_page(&dn->data), + block); if (err) { - ubifs_err(c, "fscrypt_decrypt_page failed: %i", err); + ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err); return err; } *out_len = clen; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 138c5b07d803..a5f10d79e0dd 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2800,115 +2800,69 @@ static const struct file_operations dfs_fops = { * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. * @c: UBIFS file-system description object * - * This function creates all debugfs files for this instance of UBIFS. Returns - * zero in case of success and a negative error code in case of failure. + * This function creates all debugfs files for this instance of UBIFS. * * Note, the only reason we have not merged this function with the * 'ubifs_debugging_init()' function is because it is better to initialize * debugfs interfaces at the very end of the mount process, and remove them at * the very beginning of the mount process. 
*/ -int dbg_debugfs_init_fs(struct ubifs_info *c) +void dbg_debugfs_init_fs(struct ubifs_info *c) { - int err, n; + int n; const char *fname; - struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); if (n == UBIFS_DFS_DIR_LEN) { /* The array size is too small */ fname = UBIFS_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; + return; } fname = d->dfs_dir_name; - dent = debugfs_create_dir(fname, dfs_rootdir); - if (IS_ERR_OR_NULL(dent)) - goto out; - d->dfs_dir = dent; + d->dfs_dir = debugfs_create_dir(fname, dfs_rootdir); fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_lprops = dent; + d->dfs_dump_lprops = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_budg = dent; + d->dfs_dump_budg = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_tnc = dent; + d->dfs_dump_tnc = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; + d->dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_index = dent; + d->dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); 
fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_orph = dent; + d->dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_lprops = dent; + d->dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_fs = dent; + d->dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_tst_rcvry = dent; + d->dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "ro_error"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_ro_error = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(d->dfs_dir); -out: - err = dent ? 
PTR_ERR(dent) : -ENODEV; - ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; + d->dfs_ro_error = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); } /** @@ -2917,8 +2871,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(c->dbg->dfs_dir); + debugfs_remove_recursive(c->dbg->dfs_dir); } struct ubifs_global_debug_info ubifs_dbg; @@ -2994,75 +2947,38 @@ static const struct file_operations dfs_global_fops = { * * UBIFS uses debugfs file-system to expose various debugging knobs to * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. + * file-system. */ -int dbg_debugfs_init(void) +void dbg_debugfs_init(void) { - int err; const char *fname; - struct dentry *dent; - - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; fname = "ubifs"; - dent = debugfs_create_dir(fname, NULL); - if (IS_ERR_OR_NULL(dent)) - goto out; - dfs_rootdir = dent; + dfs_rootdir = debugfs_create_dir(fname, NULL); fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_gen = dent; + dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, + NULL, &dfs_global_fops); fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_index = dent; + dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_orph = dent; + dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, 
NULL, &dfs_global_fops); fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_lprops = dent; + dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_fs = dent; + dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, + NULL, &dfs_global_fops); fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_tst_rcvry = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(dfs_rootdir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n", - current->pid, fname, err); - return err; + dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); } /** @@ -3070,8 +2986,7 @@ out: */ void dbg_debugfs_exit(void) { - if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(dfs_rootdir); + debugfs_remove_recursive(dfs_rootdir); } void ubifs_assert_failed(struct ubifs_info *c, const char *expr, diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index eb26097b6f70..7763639a426b 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -297,9 +297,9 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum); int dbg_leb_map(struct ubifs_info *c, int lnum); /* Debugfs-related stuff */ -int dbg_debugfs_init(void); +void dbg_debugfs_init(void); void dbg_debugfs_exit(void); -int dbg_debugfs_init_fs(struct ubifs_info *c); +void dbg_debugfs_init_fs(struct ubifs_info *c); void dbg_debugfs_exit_fs(struct ubifs_info *c); #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 
6cfc494050be..fd1977b568f0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1465,9 +1465,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - err = dbg_debugfs_init_fs(c); - if (err) - goto out_infos; + dbg_debugfs_init_fs(c); c->mounting = 0; @@ -2352,9 +2350,7 @@ static int __init ubifs_init(void) if (err) goto out_shrinker; - err = dbg_debugfs_init(); - if (err) - goto out_compr; + dbg_debugfs_init(); err = register_filesystem(&ubifs_fs_type); if (err) { @@ -2366,7 +2362,6 @@ static int __init ubifs_init(void) out_dbg: dbg_debugfs_exit(); -out_compr: ubifs_compressors_exit(); out_shrinker: unregister_shrinker(&ubifs_shrinker_info); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index e7276932e433..9bb18311a22f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block, return NULL; } -/* Extend the file by 'blocks' blocks, return the number of extents added */ +/* Extend the file with new blocks totaling 'new_block_bytes', + * return the number of extents added + */ static int udf_do_extend_file(struct inode *inode, struct extent_position *last_pos, struct kernel_long_ad *last_ext, - sector_t blocks) + loff_t new_block_bytes) { - sector_t add; + uint32_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; struct kernel_lb_addr prealloc_loc = {}; @@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode, /* The previous extent is fake and we should not extend by anything * - there's nothing to do... */ - if (!blocks && fake) + if (!new_block_bytes && fake) return 0; iinfo = UDF_I(inode); @@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode, /* Can we merge with the previous extent? 
*/ if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { - add = ((1 << 30) - sb->s_blocksize - - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >> - sb->s_blocksize_bits; - if (add > blocks) - add = blocks; - blocks -= add; - last_ext->extLength += add << sb->s_blocksize_bits; + add = (1 << 30) - sb->s_blocksize - + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + if (add > new_block_bytes) + add = new_block_bytes; + new_block_bytes -= add; + last_ext->extLength += add; } if (fake) { @@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode, } /* Managed to do everything necessary? */ - if (!blocks) + if (!new_block_bytes) goto out; /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ last_ext->extLocation.logicalBlockNum = 0; last_ext->extLocation.partitionReferenceNum = 0; - add = (1 << (30-sb->s_blocksize_bits)) - 1; - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (add << sb->s_blocksize_bits); + add = (1 << 30) - sb->s_blocksize; + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add; /* Create enough extents to cover the whole hole */ - while (blocks > add) { - blocks -= add; + while (new_block_bytes > add) { + new_block_bytes -= add; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) return err; count++; } - if (blocks) { + if (new_block_bytes) { last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (blocks << sb->s_blocksize_bits); + new_block_bytes; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) @@ -596,6 +596,24 @@ out: return count; } +/* Extend the final block of the file to final_block_len bytes */ +static void udf_do_extend_final_block(struct inode *inode, + struct extent_position *last_pos, + struct kernel_long_ad *last_ext, + uint32_t final_block_len) +{ + struct super_block *sb = inode->i_sb; + uint32_t added_bytes; + + added_bytes = final_block_len - + (last_ext->extLength & 
(sb->s_blocksize - 1)); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + + udf_write_aext(inode, last_pos, &last_ext->extLocation, + last_ext->extLength, 1); +} + static int udf_extend_file(struct inode *inode, loff_t newsize) { @@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) int8_t etype; struct super_block *sb = inode->i_sb; sector_t first_block = newsize >> sb->s_blocksize_bits, offset; + unsigned long partial_final_block; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); struct kernel_long_ad extent; - int err; + int err = 0; + int within_final_block; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) BUG(); etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + within_final_block = (etype != -1); - /* File has extent covering the new size (could happen when extending - * inside a block)? */ - if (etype != -1) - return 0; - if (newsize & (sb->s_blocksize - 1)) - offset++; - /* Extended file just to the boundary of the last file block? */ - if (offset == 0) - return 0; - - /* Truncate is extending the file by 'offset' blocks */ if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { /* File has no extents at all or has empty last @@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) &extent.extLength, 0); extent.extLength |= etype << 30; } - err = udf_do_extend_file(inode, &epos, &extent, offset); + + partial_final_block = newsize & (sb->s_blocksize - 1); + + /* File has extent covering the new size (could happen when extending + * inside a block)? 
+ */ + if (within_final_block) { + /* Extending file within the last file block */ + udf_do_extend_final_block(inode, &epos, &extent, + partial_final_block); + } else { + loff_t add = ((loff_t)offset << sb->s_blocksize_bits) | + partial_final_block; + err = udf_do_extend_file(inode, &epos, &extent, add); + } + if (err < 0) goto out; err = 0; @@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; + loff_t hole_len; isBeyondEOF = true; if (count) { if (c) @@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, startnum = (offset > 0); } /* Create extents for the hole between EOF and offset */ - ret = udf_do_extend_file(inode, &prev_epos, laarr, offset); + hole_len = (loff_t)offset << inode->i_blkbits; + ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; newblock = 0; diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 6afab4fdce90..71ca4d047d65 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um, } EXPORT_SYMBOL(utf8_strncasecmp); +/* String cf is expected to be a valid UTF-8 casefolded + * string. 
+ */ +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur1; + int c1, c2; + int i = 0; + + if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) + return -EINVAL; + + do { + c1 = utf8byte(&cur1); + c2 = cf->name[i++]; + if (c1 < 0) + return -EINVAL; + if (c1 != c2) + return 1; + } while (c1); + + return 0; +} +EXPORT_SYMBOL(utf8_strncasecmp_folded); + int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3b30301c90ec..ccbdbd62f0d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/userfaultfd.c * @@ -5,9 +6,6 @@ * Copyright (C) 2008-2009 Red Hat, Inc. * Copyright (C) 2015 Red Hat, Inc. * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * * Some part derived from fs/eventfd.c (anon inode setup) and * mm/ksm.c (mm hashing). */ @@ -42,6 +40,16 @@ enum userfaultfd_state { /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ @@ -460,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) blocking_state = return_to_userland ? 
TASK_INTERRUPTIBLE : TASK_KILLABLE; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -472,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * __add_wait_queue. */ set_current_state(blocking_state); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -554,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* @@ -585,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -615,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { struct vm_area_struct *vma; @@ -920,10 +928,10 @@ wakeup: * the last page faults that may have been already waiting on * the fault_*wqh. 
*/ - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); @@ -1136,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can @@ -1182,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (ret) userfaultfd_ctx_put(fork_nctx); } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); } return ret; @@ -1221,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, @@ -1883,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; 
total++; @@ -1891,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a6f0f4761a37..11f703d4a605 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -758,6 +758,7 @@ xfs_add_to_ioend( struct block_device *bdev = xfs_find_bdev_for_inode(inode); unsigned len = i_blocksize(inode); unsigned poff = offset & (PAGE_SIZE - 1); + bool merged, same_page = false; sector_t sector; sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) + @@ -774,10 +775,14 @@ xfs_add_to_ioend( wpc->imap.br_state, offset, bdev, sector); } - if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) { - if (iop) - atomic_inc(&iop->write_count); - if (bio_full(wpc->ioend->io_bio)) + merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, + &same_page); + + if (iop && !same_page) + atomic_inc(&iop->write_count); + + if (!merged) { + if (bio_full(wpc->ioend->io_bio, len)) xfs_chain_bio(wpc->ioend, wbc, bdev, sector); bio_add_page(wpc->ioend->io_bio, page, len, poff); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 76748255f843..916a35cae5e9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -367,20 +367,7 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) { - error = file_update_time(file); - if (error) - return error; - } - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - if (!IS_NOSEC(inode)) - return file_remove_privs(file); - return 0; + return file_modified(file); } static int |