diff options
Diffstat (limited to 'fs')
36 files changed, 348 insertions, 129 deletions
diff --git a/fs/afs/server.c b/fs/afs/server.c index e23be63998a8..629c74986cff 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -428,8 +428,15 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) } write_sequnlock(&net->fs_lock); - if (deleted) + if (deleted) { + write_seqlock(&net->fs_addr_lock); + if (!hlist_unhashed(&server->addr4_link)) + hlist_del_rcu(&server->addr4_link); + if (!hlist_unhashed(&server->addr6_link)) + hlist_del_rcu(&server->addr6_link); + write_sequnlock(&net->fs_addr_lock); afs_destroy_server(net, server); + } } } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 82e8f6edfb48..b12e37f27530 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -749,7 +749,7 @@ static int autofs4_dir_mkdir(struct inode *dir, autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 41e04183e4ce..4ad6f669fe34 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -377,10 +377,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, } else map_addr = vm_mmap(filep, addr, size, prot, type, off); - if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr)) - pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n", - task_pid_nr(current), current->comm, - (void *)addr); + if ((type & MAP_FIXED_NOREPLACE) && + PTR_ERR((void *)map_addr) == -EEXIST) + pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n", + task_pid_nr(current), current->comm, (void *)addr); return(map_addr); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5474ef14d6e6..2771cc56a622 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,25 @@ struct btrfs_block_rsv { unsigned short full; unsigned short type; unsigned short failfast; + + /* + * Qgroup equivalent for @size @reserved + * + * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care + * about things like csum size nor how many tree blocks it will need to + * reserve. + * + * Qgroup cares more about net change of the extent usage. + * + * So for one newly inserted file extent, in worst case it will cause + * leaf split and level increase, nodesize for each file extent is + * already too much. + * + * In short, qgroup_size/reserved is the upper limit of possible needed + * qgroup metadata reservation. + */ + u64 qgroup_rsv_size; + u64 qgroup_rsv_reserved; }; /* @@ -714,6 +733,12 @@ struct btrfs_delayed_root; */ #define BTRFS_FS_EXCL_OP 16 +/* + * To info transaction_kthread we need an immediate commit so it doesn't + * need to wait for commit_interval + */ +#define BTRFS_FS_NEED_ASYNC_COMMIT 17 + struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 06ec8ab6d9ba..a8d492dbd3e7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, dst_rsv = &fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + + /* + * Here we migrate space rsv from transaction rsv, since have already + * reserved space when starting a transaction. So no need to reserve + * qgroup space here. + */ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); if (!ret) { trace_btrfs_space_reservation(fs_info, "delayed_item", @@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, return; rsv = &fs_info->delayed_block_rsv; - btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved); + /* + * Check btrfs_delayed_item_reserve_metadata() to see why we don't need + * to release/reserve qgroup space. + */ trace_btrfs_space_reservation(fs_info, "delayed_item", item->key.objectid, item->bytes_reserved, 0); @@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); - if (ret < 0) - return ret; /* * btrfs_dirty_inode will update the inode under btrfs_join_transaction * which doesn't reserve space for speed. This is a problem since we @@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { + ret = btrfs_qgroup_reserve_meta_prealloc(root, + fs_info->nodesize, true); + if (ret < 0) + return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH); /* @@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata( "delayed_inode", btrfs_ino(inode), num_bytes, 1); + } else { + btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); } return ret; } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9e98295de7ce..e1b0651686f7 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data, int *qrecord_inserted_ret, + int action, int is_data, int is_system, + int *qrecord_inserted_ret, int *old_ref_mod, int *new_ref_mod) + { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; @@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->ref_mod = count_mod; head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; + head_ref->is_system = is_system; head_ref->ref_tree = RB_ROOT; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); @@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; + int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, 0, 0, action, 0, - &qrecord_inserted, old_ref_mod, - new_ref_mod); + is_system, &qrecord_inserted, + old_ref_mod, new_ref_mod); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); @@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, ref_root, reserved, - action, 1, &qrecord_inserted, + action, 1, 0, &qrecord_inserted, old_ref_mod, new_ref_mod); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); + /* + * extent_ops just modify the flags of an extent and they don't result + * in ref count changes, hence it's safe to pass false/0 for is_system + * argument + */ add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data, NULL, NULL, NULL); + extent_op->is_data, 0, NULL, NULL, NULL); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 741869dbc316..7f00db50bd24 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head { */ unsigned int must_insert_reserved:1; unsigned int is_data:1; + unsigned int is_system:1; unsigned int processing:1; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ac8b1d21baf..60caa68c3618 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && + !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && (now < cur->start_time || now - cur->start_time < fs_info->commit_interval)) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 75cfb80d2551..e2f16b68fcbf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; + struct btrfs_space_info *space_info; + u64 flags; - cache = btrfs_lookup_block_group(fs_info, head->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, + if (head->is_data) + flags = BTRFS_BLOCK_GROUP_DATA; + else if (head->is_system) + flags = BTRFS_BLOCK_GROUP_SYSTEM; + else + flags = BTRFS_BLOCK_GROUP_METADATA; + space_info = __find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add(&space_info->total_bytes_pinned, -head->num_bytes); - btrfs_put_block_group(cache); if (head->is_data) { spin_lock(&delayed_refs->lock); @@ -5559,14 +5565,18 @@ again: static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes) + struct btrfs_block_rsv *dest, u64 num_bytes, + u64 *qgroup_to_release_ret) { struct btrfs_space_info *space_info = block_rsv->space_info; + u64 qgroup_to_release = 0; u64 ret; spin_lock(&block_rsv->lock); - if (num_bytes == (u64)-1) + if (num_bytes == (u64)-1) { num_bytes = block_rsv->size; + qgroup_to_release = block_rsv->qgroup_rsv_size; + } block_rsv->size -= num_bytes; if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; @@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } + if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + qgroup_to_release = block_rsv->qgroup_rsv_reserved - + block_rsv->qgroup_rsv_size; + block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; + } else { + qgroup_to_release = 0; + } spin_unlock(&block_rsv->lock); ret = num_bytes; @@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info_add_old_bytes(fs_info, space_info, num_bytes); } + if (qgroup_to_release_ret) + *qgroup_to_release_ret = qgroup_to_release; return ret; } @@ -5738,17 +5757,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 num_bytes = 0; + u64 qgroup_num_bytes = 0; int ret = -ENOSPC; spin_lock(&block_rsv->lock); if (block_rsv->reserved < block_rsv->size) num_bytes = block_rsv->size - block_rsv->reserved; + if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) + qgroup_num_bytes = block_rsv->qgroup_rsv_size - + block_rsv->qgroup_rsv_reserved; spin_unlock(&block_rsv->lock); if (num_bytes == 0) return 0; - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); if (ret) return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); @@ -5756,7 +5779,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, block_rsv_add_bytes(block_rsv, num_bytes, 0); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), num_bytes, 1); - } + + /* Don't forget to increase qgroup_rsv_reserved */ + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; + spin_unlock(&block_rsv->lock); + } else + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); return ret; } @@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 released = 0; + u64 qgroup_to_release = 0; /* * Since we statically set the block_rsv->size we just want to say we * are releasing 0 bytes, and then we'll just get the reservation over * the size free'd. */ - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); + released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0, + &qgroup_to_release); if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); if (qgroup_free) - btrfs_qgroup_free_meta_prealloc(inode->root, released); + btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); else - btrfs_qgroup_convert_reserved_meta(inode->root, released); + btrfs_qgroup_convert_reserved_meta(inode->root, + qgroup_to_release); } void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, @@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, if (global_rsv == block_rsv || block_rsv->space_info != global_rsv->space_info) global_rsv = NULL; - block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes); + block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) static void release_global_block_rsv(struct btrfs_fs_info *fs_info) { block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, - (u64)-1); + (u64)-1, NULL); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); @@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) WARN_ON_ONCE(!list_empty(&trans->new_bgs)); block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, - trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; } @@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, { struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; + u64 qgroup_rsv_size = 0; u64 csum_leaves; unsigned outstanding_extents; @@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, inode->csum_bytes); reserve_size += btrfs_calc_trans_metadata_size(fs_info, csum_leaves); + /* + * For qgroup rsv, the calculation is very simple: + * account one nodesize for each outstanding extent + * + * This is overestimating in most cases. + */ + qgroup_rsv_size = outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; + block_rsv->qgroup_rsv_size = qgroup_rsv_size; spin_unlock(&block_rsv->lock); } @@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u32 blocksize) { block_rsv_add_bytes(block_rsv, blocksize, 0); - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); + block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); } /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0167a9c97c9c..f660ba1e5e58 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ again: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - (ret != 0)); + true); if (ret) { btrfs_drop_pages(pages, num_pages); break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e064c49c9a9a..d241285a0d2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include <linux/uio.h> #include <linux/magic.h> #include <linux/iversion.h> +#include <asm/unaligned.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) struct dir_entry *entry = addr; char *name = (char *)(entry + 1); - ctx->pos = entry->offset; - if (!dir_emit(ctx, name, entry->name_len, entry->ino, - entry->type)) + ctx->pos = get_unaligned(&entry->offset); + if (!dir_emit(ctx, name, get_unaligned(&entry->name_len), + get_unaligned(&entry->ino), + get_unaligned(&entry->type))) return 1; - addr += sizeof(struct dir_entry) + entry->name_len; + addr += sizeof(struct dir_entry) + + get_unaligned(&entry->name_len); ctx->pos++; } return 0; @@ -5999,14 +6002,15 @@ again: } entry = addr; - entry->name_len = name_len; + put_unaligned(name_len, &entry->name_len); name_ptr = (char *)(entry + 1); read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); - entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], + &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); - entry->ino = location.objectid; - entry->offset = found_key.offset; + put_unaligned(location.objectid, &entry->ino); + put_unaligned(found_key.offset, &entry->offset); entries++; addr += sizeof(struct dir_entry) + name_len; total_len += sizeof(struct dir_entry) + name_len; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 124276bba8cf..21a831d3d087 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l) fs_info = l->fs_info; nr = btrfs_header_nritems(l); - btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", - btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(fs_info, l)); + btrfs_info(fs_info, + "leaf %llu gen %llu total ptrs %d free space %d owner %llu", + btrfs_header_bytenr(l), btrfs_header_generation(l), nr, + btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); @@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l) } } -void btrfs_print_tree(struct extent_buffer *c) +void btrfs_print_tree(struct extent_buffer *c, bool follow) { struct btrfs_fs_info *fs_info; int i; u32 nr; @@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c) return; } btrfs_info(fs_info, - "node %llu level %d total ptrs %d free spc %u", - btrfs_header_bytenr(c), level, nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr); + "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu", + btrfs_header_bytenr(c), level, btrfs_header_generation(c), + nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr, + btrfs_header_owner(c)); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - pr_info("\tkey %d (%llu %u %llu) block %llu\n", + pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", i, key.objectid, key.type, key.offset, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i)); } + if (!follow) + return; for (i = 0; i < nr; i++) { struct btrfs_key first_key; struct extent_buffer *next; @@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c) if (btrfs_header_level(next) != level - 1) BUG(); - btrfs_print_tree(next); + btrfs_print_tree(next, follow); free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 4a98481688f4..e6bb38fd75ad 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -7,6 +7,6 @@ #define BTRFS_PRINT_TREE_H void btrfs_print_leaf(struct extent_buffer *l); -void btrfs_print_tree(struct extent_buffer *c); +void btrfs_print_tree(struct extent_buffer *c, bool follow); #endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 09c7e4fd550f..9fb758d5077a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/btrfs.h> +#include <linux/sizes.h> #include "ctree.h" #include "transaction.h" @@ -2375,8 +2376,21 @@ out: return ret; } -static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) +/* + * Two limits to commit transaction in advance. + * + * For RATIO, it will be 1/RATIO of the remaining limit + * (excluding data and prealloc meta) as threshold. + * For SIZE, it will be in byte unit as threshold. + */ +#define QGROUP_PERTRANS_RATIO 32 +#define QGROUP_PERTRANS_SIZE SZ_32M +static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup *qg, u64 num_bytes) { + u64 limit; + u64 threshold; + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) return false; @@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) return false; + /* + * Even if we passed the check, it's better to check if reservation + * for meta_pertrans is pushing us near limit. + * If there is too much pertrans reservation or it's near the limit, + * let's try commit transaction to free some, using transaction_kthread + */ + if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | + BTRFS_QGROUP_LIMIT_MAX_EXCL))) { + if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) + limit = qg->max_excl; + else + limit = qg->max_rfer; + threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] - + qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) / + QGROUP_PERTRANS_RATIO; + threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE); + + /* + * Use transaction_kthread to commit transaction, so we no + * longer need to bother nested transaction nor lock context. + */ + if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold) + btrfs_commit_transaction_locksafe(fs_info); + } + return true; } @@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, qg = unode_aux_to_qgroup(unode); - if (enforce && !qgroup_check_limits(qg, num_bytes)) { + if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { ret = -EDQUOT; goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 63fdcab64b01..c944b4769e3c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); + clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c88fccd80bc5..d8c0826bc2c7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans); int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, int wait_for_unblock); + +/* + * Try to commit transaction asynchronously, so this is safe to call + * even holding a spinlock. + * + * It's done by informing transaction_kthread to commit transaction without + * waiting for commit interval. + */ +static inline void btrfs_commit_transaction_locksafe( + struct btrfs_fs_info *fs_info) +{ + set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); + wake_up_process(fs_info->transaction_kthread); +} int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); void btrfs_throttle(struct btrfs_fs_info *fs_info); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8bf60250309e..ae056927080d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_FILE_BUFFER| CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { - if (timespec_compare(ctime, &inode->i_ctime) > 0) { + if (ci->i_version == 0 || + timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode->i_ctime = *ctime; } - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { + if (ci->i_version == 0 || + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %ld.%09ld -> %ld.%09ld " "tw %d -> %d\n", @@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ - ci->i_version = le64_to_cpu(info->version); inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; @@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, xattr_blob = NULL; } + /* finally update i_version */ + ci->i_version = le64_to_cpu(info->version); + inode->i_mapping->a_ops = &ceph_aops; switch (inode->i_mode & S_IFMT) { diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 846ca150d52e..4dd842f72846 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1997,6 +1997,16 @@ out: return rc; } +static bool is_dot_dotdot(const char *name, size_t name_size) +{ + if (name_size == 1 && name[0] == '.') + return true; + else if (name_size == 2 && name[0] == '.' && name[1] == '.') + return true; + + return false; +} + /** * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext * @plaintext_name: The plaintext name @@ -2021,13 +2031,21 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, size_t packet_size; int rc = 0; - if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) - && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) - && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, - ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) { - const char *orig_name = name; - size_t orig_name_size = name_size; + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && + !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)) { + if (is_dot_dotdot(name, name_size)) { + rc = ecryptfs_copy_filename(plaintext_name, + plaintext_name_size, + name, name_size); + goto out; + } + + if (name_size <= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE || + strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)) { + rc = -EINVAL; + goto out; + } name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; @@ -2047,12 +2065,9 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, decoded_name, decoded_name_size); if (rc) { - printk(KERN_INFO "%s: Could not parse tag 70 packet " - "from filename; copying through filename " - "as-is\n", __func__); - rc = ecryptfs_copy_filename(plaintext_name, - plaintext_name_size, - orig_name, orig_name_size); + ecryptfs_printk(KERN_DEBUG, + "%s: Could not parse tag 70 packet from filename\n", + __func__); goto out_free; } } else { diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c74ed3ca3372..b76a9853325e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -82,17 +82,28 @@ ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, buf->sb, lower_name, lower_namelen); if (rc) { - printk(KERN_ERR "%s: Error attempting to decode and decrypt " - "filename [%s]; rc = [%d]\n", __func__, lower_name, - rc); - goto out; + if (rc != -EINVAL) { + ecryptfs_printk(KERN_DEBUG, + "%s: Error attempting to decode and decrypt filename [%s]; rc = [%d]\n", + __func__, lower_name, rc); + return rc; + } + + /* Mask -EINVAL errors as these are most likely due a plaintext + * filename present in the lower filesystem despite filename + * encryption being enabled. One unavoidable example would be + * the "lost+found" dentry in the root directory of an Ext4 + * filesystem. + */ + return 0; } + buf->caller->pos = buf->ctx.pos; rc = !dir_emit(buf->caller, name, name_size, ino, d_type); kfree(name); if (!rc) buf->entries_written++; -out: + return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 847904aa63a9..97d17eaeba07 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -395,8 +395,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; - if (mount_crypt_stat - && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { + if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { rc = ecryptfs_encrypt_and_encode_filename( &encrypted_and_encoded_name, &len, mount_crypt_stat, name, len); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index c89a58cfc991..e74fe84d0886 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1880,7 +1880,7 @@ find_next_matching_auth_tok: candidate_auth_tok = &auth_tok_list_item->auth_tok; if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, - "Considering cadidate auth tok:\n"); + "Considering candidate auth tok:\n"); ecryptfs_dump_auth_tok(candidate_auth_tok); } rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 09640220fda8..047c327a6b23 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -88,11 +88,11 @@ out_unlock: * The default page_lock and i_size verification done by non-DAX fault paths * is sufficient because ext2 doesn't support hole punching. */ -static int ext2_dax_fault(struct vm_fault *vmf) +static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); - int ret; + vm_fault_t ret; if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b12ba70a895..47d7c151fcba 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 9bb2fe35799d..10205ececc27 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/bio.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/zlib.h> @@ -59,7 +60,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, >> bufshift; int haveblocks; blkcnt_t blocknum; - struct buffer_head *bhs[needblocks + 1]; + struct buffer_head **bhs; int curbh, curpage; if (block_size > deflateBound(1UL << zisofs_block_shift)) { @@ -80,7 +81,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, /* Because zlib is not thread-safe, do all the I/O at the top. */ blocknum = block_start >> bufshift; - memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *)); + bhs = kcalloc(needblocks + 1, sizeof(*bhs), GFP_KERNEL); + if (!bhs) { + *errp = -ENOMEM; + return 0; + } haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); @@ -190,6 +195,7 @@ z_eio: b_eio: for (i = 0; i < haveblocks; i++) brelse(bhs[i]); + kfree(bhs); return stream.total_out; } @@ -305,7 +311,7 @@ static int zisofs_readpage(struct file *file, struct page *page) unsigned int zisofs_pages_per_cblock = PAGE_SHIFT <= zisofs_block_shift ? (1 << (zisofs_block_shift - PAGE_SHIFT)) : 0; - struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)]; + struct page **pages; pgoff_t index = page->index, end_index; end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -330,6 +336,12 @@ static int zisofs_readpage(struct file *file, struct page *page) full_page = 0; pcount = 1; } + pages = kcalloc(max_t(unsigned int, zisofs_pages_per_cblock, 1), + sizeof(*pages), GFP_KERNEL); + if (!pages) { + unlock_page(page); + return -ENOMEM; + } pages[full_page] = page; for (i = 0; i < pcount; i++, index++) { @@ -357,6 +369,7 @@ static int zisofs_readpage(struct file *file, struct page *page) } /* At this point, err contains 0 or -EIO depending on the "critical" page */ + kfree(pages); return err; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bc258a4402f6..ec3fba7d492f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -394,7 +394,10 @@ static int parse_options(char *options, struct iso9660_options *popt) break; #ifdef CONFIG_JOLIET case Opt_iocharset: + kfree(popt->iocharset); popt->iocharset = match_strdup(&args[0]); + if (!popt->iocharset) + return 0; break; #endif case Opt_map_a: diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f60dee7faf03..87bdf0f4cba1 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!sb_rdonly(sb)) + if (c && !sb_rdonly(sb)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); diff --git a/fs/namespace.c b/fs/namespace.c index e398f32d7541..5f75969adff1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1089,7 +1089,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; @@ -2814,7 +2815,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); - if (flags & SB_RDONLY) + if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d51e1bb781cf..d94e8031fe5f 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, u32 event_mask, const void *data, int data_type) { - __u32 marks_mask, marks_ignored_mask; + __u32 marks_mask = 0, marks_ignored_mask = 0; const struct path *path = data; pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" @@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - if (inode_mark && vfsmnt_mark) { - marks_mask = (vfsmnt_mark->mask | inode_mark->mask); - marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); - } else if (inode_mark) { - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if ((event_mask & FS_EVENT_ON_CHILD) && - !(inode_mark->mask & FS_EVENT_ON_CHILD)) - return false; - marks_mask = inode_mark->mask; - marks_ignored_mask = inode_mark->ignored_mask; - } else if (vfsmnt_mark) { - marks_mask = vfsmnt_mark->mask; - marks_ignored_mask = vfsmnt_mark->ignored_mask; - } else { - BUG(); + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (inode_mark && + (!(event_mask & FS_EVENT_ON_CHILD) || + (inode_mark->mask & FS_EVENT_ON_CHILD))) { + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; + } + + if (vfsmnt_mark) { + marks_mask |= vfsmnt_mark->mask; + marks_ignored_mask |= vfsmnt_mark->ignored_mask; } if (d_is_dir(path->dentry) && diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 219b269c737e..613ec7e5a465 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -192,8 +192,9 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 inode_test_mask = 0; - __u32 vfsmount_test_mask = 0; + __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 marks_mask = 0; + __u32 marks_ignored_mask = 0; if (unlikely(!inode_mark && !vfsmount_mark)) { BUG(); @@ -213,29 +214,25 @@ static int send_to_group(struct inode *to_tell, /* does the inode mark tell us to do something? */ if (inode_mark) { group = inode_mark->group; - inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); - inode_test_mask &= inode_mark->mask; - inode_test_mask &= ~inode_mark->ignored_mask; + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; } /* does the vfsmount_mark tell us to do something? */ if (vfsmount_mark) { - vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); group = vfsmount_mark->group; - vfsmount_test_mask &= vfsmount_mark->mask; - vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; - if (inode_mark) - vfsmount_test_mask &= ~inode_mark->ignored_mask; + marks_mask |= vfsmount_mark->mask; + marks_ignored_mask |= vfsmount_mark->ignored_mask; } pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" - " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" + " vfsmount_mark=%p marks_mask=%x marks_ignored_mask=%x" " data=%p data_is=%d cookie=%d\n", - __func__, group, to_tell, mask, inode_mark, - inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, + __func__, group, to_tell, mask, inode_mark, vfsmount_mark, + marks_mask, marks_ignored_mask, data, data_is, cookie); - if (!inode_test_mask && !vfsmount_test_mask) + if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; return group->ops->handle_event(group, to_tell, inode_mark, diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 3ae5fdba0225..10796d3fe27d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -579,6 +579,11 @@ void orangefs_kill_sb(struct super_block *sb) /* provided sb cleanup */ kill_anon_super(sb); + if (!ORANGEFS_SB(sb)) { + mutex_lock(&orangefs_request_mutex); + mutex_unlock(&orangefs_request_mutex); + return; + } /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock diff --git a/fs/proc/base.c b/fs/proc/base.c index eafa39a3a88c..1b2ede6abcdf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1693,6 +1693,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t uid; kgid_t gid; + if (unlikely(task->flags & PF_KTHREAD)) { + *ruid = GLOBAL_ROOT_UID; + *rgid = GLOBAL_ROOT_GID; + return; + } + /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index a000d7547479..b572cc865b92 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v) LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - idr_get_cursor(&task_active_pid_ns(current)->idr)); + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 65ae54659833..c486ad4b43f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1310,9 +1310,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); + unsigned long offset = swp_offset(entry); + offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + (offset << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; @@ -1332,6 +1334,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); } spin_unlock(ptl); return err; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 020c597ef9b6..d88231e3b2be 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2966,7 +2966,7 @@ static int __init dquot_init(void) NULL); order = 0; - dquot_hash = (struct hlist_head *)__get_free_pages(GFP_ATOMIC, order); + dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); diff --git a/fs/super.c b/fs/super.c index 5fa9a8d8d865..122c402049a2 100644 --- a/fs/super.c +++ b/fs/super.c @@ -167,6 +167,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); + free_prealloced_shrinker(&s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -252,6 +253,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink.count_objects = super_cache_count; s->s_shrink.batch = 1024; s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; + if (prealloc_shrinker(&s->s_shrink)) + goto fail; return s; fail: @@ -518,11 +521,7 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - err = register_shrinker(&s->s_shrink); - if (err) { - deactivate_locked_super(s); - s = ERR_PTR(err); - } + register_shrinker_prepared(&s->s_shrink); return s; } diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index f897e55f2cd0..16a8ad21b77e 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,6 +28,9 @@ #include "udf_sb.h" +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 + static int udf_uni2char_utf8(wchar_t uni, unsigned char *out, int boundlen) @@ -37,6 +40,9 @@ static int udf_uni2char_utf8(wchar_t uni, if (boundlen <= 0) return -ENAMETOOLONG; + if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) + return -EINVAL; + if (uni < 0x80) { out[u_len++] = (unsigned char)uni; } else if (uni < 0x800) { |