diff options
Diffstat (limited to 'fs')
55 files changed, 728 insertions, 593 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 22ea424ee741..073bb57adab1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 1ce06c849a86..3e36e4adc4a3 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -42,8 +42,14 @@ struct __btrfs_workqueue { /* Thresholding related variants */ atomic_t pending; - int max_active; - int current_max; + + /* Up limit of concurrency workers */ + int limit_active; + + /* Current number of concurrency workers */ + int current_active; + + /* Threshold to change current_active */ int thresh; unsigned int count; spinlock_t thres_lock; @@ -88,7 +94,7 @@ BTRFS_WORK_HELPER(scrubnc_helper); BTRFS_WORK_HELPER(scrubparity_helper); static struct __btrfs_workqueue * -__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, +__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active, int thresh) { struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -96,26 +102,31 @@ __btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, if (!ret) return NULL; - ret->max_active = max_active; + ret->limit_active = limit_active; atomic_set(&ret->pending, 0); if (thresh == 0) thresh = DFT_THRESHOLD; /* For low threshold, disabling threshold is a better choice */ if (thresh < DFT_THRESHOLD) { - ret->current_max = max_active; + ret->current_active = limit_active; ret->thresh = NO_THRESHOLD; } else { - ret->current_max = 1; + /* + * For threshold-able wq, let its concurrency grow on demand. + * Use minimal max_active at alloc time to reduce resource + * usage. + */ + ret->current_active = 1; ret->thresh = thresh; } if (flags & WQ_HIGHPRI) ret->normal_wq = alloc_workqueue("%s-%s-high", flags, - ret->max_active, - "btrfs", name); + ret->current_active, "btrfs", + name); else ret->normal_wq = alloc_workqueue("%s-%s", flags, - ret->max_active, "btrfs", + ret->current_active, "btrfs", name); if (!ret->normal_wq) { kfree(ret); @@ -134,7 +145,7 @@ __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, unsigned int flags, - int max_active, + int limit_active, int thresh) { struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); @@ -143,14 +154,14 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, return NULL; ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, - max_active, thresh); + limit_active, thresh); if (!ret->normal) { kfree(ret); return NULL; } if (flags & WQ_HIGHPRI) { - ret->high = __btrfs_alloc_workqueue(name, flags, max_active, + ret->high = __btrfs_alloc_workqueue(name, flags, limit_active, thresh); if (!ret->high) { __btrfs_destroy_workqueue(ret->normal); @@ -180,7 +191,7 @@ static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) */ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) { - int new_max_active; + int new_current_active; long pending; int need_change = 0; @@ -197,7 +208,7 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) wq->count %= (wq->thresh / 4); if (!wq->count) goto out; - new_max_active = wq->current_max; + new_current_active = wq->current_active; /* * pending may be changed later, but it's OK since we really @@ -205,19 +216,19 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) */ pending = atomic_read(&wq->pending); if (pending > wq->thresh) - new_max_active++; + new_current_active++; if (pending < wq->thresh / 2) - new_max_active--; - new_max_active = clamp_val(new_max_active, 1, wq->max_active); - if (new_max_active != wq->current_max) { + new_current_active--; + new_current_active = clamp_val(new_current_active, 1, wq->limit_active); + if (new_current_active != wq->current_active) { need_change = 1; - wq->current_max = new_max_active; + wq->current_active = new_current_active; } out: spin_unlock(&wq->thres_lock); if (need_change) { - workqueue_set_max_active(wq->normal_wq, wq->current_max); + workqueue_set_max_active(wq->normal_wq, wq->current_active); } } @@ -351,13 +362,13 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) kfree(wq); } -void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) +void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active) { if (!wq) return; - wq->normal->max_active = max; + wq->normal->limit_active = limit_active; if (wq->high) - wq->high->max_active = max; + wq->high->limit_active = limit_active; } void btrfs_set_work_high_priority(struct btrfs_work *work) diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index b0b093b6afec..ad4d0647d1a6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -69,7 +69,7 @@ BTRFS_WORK_HELPER_PROTO(scrubparity_helper); struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, unsigned int flags, - int max_active, + int limit_active, int thresh); void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, btrfs_func_t func, diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81220b2203c6..0ef5cc13fae2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,8 +44,6 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 -/* DIO is ready to submit */ -#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564a7de17d99..e54dd5905cee 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -183,8 +183,7 @@ no_valid_dev_replace_entry_found: } out: - if (path) - btrfs_free_path(path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9ebd34f1c677..295795aebe0b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3443,6 +3443,26 @@ static int barrier_all_devices(struct btrfs_fs_info *info) return 0; } +int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) +{ + if ((flags & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_AVAIL_ALLOC_BIT_SINGLE)) || + ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) + return 0; + + if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID5 | + BTRFS_BLOCK_GROUP_RAID10)) + return 1; + + if (flags & BTRFS_BLOCK_GROUP_RAID6) + return 2; + + pr_warn("BTRFS: unknown raid type: %llu\n", flags); + return 0; +} + int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info) { @@ -3452,13 +3472,12 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; - int num_types = 4; int i; int c; int num_tolerated_disk_barrier_failures = (int)fs_info->fs_devices->num_devices; - for (i = 0; i < num_types; i++) { + for (i = 0; i < ARRAY_SIZE(types); i++) { struct btrfs_space_info *tmp; sinfo = NULL; @@ -3476,44 +3495,21 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( down_read(&sinfo->groups_sem); for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { - if (!list_empty(&sinfo->block_groups[c])) { - u64 flags; - - btrfs_get_block_group_info( - &sinfo->block_groups[c], &space); - if (space.total_bytes == 0 || - space.used_bytes == 0) - continue; - flags = space.flags; - /* - * return - * 0: if dup, single or RAID0 is configured for - * any of metadata, system or data, else - * 1: if RAID5 is configured, or if RAID1 or - * RAID10 is configured and only two mirrors - * are used, else - * 2: if RAID6 is configured, else - * num_mirrors - 1: if RAID1 or RAID10 is - * configured and more than - * 2 mirrors are used. - */ - if (num_tolerated_disk_barrier_failures > 0 && - ((flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID0)) || - ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) - == 0))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1) { - if (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID10)) { - num_tolerated_disk_barrier_failures = 1; - } else if (flags & - BTRFS_BLOCK_GROUP_RAID6) { - num_tolerated_disk_barrier_failures = 2; - } - } - } + u64 flags; + + if (list_empty(&sinfo->block_groups[c])) + continue; + + btrfs_get_block_group_info(&sinfo->block_groups[c], + &space); + if (space.total_bytes == 0 || space.used_bytes == 0) + continue; + flags = space.flags; + + num_tolerated_disk_barrier_failures = min( + num_tolerated_disk_barrier_failures, + btrfs_get_num_tolerated_disk_barrier_failures( + flags)); } up_read(&sinfo->groups_sem); } @@ -3769,9 +3765,7 @@ void close_ctree(struct btrfs_root *root) * block groups queued for removal, the deletion will be * skipped when we quit the cleaner thread. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(root->fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); ret = btrfs_commit_super(root); if (ret) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d4cbfeeeedd4..bdfb479ea859 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -139,6 +139,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid); int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); +int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info); int __init btrfs_end_io_wq_init(void); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5411f0ab5683..9f9604201333 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3742,10 +3742,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_reserved = 0; found->bytes_readonly = 0; found->bytes_may_use = 0; - if (total_bytes > 0) - found->full = 0; - else - found->full = 1; + found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; found->flush = 0; @@ -8668,7 +8665,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { - btrfs_drop_and_free_fs_root(tree_root->fs_info, root); + btrfs_add_dropped_root(trans, root); } else { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1018cfbfefa..e2357e31609a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, @@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree, } while (cur <= end) { unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; @@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points to + * to the same compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the third range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); if (!ret) { nr++; *bio_flags = this_bio_flag; @@ -3089,6 +3137,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, struct inode *inode; struct btrfs_ordered_extent *ordered; int index; + u64 prev_em_start = (u64)-1; inode = pages[0]->mapping->host; while (1) { @@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, &prev_em_start); page_cache_release(pages[index]); } } @@ -3172,7 +3221,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw); + bio_flags, rw, NULL); return ret; } @@ -3198,7 +3247,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, int ret; ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, READ, NULL); if (bio) ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; @@ -3451,7 +3500,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -3754,7 +3803,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, - 0, epd->bio_flags, bio_flags); + 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 237da012f7d0..611b66d73e80 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_free_io_failure_record(inode, 0, (u64)-1); @@ -6909,8 +6910,7 @@ out: trace_btrfs_get_extent(root, em); - if (path) - btrfs_free_path(path); + btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); if (!err) @@ -7409,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, return em; } +struct btrfs_dio_data { + u64 outstanding_extents; + u64 reserve; +}; static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -7416,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; + struct btrfs_dio_data *dio_data = NULL; u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7437,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * that anything that needs to check if there's a transction doesn't get * confused. */ - outstanding_extents = current->journal_info; + dio_data = current->journal_info; current->journal_info = NULL; } @@ -7569,17 +7573,18 @@ unlock: * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (*outstanding_extents) { - (*outstanding_extents)--; + if (dio_data->outstanding_extents) { + (dio_data->outstanding_extents)--; } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } - current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); - set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); + WARN_ON(dio_data->reserve < len); + dio_data->reserve -= len; + current->journal_info = dio_data; } /* @@ -7602,8 +7607,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); - if (outstanding_extents) - current->journal_info = outstanding_extents; + if (dio_data) + current->journal_info = dio_data; return ret; } @@ -8330,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - u64 outstanding_extents = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_dio_data dio_data = { 0 }; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8368,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; - outstanding_extents = div64_u64(count + + dio_data.outstanding_extents = div64_u64(count + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); @@ -8377,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * do the accounting properly if we go over the number we * originally calculated. Abuse current->journal_info for this. */ - current->journal_info = &outstanding_extents; + dio_data.reserve = round_up(count, root->sectorsize); + current->journal_info = &dio_data; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8392,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { - /* - * If the error comes from submitting stage, - * btrfs_get_blocsk_direct() has free'd data space, - * and metadata space will be handled by - * finish_ordered_fn, don't do that again to make - * sure bytes_may_use is correct. - */ - if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, - &BTRFS_I(inode)->runtime_flags)) - btrfs_delalloc_release_space(inode, count); + if (dio_data.reserve) + btrfs_delalloc_release_space(inode, + dio_data.reserve); } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9a11db0c47ee..a39f5d1144e8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3267,13 +3267,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, scrub_blocked_if_needed(fs_info); } - /* for raid56, we skip parity stripe */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = get_raid56_logic_offset(physical, num, map, &logical, &stripe_logical); logical += base; if (ret) { + /* it is parity strip */ stripe_logical += base; stripe_end = stripe_logical + increment; ret = scrub_raid56_parity(sctx, map, scrub_dev, @@ -3480,7 +3480,6 @@ out: static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, - u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, u64 dev_offset, int is_dev_replace) { @@ -3531,8 +3530,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, struct btrfs_root *root = sctx->dev_root; struct btrfs_fs_info *fs_info = root->fs_info; u64 length; - u64 chunk_tree; - u64 chunk_objectid; u64 chunk_offset; int ret = 0; int slot; @@ -3596,8 +3593,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (found_key.offset + length <= start) goto skip; - chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); - chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); /* @@ -3630,9 +3625,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; - ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, - chunk_offset, length, found_key.offset, - is_dev_replace); + ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, + found_key.offset, is_dev_replace); /* * flush, submit all pending read and write bios, afterwards diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b07b3581781..11d1eab9234d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) * groups on disk until we're mounted read-write again * unless we clean them up here. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8f259b3a66b3..74bc3338418b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, btrfs_unpin_free_ino(root); clear_btree_io_tree(&root->dirty_log_pages); } + + /* We can free old roots now. */ + spin_lock(&trans->dropped_roots_lock); + while (!list_empty(&trans->dropped_roots)) { + root = list_first_entry(&trans->dropped_roots, + struct btrfs_root, root_list); + list_del_init(&root->root_list); + spin_unlock(&trans->dropped_roots_lock); + btrfs_drop_and_free_fs_root(fs_info, root); + spin_lock(&trans->dropped_roots_lock); + } + spin_unlock(&trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -255,11 +267,13 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); + INIT_LIST_HEAD(&cur_trans->dropped_roots); mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->deleted_bgs_lock); + spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, } +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + + /* Add ourselves to the transaction dropped list */ + spin_lock(&cur_trans->dropped_roots_lock); + list_add_tail(&root->root_list, &cur_trans->dropped_roots); + spin_unlock(&cur_trans->dropped_roots_lock); + + /* Make sure we don't try to update the root at commit time */ + spin_lock(&root->fs_info->fs_roots_radix_lock); + radix_tree_tag_clear(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&root->fs_info->fs_roots_radix_lock); +} + int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index edc2fbc262d7..87964bf8892d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -65,6 +65,7 @@ struct btrfs_transaction { struct list_head switch_commits; struct list_head dirty_bgs; struct list_head io_bgs; + struct list_head dropped_roots; u64 num_dirty_bgs; /* @@ -76,6 +77,7 @@ struct btrfs_transaction { spinlock_t dirty_bgs_lock; struct list_head deleted_bgs; spinlock_t deleted_bgs_lock; + spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; int dirty_bg_run; @@ -216,5 +218,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); - +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a4b9c8b2d35a..f31db4325339 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -115,8 +115,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } out: - if (path) - btrfs_free_path(path); + btrfs_free_path(path); if (ret == -EAGAIN) { if (root->defrag_max.objectid > root->defrag_progress.objectid) goto done; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 76201d6f6ce4..6fc735869c18 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3585,23 +3585,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } while (read_seqretry(&fs_info->profiles_lock, seq)); if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - int num_tolerated_disk_barrier_failures; - u64 target = bctl->sys.target; - - num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - if (num_tolerated_disk_barrier_failures > 0 && - (target & - (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_AVAIL_ALLOC_BIT_SINGLE))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1 && - (target & - (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) - num_tolerated_disk_barrier_failures = 1; - - fs_info->num_tolerated_disk_barrier_failures = - num_tolerated_disk_barrier_failures; + fs_info->num_tolerated_disk_barrier_failures = min( + btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), + btrfs_get_num_tolerated_disk_barrier_failures( + bctl->sys.target)); } ret = insert_balance_item(fs_info->tree_root, bctl); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a268abfe60ac..9d23e788d1df 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -276,7 +276,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) for (i = 0; i < num_pages; i++) { struct page *page = osd_data->pages[i]; - if (rc < 0) + if (rc < 0 && rc != ENOENT) goto unlock; if (bytes < (int)PAGE_CACHE_SIZE) { /* zero (remainder of) page */ @@ -717,8 +717,10 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { + if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { pr_warn("writepage_start %p on forced umount\n", inode); + truncate_pagecache(inode, 0); + mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ } if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ddd5e9471290..27b566874bc1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2413,6 +2413,14 @@ again: goto out_unlock; } + if (!__ceph_is_any_caps(ci) && + ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + dout("get_cap_refs %p forced umount\n", inode); + *err = -EIO; + ret = 1; + goto out_unlock; + } + dout("get_cap_refs %p have %s needed %s\n", inode, ceph_cap_string(have), ceph_cap_string(need)); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8b79d87eaf46..0c62868b5c56 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -136,7 +136,6 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *cf = file->private_data; - struct inode *parent_inode = NULL; int err; int flags, fmode, wanted; @@ -210,10 +209,7 @@ int ceph_open(struct inode *inode, struct file *file) ihold(inode); req->r_num_caps = 1; - if (flags & O_CREAT) - parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); - err = ceph_mdsc_do_request(mdsc, parent_inode, req); - iput(parent_inode); + err = ceph_mdsc_do_request(mdsc, NULL, req); if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); @@ -279,7 +275,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err) goto out_req; - if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); if (d_unhashed(dentry)) { @@ -956,6 +952,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); + if (iocb->ki_flags & IOCB_APPEND) { + err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); + if (err < 0) + goto out; + } + err = generic_write_checks(iocb, from); if (err <= 0) goto out; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6aa07af67603..51cb02da75d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2107,7 +2107,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, msg = create_request_message(mdsc, req, mds, drop_cap_releases); if (IS_ERR(msg)) { req->r_err = PTR_ERR(msg); - complete_request(mdsc, req); return PTR_ERR(msg); } req->r_request = msg; @@ -2135,7 +2134,7 @@ static int __do_request(struct ceph_mds_client *mdsc, { struct ceph_mds_session *session = NULL; int mds = -1; - int err = -EAGAIN; + int err = 0; if (req->r_err || req->r_got_result) { if (req->r_aborted) @@ -2149,6 +2148,11 @@ static int __do_request(struct ceph_mds_client *mdsc, err = -EIO; goto finish; } + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + dout("do_request forced umount\n"); + err = -EIO; + goto finish; + } put_request_session(req); @@ -2196,13 +2200,15 @@ static int __do_request(struct ceph_mds_client *mdsc, out_session: ceph_put_mds_session(session); +finish: + if (err) { + dout("__do_request early error %d\n", err); + req->r_err = err; + complete_request(mdsc, req); + __unregister_request(mdsc, req); + } out: return err; - -finish: - req->r_err = err; - complete_request(mdsc, req); - goto out; } /* @@ -2289,8 +2295,6 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, if (req->r_err) { err = req->r_err; - __unregister_request(mdsc, req); - dout("do_request early error %d\n", err); goto out; } @@ -2411,7 +2415,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); goto out; } - if (req->r_got_safe && !head->safe) { + if (req->r_got_safe) { pr_warn("got unsafe after safe on %llu from mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); @@ -2520,8 +2524,7 @@ out_err: if (err) { req->r_err = err; } else { - req->r_reply = msg; - ceph_msg_get(msg); + req->r_reply = ceph_msg_get(msg); req->r_got_result = true; } } else { @@ -3555,7 +3558,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush, want_snap; - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3584,7 +3587,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) */ static bool done_closing_sessions(struct ceph_mds_client *mdsc) { - if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) + if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return true; return atomic_read(&mdsc->num_sessions) == 0; } @@ -3643,6 +3646,34 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("stopped\n"); } +void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_session *session; + int mds; + + dout("force umount\n"); + + mutex_lock(&mdsc->mutex); + for (mds = 0; mds < mdsc->max_sessions; mds++) { + session = __ceph_lookup_mds_session(mdsc, mds); + if (!session) + continue; + mutex_unlock(&mdsc->mutex); + mutex_lock(&session->s_mutex); + __close_session(mdsc, session); + if (session->s_state == CEPH_MDS_SESSION_CLOSING) { + cleanup_session_requests(mdsc, session); + remove_session_caps(session); + } + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); + mutex_lock(&mdsc->mutex); + kick_requests(mdsc, mds); + } + __wake_requests(mdsc, &mdsc->waiting_for_map); + mutex_unlock(&mdsc->mutex); +} + static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 762757e6cebf..f575eafe2261 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -366,6 +366,7 @@ extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, extern int ceph_mdsc_init(struct ceph_fs_client *fsc); extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); +extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 233d906aec02..4aa7122a8d38 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -338,12 +338,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } - if (num == 0 && realm->seq == ceph_empty_snapc->seq) { - ceph_get_snap_context(ceph_empty_snapc); - snapc = ceph_empty_snapc; - goto done; - } - /* alloc new snap context */ err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) @@ -381,7 +375,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); -done: ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; return 0; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7b6bfcbf801c..f446afada328 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -708,6 +708,7 @@ static void ceph_umount_begin(struct super_block *sb) if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; + ceph_mdsc_force_umount(fsc->mdsc); return; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6a1119e87fbb..e739950ca084 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) static void cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { - if (ses->sectype == Unspecified) + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); return; + } seq_puts(s, ",sec="); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index c63f5227b681..28a77bf1d559 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); @@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, size_t len; if (pos == max) { unsigned blkbits = inode->i_blkbits; - sector_t block = pos >> blkbits; + long page = pos >> PAGE_SHIFT; + sector_t block = page << (PAGE_SHIFT - blkbits); unsigned first = pos - (block << blkbits); long size; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 24489126f8ca..091a36444972 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1380,6 +1380,10 @@ static long writeback_chunk_size(struct bdi_writeback *wb, * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. + * + * NOTE! This is called with wb->list_lock held, and will + * unlock and relock that for each inode it ends up doing + * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, @@ -1398,9 +1402,7 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ - struct blk_plug plug; - blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1479,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; + + if (need_resched()) { + /* + * We're trying to balance between building up a nice + * long list of IOs to improve our merge rate, and + * getting those IOs out quickly for anyone throttling + * in balance_dirty_pages(). cond_resched() doesn't + * unplug, so get our IOs out the door before we + * give up the CPU. + */ + blk_flush_plug(current); + cond_resched(); + } + + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) @@ -1486,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb, requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); - cond_resched_lock(&wb->list_lock); + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. @@ -1498,7 +1515,6 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } - blk_finish_plug(&plug); return wrote; } @@ -1545,12 +1561,15 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .range_cyclic = 1, .reason = reason, }; + struct blk_plug plug; + blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work.nr_pages; } @@ -1578,10 +1597,12 @@ static long wb_writeback(struct bdi_writeback *wb, unsigned long oldest_jif; struct inode *inode; long progress; + struct blk_plug plug; oldest_jif = jiffies; work->older_than_this = &oldest_jif; + blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { /* @@ -1661,6 +1682,7 @@ static long wb_writeback(struct bdi_writeback *wb, } } spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work->nr_pages; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a38e38f7b6fc..9bd1244caf38 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -34,6 +34,7 @@ #include <linux/percpu.h> #include <linux/list_sort.h> #include <linux/lockref.h> +#include <linux/rhashtable.h> #include "gfs2.h" #include "incore.h" @@ -50,9 +51,8 @@ #include "trace_gfs2.h" struct gfs2_glock_iter { - int hash; /* hash bucket index */ - unsigned nhash; /* Index within current bucket */ struct gfs2_sbd *sdp; /* incore superblock */ + struct rhashtable_iter hti; /* rhashtable iterator */ struct gfs2_glock *gl; /* current glock struct */ loff_t last_pos; /* last position */ }; @@ -70,44 +70,19 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) -#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; -static struct dentry *gfs2_root; - -/** - * gl_hash() - Turn glock number into hash bucket number - * @lock: The glock number - * - * Returns: The number of the corresponding hash bucket - */ - -static unsigned int gl_hash(const struct gfs2_sbd *sdp, - const struct lm_lockname *name) -{ - unsigned int h; - - h = jhash(&name->ln_number, sizeof(u64), 0); - h = jhash(&name->ln_type, sizeof(unsigned int), h); - h = jhash(&sdp, sizeof(struct gfs2_sbd *), h); - h &= GFS2_GL_HASH_MASK; - - return h; -} - -static inline void spin_lock_bucket(unsigned int hash) -{ - hlist_bl_lock(&gl_hash_table[hash]); -} +static struct rhashtable_params ht_parms = { + .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, + .key_len = sizeof(struct lm_lockname), + .key_offset = offsetof(struct gfs2_glock, gl_name), + .head_offset = offsetof(struct gfs2_glock, gl_node), +}; -static inline void spin_unlock_bucket(unsigned int hash) -{ - hlist_bl_unlock(&gl_hash_table[hash]); -} +static struct rhashtable gl_hash_table; -static void gfs2_glock_dealloc(struct rcu_head *rcu) +void gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (gl->gl_ops->go_flags & GLOF_ASPACE) { kmem_cache_free(gfs2_glock_aspace_cachep, gl); @@ -115,13 +90,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(gfs2_glock_cachep, gl); } -} - -void gfs2_glock_free(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - - call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); } @@ -192,7 +160,7 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); if (lockref_put_or_lock(&gl->gl_lockref)) @@ -202,9 +170,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); @@ -212,33 +178,6 @@ void gfs2_glock_put(struct gfs2_glock *gl) } /** - * search_bucket() - Find struct gfs2_glock by lock number - * @bucket: the bucket to search - * @name: The lock name - * - * Returns: NULL, or the struct gfs2_glock with the requested number - */ - -static struct gfs2_glock *search_bucket(unsigned int hash, - const struct gfs2_sbd *sdp, - const struct lm_lockname *name) -{ - struct gfs2_glock *gl; - struct hlist_bl_node *h; - - hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { - if (!lm_name_equal(&gl->gl_name, name)) - continue; - if (gl->gl_sbd != sdp) - continue; - if (lockref_get_not_dead(&gl->gl_lockref)) - return gl; - } - - return NULL; -} - -/** * may_grant - check if its ok to grant a new lock * @gl: The glock * @gh: The lock request which we wish to grant @@ -506,7 +445,7 @@ __releases(&gl->gl_spin) __acquires(&gl->gl_spin) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int lck_flags = gh ? gh->gh_flags : 0; int ret; @@ -628,7 +567,7 @@ out_unlock: static void delete_work_func(struct work_struct *work) { struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip; struct inode *inode; u64 no_addr = gl->gl_name.ln_number; @@ -704,15 +643,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct gfs2_glock **glp) { struct super_block *s = sdp->sd_vfs; - struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; - struct gfs2_glock *gl, *tmp; - unsigned int hash = gl_hash(sdp, &name); + struct lm_lockname name = { .ln_number = number, + .ln_type = glops->go_type, + .ln_sbd = sdp }; + struct gfs2_glock *gl, *tmp = NULL; struct address_space *mapping; struct kmem_cache *cachep; + int ret, tries = 0; - rcu_read_lock(); - gl = search_bucket(hash, sdp, &name); - rcu_read_unlock(); + gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); + if (gl && !lockref_get_not_dead(&gl->gl_lockref)) + gl = NULL; *glp = gl; if (gl) @@ -739,14 +680,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } atomic_inc(&sdp->sd_glock_disposal); - gl->gl_sbd = sdp; + gl->gl_node.next = NULL; gl->gl_flags = 0; gl->gl_name = name; gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; - gl->gl_hash = hash; gl->gl_ops = glops; gl->gl_dstamp = ktime_set(0, 0); preempt_disable(); @@ -771,22 +711,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->writeback_index = 0; } - spin_lock_bucket(hash); - tmp = search_bucket(hash, sdp, &name); - if (tmp) { - spin_unlock_bucket(hash); - kfree(gl->gl_lksb.sb_lvbptr); - kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); - gl = tmp; - } else { - hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); - spin_unlock_bucket(hash); +again: + ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node, + ht_parms); + if (ret == 0) { + *glp = gl; + return 0; } - *glp = gl; + if (ret == -EEXIST) { + ret = 0; + tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); + if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { + if (++tries < 100) { + cond_resched(); + goto again; + } + tmp = NULL; + ret = -ENOMEM; + } + } else { + WARN_ON_ONCE(ret); + } + kfree(gl->gl_lksb.sb_lvbptr); + kmem_cache_free(cachep, gl); + atomic_dec(&sdp->sd_glock_disposal); + *glp = tmp; - return 0; + return ret; } /** @@ -928,7 +880,7 @@ __releases(&gl->gl_spin) __acquires(&gl->gl_spin) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; int try_futile = 0; @@ -1006,7 +958,7 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) @@ -1313,7 +1265,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; spin_lock(&gl->gl_spin); gl->gl_reply = ret; @@ -1462,31 +1414,26 @@ static struct shrinker glock_shrinker = { * */ -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, - unsigned int hash) +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct hlist_bl_head *head = &gl_hash_table[hash]; - struct hlist_bl_node *pos; + struct rhash_head *pos, *next; + const struct bucket_table *tbl; + int i; rcu_read_lock(); - hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) - examiner(gl); + tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) { + if ((gl->gl_name.ln_sbd == sdp) && + lockref_get_not_dead(&gl->gl_lockref)) + examiner(gl); + } } rcu_read_unlock(); cond_resched(); } -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) -{ - unsigned x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(examiner, sdp, x); -} - - /** * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw @@ -1569,7 +1516,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) int ret; ret = gfs2_truncatei_resume(ip); - gfs2_assert_withdraw(gl->gl_sbd, ret == 0); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); @@ -1733,17 +1680,17 @@ static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock *gl = iter_ptr; - seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n", + seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, - (long long)gl->gl_stats.stats[GFS2_LKS_SRTT], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], - (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], - (long long)gl->gl_stats.stats[GFS2_LKS_SIRT], - (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], - (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], - (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], + (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); return 0; } @@ -1776,11 +1723,10 @@ static const char *gfs2_stype[] = { static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) { - struct gfs2_glock_iter *gi = seq->private; - struct gfs2_sbd *sdp = gi->sdp; - unsigned index = gi->hash >> 3; - unsigned subindex = gi->hash & 0x07; - s64 value; + struct gfs2_sbd *sdp = seq->private; + loff_t pos = *(loff_t *)iter_ptr; + unsigned index = pos >> 3; + unsigned subindex = pos & 0x07; int i; if (index == 0 && subindex != 0) @@ -1791,12 +1737,12 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) for_each_possible_cpu(i) { const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); - if (index == 0) { - value = i; - } else { - value = lkstats->lkstats[index - 1].stats[subindex]; - } - seq_printf(seq, " %15lld", (long long)value); + + if (index == 0) + seq_printf(seq, " %15u", i); + else + seq_printf(seq, " %15llu", (unsigned long long)lkstats-> + lkstats[index - 1].stats[subindex]); } seq_putc(seq, '\n'); return 0; @@ -1804,20 +1750,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) int __init gfs2_glock_init(void) { - unsigned i; - for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { - INIT_HLIST_BL_HEAD(&gl_hash_table[i]); - } + int ret; + + ret = rhashtable_init(&gl_hash_table, &ht_parms); + if (ret < 0) + return ret; glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (!glock_workqueue) + if (!glock_workqueue) { + rhashtable_destroy(&gl_hash_table); return -ENOMEM; + } gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs2_delete_workqueue) { destroy_workqueue(glock_workqueue); + rhashtable_destroy(&gl_hash_table); return -ENOMEM; } @@ -1829,72 +1779,41 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { unregister_shrinker(&glock_shrinker); + rhashtable_destroy(&gl_hash_table); destroy_workqueue(glock_workqueue); destroy_workqueue(gfs2_delete_workqueue); } -static inline struct gfs2_glock *glock_hash_chain(unsigned hash) +static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { - return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), - struct gfs2_glock, gl_list); -} - -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) -{ - return hlist_bl_entry(rcu_dereference(gl->gl_list.next), - struct gfs2_glock, gl_list); -} - -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) -{ - struct gfs2_glock *gl; - do { - gl = gi->gl; - if (gl) { - gi->gl = glock_hash_next(gl); - gi->nhash++; - } else { - if (gi->hash >= GFS2_GL_HASH_SIZE) { - rcu_read_unlock(); - return 1; - } - gi->gl = glock_hash_chain(gi->hash); - gi->nhash = 0; - } - while (gi->gl == NULL) { - gi->hash++; - if (gi->hash >= GFS2_GL_HASH_SIZE) { - rcu_read_unlock(); - return 1; - } - gi->gl = glock_hash_chain(gi->hash); - gi->nhash = 0; + gi->gl = rhashtable_walk_next(&gi->hti); + if (IS_ERR(gi->gl)) { + if (PTR_ERR(gi->gl) == -EAGAIN) + continue; + gi->gl = NULL; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || - __lockref_is_dead(&gi->gl->gl_lockref)); - - return 0; + } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) || + __lockref_is_dead(&gi->gl->gl_lockref))); } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; + int ret; if (gi->last_pos <= *pos) - n = gi->nhash + (*pos - gi->last_pos); - else - gi->hash = 0; + n = (*pos - gi->last_pos); - gi->nhash = 0; - rcu_read_lock(); + ret = rhashtable_walk_start(&gi->hti); + if (ret) + return NULL; do { - if (gfs2_glock_iter_next(gi)) - return NULL; - } while (n--); + gfs2_glock_iter_next(gi); + } while (gi->gl && n--); gi->last_pos = *pos; return gi->gl; @@ -1907,9 +1826,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; gi->last_pos = *pos; - if (gfs2_glock_iter_next(gi)) - return NULL; - + gfs2_glock_iter_next(gi); return gi->gl; } @@ -1917,9 +1834,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock_iter *gi = seq->private; - if (gi->gl) - rcu_read_unlock(); gi->gl = NULL; + rhashtable_walk_stop(&gi->hti); } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) @@ -1930,26 +1846,19 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) { - struct gfs2_glock_iter *gi = seq->private; - - gi->hash = *pos; + preempt_disable(); if (*pos >= GFS2_NR_SBSTATS) return NULL; - preempt_disable(); - return SEQ_START_TOKEN; + return pos; } static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { - struct gfs2_glock_iter *gi = seq->private; (*pos)++; - gi->hash++; - if (gi->hash >= GFS2_NR_SBSTATS) { - preempt_enable(); + if (*pos >= GFS2_NR_SBSTATS) return NULL; - } - return SEQ_START_TOKEN; + return pos; } static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) @@ -1987,14 +1896,28 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) if (ret == 0) { struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); } return ret; } +static int gfs2_glocks_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + + gi->gl = NULL; + rhashtable_walk_exit(&gi->hti); + return seq_release_private(inode, file); +} + static int gfs2_glstats_open(struct inode *inode, struct file *file) { int ret = seq_open_private(file, &gfs2_glstats_seq_ops, @@ -2003,21 +1926,22 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + gi->last_pos = 0; seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); } return ret; } static int gfs2_sbstats_open(struct inode *inode, struct file *file) { - int ret = seq_open_private(file, &gfs2_sbstats_seq_ops, - sizeof(struct gfs2_glock_iter)); + int ret = seq_open(file, &gfs2_sbstats_seq_ops); if (ret == 0) { struct seq_file *seq = file->private_data; - struct gfs2_glock_iter *gi = seq->private; - gi->sdp = inode->i_private; + seq->private = inode->i_private; /* sdp */ } return ret; } @@ -2027,7 +1951,7 @@ static const struct file_operations gfs2_glocks_fops = { .open = gfs2_glocks_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = gfs2_glocks_release, }; static const struct file_operations gfs2_glstats_fops = { @@ -2035,7 +1959,7 @@ static const struct file_operations gfs2_glstats_fops = { .open = gfs2_glstats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = gfs2_glocks_release, }; static const struct file_operations gfs2_sbstats_fops = { @@ -2043,7 +1967,7 @@ static const struct file_operations gfs2_sbstats_fops = { .open = gfs2_sbstats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fa3fa5e94553..1f6c9c3fe5cb 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -32,13 +32,15 @@ struct workqueue_struct *gfs2_freeze_wq; static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { - fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", + fs_err(gl->gl_name.ln_sbd, + "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page " + "state 0x%lx\n", bh, (unsigned long long)bh->b_blocknr, bh->b_state, bh->b_page->mapping, bh->b_page->flags); - fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", + fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n", gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); - gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); + gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n"); } /** @@ -52,7 +54,7 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, unsigned int nr_revokes) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd, *tmp; struct buffer_head *bh; @@ -80,7 +82,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, static void gfs2_ail_empty_gl(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; memset(&tr, 0, sizeof(tr)); @@ -109,7 +111,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; @@ -139,7 +141,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static void rgrp_go_sync(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd; int error; @@ -179,7 +181,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd = gl->gl_object; @@ -218,7 +220,7 @@ static void inode_go_sync(struct gfs2_glock *gl) GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); + gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH); filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -252,7 +254,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gl->gl_object; - gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); @@ -264,9 +266,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } } - if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { - gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); - gl->gl_sbd->sd_rindex_uptodate = 0; + if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) { + gfs2_log_flush(gl->gl_name.ln_sbd, NULL, NORMAL_FLUSH); + gl->gl_name.ln_sbd->sd_rindex_uptodate = 0; } if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); @@ -281,7 +283,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) static int inode_go_demote_ok(const struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_holder *gh; if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) @@ -416,7 +418,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) static int inode_go_lock(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = gl->gl_object; int error = 0; @@ -477,7 +479,7 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) static void freeze_go_sync(struct gfs2_glock *gl) { int error = 0; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { @@ -500,7 +502,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; struct gfs2_log_header_host head; @@ -545,7 +547,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl) static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) return; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a1ec7c20e498..121ed08d9d9f 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -22,6 +22,7 @@ #include <linux/ktime.h> #include <linux/percpu.h> #include <linux/lockref.h> +#include <linux/rhashtable.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -203,13 +204,15 @@ enum { }; struct lm_lockname { + struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; }; #define lm_name_equal(name1, name2) \ - (((name1)->ln_number == (name2)->ln_number) && \ - ((name1)->ln_type == (name2)->ln_type)) + (((name1)->ln_number == (name2)->ln_number) && \ + ((name1)->ln_type == (name2)->ln_type) && \ + ((name1)->ln_sbd == (name2)->ln_sbd)) struct gfs2_glock_operations { @@ -241,7 +244,7 @@ enum { }; struct gfs2_lkstats { - s64 stats[GFS2_NR_LKSTATS]; + u64 stats[GFS2_NR_LKSTATS]; }; enum { @@ -327,7 +330,6 @@ enum { struct gfs2_glock { struct hlist_bl_node gl_list; - struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; @@ -341,7 +343,6 @@ struct gfs2_glock { gl_req:2, /* State in last dlm request */ gl_reply:8; /* Last reply from the dlm */ - unsigned int gl_hash; unsigned long gl_demote_time; /* time of first demote request */ long gl_hold_time; struct list_head gl_holders; @@ -367,7 +368,7 @@ struct gfs2_glock { loff_t end; } gl_vm; }; - struct rcu_head gl_rcu; + struct rhash_head gl_node; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ @@ -835,7 +836,7 @@ static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) { - const struct gfs2_sbd *sdp = gl->gl_sbd; + const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; preempt_disable(); this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++; preempt_enable(); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 641383a9c1bb..284c1542783e 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -31,7 +31,7 @@ extern struct workqueue_struct *gfs2_control_wq; * * @delta is the difference between the current rtt sample and the * running average srtt. We add 1/8 of that to the srtt in order to - * update the current srtt estimate. The varience estimate is a bit + * update the current srtt estimate. The variance estimate is a bit * more complicated. We subtract the abs value of the @delta from * the current variance estimate and add 1/4 of that to the running * total. @@ -80,7 +80,7 @@ static inline void gfs2_update_reply_times(struct gfs2_glock *gl) preempt_disable(); rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp)); - lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats); gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */ preempt_enable(); @@ -108,7 +108,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl) dstamp = gl->gl_dstamp; gl->gl_dstamp = ktime_get_real(); irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp)); - lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats); + lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats); gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */ gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */ preempt_enable(); @@ -253,7 +253,7 @@ static void gfs2_reverse_hex(char *c, u64 value) static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; int req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; @@ -281,7 +281,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, static void gdlm_put_lock(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int lvb_needs_unlock = 0; int error; @@ -319,7 +319,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) static void gdlm_cancel(struct gfs2_glock *gl) { - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); } diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 92324ac58290..d5369a109781 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -70,7 +70,7 @@ static bool buffer_is_rgrp(const struct gfs2_bufdata *bd) static void maybe_release_space(struct gfs2_bufdata *bd) { struct gfs2_glock *gl = bd->bd_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_rgrpd *rgd = gl->gl_object; unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; @@ -578,7 +578,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, static void gfs2_meta_sync(struct gfs2_glock *gl) { struct address_space *mapping = gfs2_glock2aspace(gl); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error; if (mapping == NULL) @@ -588,7 +588,7 @@ static void gfs2_meta_sync(struct gfs2_glock *gl) error = filemap_fdatawait(mapping); if (error) - gfs2_io_error(gl->gl_sbd); + gfs2_io_error(gl->gl_name.ln_sbd); } static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index b984a6e190bc..0e1d4be5865a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -114,7 +114,7 @@ const struct address_space_operations gfs2_rgrp_aops = { struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) { struct address_space *mapping = gfs2_glock2aspace(gl); - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct page *page; struct buffer_head *bh; unsigned int shift; @@ -200,7 +200,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct buffer_head *bh; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { @@ -362,7 +362,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct buffer_head *first_bh, *bh; u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index ac5d8027d335..8ca161567a93 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -44,7 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) { struct inode *inode = mapping->host; if (mapping->a_ops == &gfs2_meta_aops) - return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; + return (((struct gfs2_glock *)mapping) - 1)->gl_name.ln_sbd; else if (mapping->a_ops == &gfs2_rgrp_aops) return container_of(mapping, struct gfs2_sbd, sd_aspace); else diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 9b61f92fcfdf..3a31226531ea 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -119,7 +119,7 @@ static void gfs2_qd_dispose(struct list_head *list) while (!list_empty(list)) { qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); - sdp = qd->qd_gl->gl_sbd; + sdp = qd->qd_gl->gl_name.ln_sbd; list_del(&qd->qd_lru); @@ -302,7 +302,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, static void qd_hold(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); lockref_get(&qd->qd_lockref); } @@ -367,7 +367,7 @@ static void slot_put(struct gfs2_quota_data *qd) static int bh_get(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); unsigned int block, offset; struct buffer_head *bh; @@ -414,7 +414,7 @@ fail: static void bh_put(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; mutex_lock(&sdp->sd_quota_mutex); gfs2_assert(sdp, qd->qd_bh_count); @@ -486,7 +486,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) static void qd_unlock(struct gfs2_quota_data *qd) { - gfs2_assert_warn(qd->qd_gl->gl_sbd, + gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd, test_bit(QDF_LOCKED, &qd->qd_flags)); clear_bit(QDF_LOCKED, &qd->qd_flags); bh_put(qd); @@ -614,7 +614,7 @@ static int sort_qd(const void *a, const void *b) static void do_qc(struct gfs2_quota_data *qd, s64 change) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); struct gfs2_quota_change *qc = qd->qd_bh_qc; s64 x; @@ -831,7 +831,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { - struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; @@ -922,7 +922,7 @@ out: gfs2_glock_dq_uninit(&ghs[qx]); mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH); + gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH); return error; } @@ -954,7 +954,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; int error; @@ -1037,7 +1037,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) static int need_sync(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_tune *gt = &sdp->sd_tune; s64 value; unsigned int num, den; @@ -1125,7 +1125,7 @@ out: static int print_message(struct gfs2_quota_data *qd, char *type) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; fs_info(sdp, "quota %s for %s %u\n", type, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c6c62321dfd6..475985d14758 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1860,13 +1860,13 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) { const struct gfs2_glock *gl = rgd->rd_gl; - const struct gfs2_sbd *sdp = gl->gl_sbd; + const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_lkstats *st; - s64 r_dcount, l_dcount; - s64 l_srttb, a_srttb = 0; + u64 r_dcount, l_dcount; + u64 l_srttb, a_srttb = 0; s64 srttb_diff; - s64 sqr_diff; - s64 var; + u64 sqr_diff; + u64 var; int cpu, nonzero = 0; preempt_disable(); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 20c007d747ab..49ac55da4e33 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -104,7 +104,7 @@ TRACE_EVENT(gfs2_glock_state_change, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gl->gl_name.ln_number; __entry->gltype = gl->gl_name.ln_type; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -140,7 +140,7 @@ TRACE_EVENT(gfs2_glock_put, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -174,7 +174,7 @@ TRACE_EVENT(gfs2_demote_rq, ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); @@ -209,7 +209,7 @@ TRACE_EVENT(gfs2_promote, ), TP_fast_assign( - __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; __entry->first = first; @@ -239,7 +239,7 @@ TRACE_EVENT(gfs2_glock_queue, ), TP_fast_assign( - __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; __entry->queue = queue; @@ -267,18 +267,18 @@ TRACE_EVENT(gfs2_glock_lock_time, __field( int, status ) __field( char, flags ) __field( s64, tdiff ) - __field( s64, srtt ) - __field( s64, srttvar ) - __field( s64, srttb ) - __field( s64, srttvarb ) - __field( s64, sirt ) - __field( s64, sirtvar ) - __field( s64, dcount ) - __field( s64, qcount ) + __field( u64, srtt ) + __field( u64, srttvar ) + __field( u64, srttb ) + __field( u64, srttvarb ) + __field( u64, sirt ) + __field( u64, sirtvar ) + __field( u64, dcount ) + __field( u64, qcount ) ), TP_fast_assign( - __entry->dev = gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gl->gl_name.ln_number; __entry->gltype = gl->gl_name.ln_type; __entry->status = gl->gl_lksb.sb_status; @@ -333,7 +333,7 @@ TRACE_EVENT(gfs2_pin, ), TP_fast_assign( - __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = bd->bd_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->pin = pin; __entry->len = bd->bd_bh->b_size; __entry->block = bd->bd_bh->b_blocknr; @@ -449,7 +449,7 @@ TRACE_EVENT(gfs2_bmap, ), TP_fast_assign( - __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->lblock = lblock; __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0; __entry->inum = ip->i_no_addr; @@ -489,7 +489,7 @@ TRACE_EVENT(gfs2_block_alloc, ), TP_fast_assign( - __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = rgd->rd_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->start = block; __entry->inum = ip->i_no_addr; __entry->len = len; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 88bff2430669..b95d0d625f32 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -158,7 +158,7 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) { struct gfs2_trans *tr = current->journal_info; - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); struct gfs2_bufdata *bd; @@ -224,7 +224,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { - struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_bufdata *bd; lock_buffer(bh); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 2714ef835bdd..be806ead7f4d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -113,7 +113,8 @@ out: return status; } -static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) +static int nfs_delegation_claim_opens(struct inode *inode, + const nfs4_stateid *stateid, fmode_t type) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -140,7 +141,7 @@ again: /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - err = nfs4_open_delegation_recall(ctx, state, stateid); + err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) err = nfs_delegation_claim_locks(ctx, state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) @@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation do { if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) break; - err = nfs_delegation_claim_opens(inode, &delegation->stateid); + err = nfs_delegation_claim_opens(inode, &delegation->stateid, + delegation->type); if (!issync || err != -EAGAIN) break; /* diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index a44829173e57..333063e032f0 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 38678d9a5cc4..4b1d08f56aba 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq, struct nfs_writeverf *verfp = &dreq->verf; #ifdef CONFIG_NFS_V4_1 - if (ds_clp) { - /* pNFS is in use, use the DS verf */ + /* + * pNFS is in use, use the DS verf except commit_through_mds is set + * for layout segment where nbuckets is zero. + */ + if (ds_clp && dreq->ds_cinfo.nbuckets > 0) { if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; else diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index b34f2e228601..02ec07973bc4 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -629,23 +629,18 @@ out_put: goto out; } -static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) { int i; - for (i = 0; i < fl->num_fh; i++) { - if (!fl->fh_array[i]) - break; - kfree(fl->fh_array[i]); + if (fl->fh_array) { + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); } - kfree(fl->fh_array); - fl->fh_array = NULL; -} - -static void -_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) -{ - filelayout_free_fh_array(fl); kfree(fl); } @@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, /* Do we want to use a mempool here? */ fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) - goto out_err_free; + goto out_err; p = xdr_inline_decode(&stream, 4); if (unlikely(!p)) - goto out_err_free; + goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); - goto out_err_free; + goto out_err; } p = xdr_inline_decode(&stream, fl->fh_array[i]->size); if (unlikely(!p)) - goto out_err_free; + goto out_err; memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); dprintk("DEBUG: %s: fh len %d\n", __func__, fl->fh_array[i]->size); @@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __free_page(scratch); return 0; -out_err_free: - filelayout_free_fh_array(fl); out_err: __free_page(scratch); return -EIO; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index d731bbf974aa..0f020e4d8421 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct nfs_server *server = NFS_SERVER(file_inode(filep)); struct nfs4_exception exception = { }; - int err; + loff_t err; do { err = _nfs42_proc_llseek(filep, offset, whence); + if (err >= 0) + break; if (err == -ENOTSUPP) return -EOPNOTSUPP; err = nfs4_handle_exception(server, err, &exception); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 693b903b48bd..f93b9cdb4934 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } +static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, + fmode_t fmode) +{ + switch(fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + return state->n_rdwr != 0; + case FMODE_WRITE: + return state->n_wronly != 0; + case FMODE_READ: + return state->n_rdonly != 0; + } + WARN_ON_ONCE(1); + return false; +} + static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) { int ret = 0; @@ -1571,17 +1586,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context return opendata; } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, + fmode_t fmode) { struct nfs4_state *newstate; int ret; - if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || - opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && - (opendata->o_arg.u.delegation_type & fmode) != fmode) - /* This mode can't have been delegated, so we must have - * a valid open_stateid to cover it - not need to reclaim. - */ + if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; @@ -1597,14 +1608,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod newstate = nfs4_opendata_to_nfs4_state(opendata); if (IS_ERR(newstate)) return PTR_ERR(newstate); + if (newstate != opendata->state) + ret = -ESTALE; nfs4_close_state(newstate, fmode); - *res = newstate; - return 0; + return ret; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - struct nfs4_state *newstate; int ret; /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ @@ -1615,27 +1626,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * clear_bit(NFS_DELEGATED_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); - if (state->n_rdwr != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_wronly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_rdonly != 0) { - ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_READ); + if (ret != 0) + return ret; /* * We may have performed cached opens for all three recoveries. * Check if we need to update the current stateid. @@ -1759,18 +1758,32 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct return err; } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, + struct nfs4_state *state, const nfs4_stateid *stateid, + fmode_t type) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; - int err; + int err = 0; opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - err = nfs4_open_recover(opendata, state); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + switch (type & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + case FMODE_WRITE: + err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (err) + break; + err = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (err) + break; + case FMODE_READ: + err = nfs4_open_recover_helper(opendata, FMODE_READ); + } nfs4_opendata_put(opendata); return nfs4_handle_delegation_recall_error(server, state, stateid, err); } @@ -2645,6 +2658,15 @@ out: return err; } +static bool +nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task) +{ + if (inode == NULL || !nfs_have_layout(inode)) + return false; + + return pnfs_wait_on_layoutreturn(inode, task); +} + struct nfs4_closedata { struct inode *inode; struct nfs4_state *state; @@ -2763,6 +2785,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } + if (nfs4_wait_on_layoutreturn(inode, task)) { + nfs_release_seqid(calldata->arg.seqid); + goto out_wait; + } + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc) @@ -5308,6 +5335,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; + if (nfs4_wait_on_layoutreturn(d_data->inode, task)) + return; + if (d_data->roc) pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); @@ -7800,39 +7830,46 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", __func__, delay); rpc_delay(task, delay); - task->tk_status = 0; - rpc_restart_call_prepare(task); - goto out; /* Do not call nfs4_async_handle_error() */ + /* Do not call nfs4_async_handle_error() */ + goto out_restart; } break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: spin_lock(&inode->i_lock); - lo = NFS_I(inode)->layout; - if (!lo || list_empty(&lo->plh_segs)) { + if (nfs4_stateid_match(&lgp->args.stateid, + &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); /* If the open stateid was bad, then recover it. */ state = lgp->args.ctx->state; - } else { + break; + } + lo = NFS_I(inode)->layout; + if (lo && nfs4_stateid_match(&lgp->args.stateid, + &lo->plh_stateid)) { LIST_HEAD(head); /* * Mark the bad layout state as invalid, then retry * with the current stateid. */ + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); - - task->tk_status = 0; - rpc_restart_call_prepare(task); - } + } else + spin_unlock(&inode->i_lock); + goto out_restart; } if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) - rpc_restart_call_prepare(task); + goto out_restart; out: dprintk("<-- %s\n", __func__); return; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; out_overflow: task->tk_status = -EOVERFLOW; goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da73bc443238..5db324635e92 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1481,7 +1481,7 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); - clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); spin_lock(&sp->so_lock); goto restart; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7c5718ba625e..fe3ddd20ff89 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * for it without upsetting the slab allocator. */ if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * - sizeof(struct page) > PAGE_SIZE) + sizeof(struct page *) > PAGE_SIZE) return 0; return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ba1246433794..8abe27165ad0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino) mark_lseg_invalid(lseg, &tmp_list); found = true; } - /* pnfs_prepare_layoutreturn() grabs lo ref and it will be put - * in pnfs_roc_release(). We don't really send a layoutreturn but - * still want others to view us like we are sending one! - * - * If pnfs_prepare_layoutreturn() fails, it means someone else is doing - * LAYOUTRETURN, so we proceed like there are no layouts to return. - * - * ROC in three conditions: + /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn - * 3. no others are sending layoutreturn */ - if (found && !layoutreturn && pnfs_prepare_layoutreturn(lo)) + if (found && !layoutreturn) { + /* lo ref dropped in pnfs_roc_release() */ + pnfs_get_layout_hdr(lo); roc = true; + } out_noroc: spin_unlock(&ino->i_lock); @@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) spin_unlock(&ino->i_lock); } +bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; + bool sleep = false; + + /* we might not have grabbed lo reference. so need to check under + * i_lock */ + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + sleep = true; + spin_unlock(&ino->i_lock); + + if (sleep) + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + + return sleep; +} + /* * Compare two layout segments for sorting into layout cache. * We want to preferentially return RW over RO layouts, so ensure those diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 78c9351ff117..d1990e90e7a0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); +bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); @@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) { } +static inline bool +pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) +{ + return false; +} + static inline void set_pnfs_layoutdriver(struct nfs_server *s, const struct nfs_fh *mntfh, u32 id) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ae0ff7a11b40..01b8cc8e8cfc 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; /* read path should never have more than one mirror */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 388f48079c43..72624dc4a623 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *mirror; + if (pgio->pg_ops && pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + pgio->pg_ops = &nfs_pgio_rw_ops; nfs_pageio_stop_mirroring(pgio); diff --git a/fs/nsfs.c b/fs/nsfs.c index e4905fbf3396..8f20d6016e20 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -142,7 +142,8 @@ static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = dentry->d_fsdata; - return seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); + seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); + return 0; } static const struct super_operations nsfs_ops = { diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 46b8b2bbc95a..ee5aa4daaea0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1658,15 +1659,18 @@ send_response: mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index d0e436dc6437..3d90ad7ff91f 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; + int dispatched = 0; if (!dlm_grab(dlm)) { /* since the domain has gone away on this @@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, dlm_put(dlm); /* sender will take care of this and retry */ return ret; - } else + } else { + dispatched = 1; __dlm_lockres_grab_inflight_worker(dlm, res); + } spin_unlock(&res->spinlock); } else { /* put.. incase we are not the master */ @@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, } spin_unlock(&dlm->spinlock); - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return master; } @@ -1776,7 +1780,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_migratable_lockres *mres) { struct dlm_migratable_lock *ml; - struct list_head *queue; + struct list_head *queue, *iter; struct list_head *tmpq = NULL; struct dlm_lock *newlock = NULL; struct dlm_lockstatus *lksb = NULL; @@ -1821,7 +1825,9 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { + list_for_each(iter, tmpq) { + lock = list_entry(iter, + struct dlm_lock, list); if (lock->ml.cookie == ml->cookie) break; lock = NULL; diff --git a/fs/seq_file.c b/fs/seq_file.c index 263b125dbcf4..225586e141ca 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -372,16 +372,16 @@ EXPORT_SYMBOL(seq_release); * @esc: set of characters that need escaping * * Puts string into buffer, replacing each occurrence of character from - * @esc with usual octal escape. Returns 0 in case of success, -1 - in - * case of overflow. + * @esc with usual octal escape. + * Use seq_has_overflowed() to check for errors. */ -int seq_escape(struct seq_file *m, const char *s, const char *esc) +void seq_escape(struct seq_file *m, const char *s, const char *esc) { char *end = m->buf + m->size; - char *p; + char *p; char c; - for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { + for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { if (!strchr(esc, c)) { *p++ = c; continue; @@ -394,14 +394,13 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc) continue; } seq_set_overflow(m); - return -1; - } + return; + } m->count = p - m->buf; - return 0; } EXPORT_SYMBOL(seq_escape); -int seq_vprintf(struct seq_file *m, const char *f, va_list args) +void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; @@ -409,24 +408,20 @@ int seq_vprintf(struct seq_file *m, const char *f, va_list args) len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); if (m->count + len < m->size) { m->count += len; - return 0; + return; } } seq_set_overflow(m); - return -1; } EXPORT_SYMBOL(seq_vprintf); -int seq_printf(struct seq_file *m, const char *f, ...) +void seq_printf(struct seq_file *m, const char *f, ...) { - int ret; va_list args; va_start(args, f); - ret = seq_vprintf(m, f, args); + seq_vprintf(m, f, args); va_end(args); - - return ret; } EXPORT_SYMBOL(seq_printf); @@ -664,26 +659,25 @@ int seq_open_private(struct file *filp, const struct seq_operations *ops, } EXPORT_SYMBOL(seq_open_private); -int seq_putc(struct seq_file *m, char c) +void seq_putc(struct seq_file *m, char c) { - if (m->count < m->size) { - m->buf[m->count++] = c; - return 0; - } - return -1; + if (m->count >= m->size) + return; + + m->buf[m->count++] = c; } EXPORT_SYMBOL(seq_putc); -int seq_puts(struct seq_file *m, const char *s) +void seq_puts(struct seq_file *m, const char *s) { int len = strlen(s); - if (m->count + len < m->size) { - memcpy(m->buf + m->count, s, len); - m->count += len; - return 0; + + if (m->count + len >= m->size) { + seq_set_overflow(m); + return; } - seq_set_overflow(m); - return -1; + memcpy(m->buf + m->count, s, len); + m->count += len; } EXPORT_SYMBOL(seq_puts); @@ -694,8 +688,8 @@ EXPORT_SYMBOL(seq_puts); * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. */ -int seq_put_decimal_ull(struct seq_file *m, char delimiter, - unsigned long long num) +void seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num) { int len; @@ -707,35 +701,33 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter, if (num < 10) { m->buf[m->count++] = num + '0'; - return 0; + return; } len = num_to_str(m->buf + m->count, m->size - m->count, num); if (!len) goto overflow; m->count += len; - return 0; + return; + overflow: seq_set_overflow(m); - return -1; } EXPORT_SYMBOL(seq_put_decimal_ull); -int seq_put_decimal_ll(struct seq_file *m, char delimiter, - long long num) +void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num) { if (num < 0) { if (m->count + 3 >= m->size) { seq_set_overflow(m); - return -1; + return; } if (delimiter) m->buf[m->count++] = delimiter; num = -num; delimiter = '-'; } - return seq_put_decimal_ull(m, delimiter, num); - + seq_put_decimal_ull(m, delimiter, num); } EXPORT_SYMBOL(seq_put_decimal_ll); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 634e676072cb..50311703135b 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -467,8 +467,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * the fault_*wqh. */ spin_lock(&ctx->fault_pending_wqh.lock); - __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, &range); - __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, &range); + __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); + __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range); spin_unlock(&ctx->fault_pending_wqh.lock); wake_up_poll(&ctx->fd_wqh, POLLHUP); @@ -650,10 +650,10 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx, spin_lock(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) - __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, + __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) - __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, range); + __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range); spin_unlock(&ctx->fault_pending_wqh.lock); } @@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags) file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) + if (IS_ERR(file)) { + mmput(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); + } out: return file; } |