diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 15 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 3 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 11 | ||||
-rw-r--r-- | fs/btrfs/file.c | 16 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 62 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 13 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 179 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 21 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 4 | ||||
-rw-r--r-- | fs/ceph/cache.c | 2 | ||||
-rw-r--r-- | fs/ceph/caps.c | 76 | ||||
-rw-r--r-- | fs/ceph/file.c | 87 | ||||
-rw-r--r-- | fs/ceph/inode.c | 1 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 57 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 16 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 8 | ||||
-rw-r--r-- | fs/cifs/connect.c | 78 | ||||
-rw-r--r-- | fs/cifs/ioctl.c | 11 | ||||
-rw-r--r-- | fs/cifs/smb2file.c | 19 | ||||
-rw-r--r-- | fs/cifs/smb2ops.c | 10 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 123 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.h | 45 | ||||
-rw-r--r-- | fs/cifs/smbfsctl.h | 2 | ||||
-rw-r--r-- | fs/dax.c | 4 |
26 files changed, 626 insertions, 241 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 0a793c7930eb..bb0dfb1c7af1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,12 +50,21 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); -static void bdev_write_inode(struct inode *inode) +static void bdev_write_inode(struct block_device *bdev) { + struct inode *inode = bdev->bd_inode; + int ret; + spin_lock(&inode->i_lock); while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); - WARN_ON_ONCE(write_inode_now(inode, true)); + ret = write_inode_now(inode, true); + if (ret) { + char name[BDEVNAME_SIZE]; + pr_warn_ratelimited("VFS: Dirty inode writeback failed " + "for block device %s (err=%d).\n", + bdevname(bdev, name), ret); + } spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); @@ -1504,7 +1513,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) * ->release can cause the queue to disappear, so flush all * dirty data before. */ - bdev_write_inode(bdev->bd_inode); + bdev_write_inode(bdev); } if (bdev->bd_contains == bdev) { if (disk->fops->release) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 640598c0d0e7..974be09e7556 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the qgroup rescan worker to stop */ + btrfs_qgroup_wait_for_completion(fs_info); + /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8e57da8a1..acf3ed11cfb6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group_cache, bg_list); - space_info = block_group->space_info; list_del_init(&block_group->bg_list); + + space_info = block_group->space_info; + if (ret || btrfs_mixed_space_info(space_info)) { btrfs_put_block_group(block_group); continue; } spin_unlock(&fs_info->unused_bgs_lock); - mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + mutex_lock(&fs_info->delete_unused_bgs_mutex); /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); if (block_group->reserved || btrfs_block_group_used(&block_group->item) || - block_group->ro) { + block_group->ro || + list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do @@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: - mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6bd5ce9d75f0..977e715f0bf2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ next_slot: } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ next_slot: btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0e4f2bfcc37d..994490d5fa64 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1304,8 +1304,14 @@ next_slot: num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -7503,6 +7509,28 @@ struct btrfs_dio_data { u64 reserve; }; +static void adjust_dio_outstanding_extents(struct inode *inode, + struct btrfs_dio_data *dio_data, + const u64 len) +{ + unsigned num_extents; + + num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (dio_data->outstanding_extents) { + dio_data->outstanding_extents -= num_extents; + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents += num_extents; + spin_unlock(&BTRFS_I(inode)->lock); + } +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. */ - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) - return -ENOTBLK; + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, + create)) { + ret = -ENOTBLK; + goto err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { @@ -7657,19 +7688,7 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - /* - * If we have an outstanding_extents count still set then we're - * within our reservation, otherwise we need to adjust our inode - * counter appropriately. - */ - if (dio_data->outstanding_extents) { - (dio_data->outstanding_extents)--; - } else { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); - } - + adjust_dio_outstanding_extents(inode, dio_data, len); btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; @@ -7696,8 +7715,17 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); +err: if (dio_data) current->journal_info = dio_data; + /* + * Compensate the delalloc release we do in btrfs_direct_IO() when we + * write less data then expected, so that we don't underflow our inode's + * outstanding extents counter. + */ + if (create && dio_data) + adjust_dio_outstanding_extents(inode, dio_data, len); + return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 46476c226395..93e12c18ffd7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int slot; int ret; - path->leave_spinning = 1; mutex_lock(&fs_info->qgroup_rescan_lock); ret = btrfs_search_slot_for_read(fs_info->extent_root, &fs_info->qgroup_rescan_progress, @@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) goto out; err = 0; - while (!err) { + while (!err && !btrfs_fs_closing(fs_info)) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2307,7 +2306,8 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { @@ -2336,7 +2336,9 @@ out: } btrfs_end_transaction(trans, fs_info->quota_root); - if (err >= 0) { + if (btrfs_fs_closing(fs_info)) { + btrfs_info(fs_info, "qgroup scan paused"); + } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", err > 0 ? " (inconsistency flag cleared)" : ""); } else { @@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_progress, 0, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; + init_completion(&fs_info->qgroup_rescan_completion); spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - init_completion(&fs_info->qgroup_rescan_completion); - memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 550de89a8661..2907a77fb1f6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct scrub_block *sblocks_for_recheck); static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror); -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size); + struct scrub_block *sblock, + int retry_failed_mirror); +static void scrub_recheck_block_checksum(struct scrub_block *sblock); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good); static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, @@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) struct btrfs_fs_info *fs_info; u64 length; u64 logical; - u64 generation; unsigned int failed_mirror_index; unsigned int is_metadata; unsigned int have_csum; - u8 *csum; struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ struct scrub_block *sblock_bad; int ret; @@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) } length = sblock_to_check->page_count * PAGE_SIZE; logical = sblock_to_check->pagev[0]->logical; - generation = sblock_to_check->pagev[0]->generation; BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; is_metadata = !(sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); have_csum = sblock_to_check->pagev[0]->have_csum; - csum = sblock_to_check->pagev[0]->csum; dev = sblock_to_check->pagev[0]->dev; if (sctx->is_dev_replace && !is_metadata && !have_csum) { @@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_bad = sblocks_for_recheck + failed_mirror_index; /* build and submit the bios for the failed mirror, check checksums */ - scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, - csum, generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) { @@ -1101,9 +1091,7 @@ nodatasum_case: sblock_other = sblocks_for_recheck + mirror_index; /* build and submit the bios, check checksums */ - scrub_recheck_block(fs_info, sblock_other, is_metadata, - have_csum, csum, generation, - sctx->csum_size, 0); + scrub_recheck_block(fs_info, sblock_other, 0); if (!sblock_other->header_error && !sblock_other->checksum_error && @@ -1215,9 +1203,7 @@ nodatasum_case: * is verified, but most likely the data comes out * of the page cache. */ - scrub_recheck_block(fs_info, sblock_bad, - is_metadata, have_csum, csum, - generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) @@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; u64 length = original_sblock->page_count * PAGE_SIZE; u64 logical = original_sblock->pagev[0]->logical; + u64 generation = original_sblock->pagev[0]->generation; + u64 flags = original_sblock->pagev[0]->flags; + u64 have_csum = original_sblock->pagev[0]->have_csum; struct scrub_recover *recover; struct btrfs_bio *bbio; u64 sublen; @@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sblock = sblocks_for_recheck + mirror_index; sblock->sctx = sctx; + page = kzalloc(sizeof(*page), GFP_NOFS); if (!page) { leave_nomem: @@ -1383,7 +1373,15 @@ leave_nomem: } scrub_page_get(page); sblock->pagev[page_index] = page; + page->sblock = sblock; + page->flags = flags; + page->generation = generation; page->logical = logical; + page->have_csum = have_csum; + if (have_csum) + memcpy(page->csum, + original_sblock->pagev[0]->csum, + sctx->csum_size); scrub_stripe_index_and_offset(logical, bbio->map_type, @@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, * the pages that are errored in the just handled mirror can be repaired. */ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror) + struct scrub_block *sblock, + int retry_failed_mirror) { int page_num; sblock->no_io_error_seen = 1; - sblock->header_error = 0; - sblock->checksum_error = 0; for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; @@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } if (sblock->no_io_error_seen) - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - csum_size); + scrub_recheck_block_checksum(sblock); return; } @@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[], return !ret; } -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size) +static void scrub_recheck_block_checksum(struct scrub_block *sblock) { - int page_num; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; - void *mapped_buffer; - - WARN_ON(!sblock->pagev[0]->page); - if (is_metadata) { - struct btrfs_header *h; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - h = (struct btrfs_header *)mapped_buffer; - - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || - !scrub_check_fsid(h->fsid, sblock->pagev[0]) || - memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, - BTRFS_UUID_SIZE)) { - sblock->header_error = 1; - } else if (generation != btrfs_stack_header_generation(h)) { - sblock->header_error = 1; - sblock->generation_error = 1; - } - csum = h->csum; - } else { - if (!have_csum) - return; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - } - - for (page_num = 0;;) { - if (page_num == 0 && is_metadata) - crc = btrfs_csum_data( - ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, - crc, PAGE_SIZE - BTRFS_CSUM_SIZE); - else - crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE); - - kunmap_atomic(mapped_buffer); - page_num++; - if (page_num >= sblock->page_count) - break; - WARN_ON(!sblock->pagev[page_num]->page); - - mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page); - } + sblock->header_error = 0; + sblock->checksum_error = 0; + sblock->generation_error = 0; - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, csum, csum_size)) - sblock->checksum_error = 1; + if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA) + scrub_checksum_data(sblock); + else + scrub_checksum_tree_block(sblock); } static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, @@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock) u64 flags; int ret; + /* + * No need to initialize these stats currently, + * because this function only use return value + * instead of these stats value. + * + * Todo: + * always use stats + */ + sblock->header_error = 0; + sblock->generation_error = 0; + sblock->checksum_error = 0; + WARN_ON(sblock->page_count < 1); flags = sblock->pagev[0]->flags; ret = 0; @@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) struct page *page; void *buffer; u32 crc = ~(u32)0; - int fail = 0; u64 len; int index; @@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) btrfs_csum_final(crc, csum); if (memcmp(csum, on_disk_csum, sctx->csum_size)) - fail = 1; + sblock->checksum_error = 1; - return fail; + return sblock->checksum_error; } static int scrub_checksum_tree_block(struct scrub_block *sblock) @@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) u64 mapped_size; void *p; u32 crc = ~(u32)0; - int fail = 0; - int crc_fail = 0; u64 len; int index; @@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * a) don't have an extent buffer and * b) the page is already kmapped */ - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) - ++fail; + sblock->header_error = 1; - if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) - ++fail; + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) { + sblock->header_error = 1; + sblock->generation_error = 1; + } if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) - ++fail; + sblock->header_error = 1; if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) - ++fail; + sblock->header_error = 1; len = sctx->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; @@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) btrfs_csum_final(crc, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) - ++crc_fail; + sblock->checksum_error = 1; - return fail || crc_fail; + return sblock->header_error || sblock->checksum_error; } static int scrub_checksum_super(struct scrub_block *sblock) @@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) { struct scrub_block *sblock = container_of(work, struct scrub_block, work); struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - unsigned int is_metadata; - unsigned int have_csum; - u8 *csum; - u64 generation; u64 logical; struct btrfs_device *dev; - is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); - have_csum = sblock->pagev[0]->have_csum; - csum = sblock->pagev[0]->csum; - generation = sblock->pagev[0]->generation; logical = sblock->pagev[0]->logical; dev = sblock->pagev[0]->dev; - if (sblock->no_io_error_seen) { - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - sctx->csum_size); - } + if (sblock->no_io_error_seen) + scrub_recheck_block_checksum(sblock); if (!sblock->no_io_error_seen) { spin_lock(&sctx->stat_lock); sctx->stat.read_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "IO error rebuilding logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else if (sblock->header_error || sblock->checksum_error) { spin_lock(&sctx->stat_lock); sctx->stat.uncorrectable_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "failed to rebuild valid logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else { @@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock) } } -static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, - u8 *csum) +static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) { struct btrfs_ordered_sum *sum = NULL; unsigned long index; @@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) ++sctx->stat.no_csum; if (sctx->is_dev_replace && !have_csum) { @@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) goto skip; } @@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, logic_start + map->stripe_len)) { btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", key.objectid, logic_start); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } again: @@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, "scrub: tree block %llu spanning " "stripes, ignored. logical=%llu", key.objectid, logical); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9b2dafa5ba59..a6df8fdc1312 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) u32 count_data = 0; u32 count_meta = 0; u32 count_sys = 0; + int chunk_reserved = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3501,6 +3502,7 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); + btrfs_release_path(path); if (!ret) { mutex_unlock(&fs_info->delete_unused_bgs_mutex); @@ -3537,6 +3539,25 @@ again: goto loop; } + if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { + trans = btrfs_start_transaction(chunk_root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + ret = PTR_ERR(trans); + goto error; + } + + ret = btrfs_force_chunk_alloc(trans, chunk_root, + BTRFS_BLOCK_GROUP_DATA); + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + goto error; + } + + btrfs_end_transaction(trans, chunk_root); + chunk_reserved = 1; + } + ret = btrfs_relocate_chunk(chunk_root, found_key.offset); mutex_unlock(&fs_info->delete_unused_bgs_mutex); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c1..1fcd7b6e7564 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 834f9f3723fb..a4766ded1ba7 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -88,7 +88,7 @@ static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, const struct ceph_inode_info* ci = cookie_netfs_data; uint16_t klen; - /* use ceph virtual inode (id + snaphot) */ + /* use ceph virtual inode (id + snapshot) */ klen = sizeof(ci->i_vino); if (klen > maxbuf) return 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 27b566874bc1..c69e1253b47b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1655,9 +1655,8 @@ retry_locked: !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ - (revoking & (CEPH_CAP_FILE_CACHE| - CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ + (revoking & (CEPH_CAP_FILE_CACHE| + CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { @@ -1971,49 +1970,46 @@ out: } /* - * wait for any uncommitted directory operations to commit. + * wait for any unsafe requests to complete. */ -static int unsafe_dirop_wait(struct inode *inode) +static int unsafe_request_wait(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - if (!S_ISDIR(inode->i_mode)) - return 0; + struct ceph_mds_request *req1 = NULL, *req2 = NULL; + int ret, err = 0; spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_last_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); + if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { + req1 = list_last_entry(&ci->i_unsafe_dirops, + struct ceph_mds_request, + r_unsafe_dir_item); + ceph_mdsc_get_request(req1); + } + if (!list_empty(&ci->i_unsafe_iops)) { + req2 = list_last_entry(&ci->i_unsafe_iops, + struct ceph_mds_request, + r_unsafe_target_item); + ceph_mdsc_get_request(req2); + } + spin_unlock(&ci->i_unsafe_lock); - dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - ret = !wait_for_completion_timeout(&req->r_safe_completion, - ceph_timeout_jiffies(req->r_timeout)); + dout("unsafe_requeset_wait %p wait on tid %llu %llu\n", + inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + if (req1) { + ret = !wait_for_completion_timeout(&req1->r_safe_completion, + ceph_timeout_jiffies(req1->r_timeout)); if (ret) - ret = -EIO; /* timed out */ - - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_first_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - return ret; + err = -EIO; + ceph_mdsc_put_request(req1); + } + if (req2) { + ret = !wait_for_completion_timeout(&req2->r_safe_completion, + ceph_timeout_jiffies(req2->r_timeout)); + if (ret) + err = -EIO; + ceph_mdsc_put_request(req2); + } + return err; } int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -2039,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); - ret = unsafe_dirop_wait(inode); + ret = unsafe_request_wait(inode); /* * only wait on non-file metadata writeback (the mds diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0c62868b5c56..3c68e6aee2f0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -34,6 +34,74 @@ * need to wait for MDS acknowledgement. */ +/* + * Calculate the length sum of direct io vectors that can + * be combined into one page vector. + */ +static size_t dio_get_pagev_size(const struct iov_iter *it) +{ + const struct iovec *iov = it->iov; + const struct iovec *iovend = iov + it->nr_segs; + size_t size; + + size = iov->iov_len - it->iov_offset; + /* + * An iov can be page vectored when both the current tail + * and the next base are page aligned. + */ + while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && + (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { + size += iov->iov_len; + } + dout("dio_get_pagevlen len = %zu\n", size); + return size; +} + +/* + * Allocate a page vector based on (@it, @nbytes). + * The return value is the tuple describing a page vector, + * that is (@pages, @page_align, @num_pages). + */ +static struct page ** +dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, + size_t *page_align, int *num_pages) +{ + struct iov_iter tmp_it = *it; + size_t align; + struct page **pages; + int ret = 0, idx, npages; + + align = (unsigned long)(it->iov->iov_base + it->iov_offset) & + (PAGE_SIZE - 1); + npages = calc_pages_for(align, nbytes); + pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL); + if (!pages) { + pages = vmalloc(sizeof(*pages) * npages); + if (!pages) + return ERR_PTR(-ENOMEM); + } + + for (idx = 0; idx < npages; ) { + size_t start; + ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes, + npages - idx, &start); + if (ret < 0) + goto fail; + + iov_iter_advance(&tmp_it, ret); + nbytes -= ret; + idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE; + } + + BUG_ON(nbytes != 0); + *num_pages = npages; + *page_align = align; + dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align); + return pages; +fail: + ceph_put_page_vector(pages, idx, false); + return ERR_PTR(ret); +} /* * Prepare an open request. Preallocate ceph_cap to avoid an @@ -458,11 +526,10 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, size_t start; ssize_t n; - n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); - if (n < 0) - return n; - - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; + n = dio_get_pagev_size(i); + pages = dio_get_pages_alloc(i, n, &start, &num_pages); + if (IS_ERR(pages)) + return PTR_ERR(pages); ret = striped_read(inode, off, n, pages, num_pages, checkeof, @@ -592,7 +659,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, CEPH_OSD_FLAG_WRITE; while (iov_iter_count(from) > 0) { - u64 len = iov_iter_single_seg_count(from); + u64 len = dio_get_pagev_size(from); size_t start; ssize_t n; @@ -611,14 +678,14 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); - n = iov_iter_get_pages_alloc(from, &pages, len, &start); - if (unlikely(n < 0)) { - ret = n; + n = len; + pages = dio_get_pages_alloc(from, len, &start, &num_pages); + if (IS_ERR(pages)) { ceph_osdc_put_request(req); + ret = PTR_ERR(pages); break; } - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; /* * throw out any page cache pages in this range. this * may block. diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 96d2bd829902..498dcfa2dcdb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -452,6 +452,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); + INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); ci->i_snap_realm = NULL; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 51cb02da75d9..e7b130a637f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -633,13 +633,8 @@ static void __register_request(struct ceph_mds_client *mdsc, mdsc->oldest_tid = req->r_tid; if (dir) { - struct ceph_inode_info *ci = ceph_inode(dir); - ihold(dir); - spin_lock(&ci->i_unsafe_lock); req->r_unsafe_dir = dir; - list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); - spin_unlock(&ci->i_unsafe_lock); } } @@ -665,13 +660,20 @@ static void __unregister_request(struct ceph_mds_client *mdsc, rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); - if (req->r_unsafe_dir) { + if (req->r_unsafe_dir && req->r_got_unsafe) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); - spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_target_inode && req->r_got_unsafe) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_del_init(&req->r_unsafe_target_item); + spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_unsafe_dir) { iput(req->r_unsafe_dir); req->r_unsafe_dir = NULL; } @@ -1430,6 +1432,13 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; } + /* The inode has cached pages, but it's no longer used. + * we can safely drop it */ + if (wanted == 0 && used == CEPH_CAP_FILE_CACHE && + !(oissued & CEPH_CAP_FILE_CACHE)) { + used = 0; + oissued = 0; + } if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ @@ -1438,7 +1447,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) /* we aren't the only cap.. just remove us */ __ceph_remove_cap(cap, true); } else { - /* try to drop referring dentries */ + /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); d_prune_aliases(inode); dout("trim_caps_cb %p cap %p pruned, count now %d\n", @@ -1704,6 +1713,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) req->r_started = jiffies; req->r_resend_mds = -1; INIT_LIST_HEAD(&req->r_unsafe_dir_item); + INIT_LIST_HEAD(&req->r_unsafe_target_item); req->r_fmode = -1; kref_init(&req->r_kref); INIT_LIST_HEAD(&req->r_wait); @@ -1935,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2477,6 +2487,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { req->r_got_unsafe = true; list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); + if (req->r_unsafe_dir) { + struct ceph_inode_info *ci = + ceph_inode(req->r_unsafe_dir); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_dir_item, + &ci->i_unsafe_dirops); + spin_unlock(&ci->i_unsafe_lock); + } } dout("handle_reply tid %lld result %d\n", tid, result); @@ -2518,6 +2536,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) up_read(&mdsc->snap_rwsem); if (realm) ceph_put_snap_realm(mdsc, realm); + + if (err == 0 && req->r_got_unsafe && req->r_target_inode) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); + spin_unlock(&ci->i_unsafe_lock); + } out_err: mutex_lock(&mdsc->mutex); if (!req->r_aborted) { @@ -3917,17 +3942,19 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } -static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_sign_message(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); } -static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_check_message_signature(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); } @@ -3940,8 +3967,8 @@ static const struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, - .sign_message = sign_message, - .check_message_signature = check_message_signature, + .sign_message = mds_sign_message, + .check_message_signature = mds_check_message_signature, }; /* eof */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f575eafe2261..ccf11ef0ca87 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -236,6 +236,9 @@ struct ceph_mds_request { struct inode *r_unsafe_dir; struct list_head r_unsafe_dir_item; + /* unsafe requests that modify the target inode */ + struct list_head r_unsafe_target_item; + struct ceph_mds_session *r_session; int r_attempts; /* resend attempts */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2f2460d23a06..75b7d125ce66 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -342,6 +342,7 @@ struct ceph_inode_info { struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ + struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e739950ca084..cbc0f4bca0c0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -454,6 +454,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nocase"); if (tcon->retry) seq_puts(s, ",hard"); + if (tcon->use_persistent) + seq_puts(s, ",persistenthandles"); + else if (tcon->use_resilient) + seq_puts(s, ",resilienthandles"); if (tcon->unix_ext) seq_puts(s, ",unix"); else @@ -921,9 +925,7 @@ const struct file_operations cifs_file_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -939,9 +941,7 @@ const struct file_operations cifs_file_strict_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -957,9 +957,7 @@ const struct file_operations cifs_file_direct_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -975,9 +973,7 @@ const struct file_operations cifs_file_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -992,9 +988,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1009,9 +1003,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b406a32deb1f..2b510c537a0d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -493,7 +493,10 @@ struct smb_vol { bool mfsymlinks:1; /* use Minshall+French Symlinks */ bool multiuser:1; bool rwpidforward:1; /* pid forward for read/write operations */ - bool nosharesock; + bool nosharesock:1; + bool persistent:1; + bool nopersistent:1; + bool resilient:1; /* noresilient not required since not fored for CA */ unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -895,6 +898,8 @@ struct cifs_tcon { bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ + bool use_resilient:1; /* use resilient instead of durable handles */ + bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ @@ -1015,6 +1020,7 @@ struct cifs_fid { __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ + __u8 create_guid[16]; #endif struct cifs_pending_open *pending_open; unsigned int epoch; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3f2228570d44..ecb0803bdb0e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,6 +87,8 @@ enum { Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, Opt_multiuser, Opt_sloppy, Opt_nosharesock, + Opt_persistent, Opt_nopersistent, + Opt_resilient, Opt_noresilient, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -169,6 +171,10 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_multiuser, "multiuser" }, { Opt_sloppy, "sloppy" }, { Opt_nosharesock, "nosharesock" }, + { Opt_persistent, "persistenthandles"}, + { Opt_nopersistent, "nopersistenthandles"}, + { Opt_resilient, "resilienthandles"}, + { Opt_noresilient, "noresilienthandles"}, { Opt_backupuid, "backupuid=%s" }, { Opt_backupgid, "backupgid=%s" }, @@ -1497,6 +1503,33 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_nosharesock: vol->nosharesock = true; break; + case Opt_nopersistent: + vol->nopersistent = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_persistent: + vol->persistent = true; + if ((vol->nopersistent) || (vol->resilient)) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_resilient: + vol->resilient = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_noresilient: + vol->resilient = false; /* already the default */ + break; /* Numeric Values */ case Opt_backupuid: @@ -2655,6 +2688,42 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags); } tcon->seal = volume_info->seal; + tcon->use_persistent = false; + /* check if SMB2 or later, CIFS does not support persistent handles */ + if (volume_info->persistent) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB3 or later required for persistent handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#ifdef CONFIG_CIFS_SMB2 + } else if (ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + tcon->use_persistent = true; + else /* persistent handles requested but not supported */ { + cifs_dbg(VFS, + "Persistent handles not supported on share\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#endif /* CONFIG_CIFS_SMB2 */ + } +#ifdef CONFIG_CIFS_SMB2 + } else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) + && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + && (volume_info->nopersistent == false)) { + cifs_dbg(FYI, "enabling persistent handles\n"); + tcon->use_persistent = true; +#endif /* CONFIG_CIFS_SMB2 */ + } else if (volume_info->resilient) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB2.1 or later required for resilient handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } + tcon->use_resilient = true; + } + /* * We can have only one retry value for a connection to a share so for * resources mounted more than once to the same server share the last @@ -3503,6 +3572,15 @@ try_mount_again: goto mount_fail_check; } +#ifdef CONFIG_CIFS_SMB2 + if ((volume_info->persistent == true) && ((ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) { + cifs_dbg(VFS, "persistent handles not supported by server\n"); + rc = -EOPNOTSUPP; + goto mount_fail_check; + } +#endif /* CONFIG_CIFS_SMB2*/ + /* search for existing tcon to this server share */ tcon = cifs_get_tcon(ses, volume_info); if (IS_ERR(tcon)) { diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 28a77bf1d559..35cf990f87d3 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -85,9 +85,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); - /* check if source and target are on same tree connection */ - if (src_tcon != target_tcon) { - cifs_dbg(VFS, "file copy src and target on different volume\n"); + /* check source and target on same server (or volume if dup_extents) */ + if (dup_extents && (src_tcon != target_tcon)) { + cifs_dbg(VFS, "source and target of copy not on same share\n"); + goto out_fput; + } + + if (!dup_extents && (src_tcon->ses != target_tcon->ses)) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); goto out_fput; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 2ab297dae5a7..f9e766f464be 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -43,6 +43,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_file_all_info *smb2_data = NULL; __u8 smb2_oplock[17]; struct cifs_fid *fid = oparms->fid; + struct network_resiliency_req nr_ioctl_req; smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); if (smb2_path == NULL) { @@ -67,6 +68,24 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (rc) goto out; + + if (oparms->tcon->use_resilient) { + nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ + nr_ioctl_req.Reserved = 0; + rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true, + (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), + NULL, NULL /* no return info */); + if (rc == -EOPNOTSUPP) { + cifs_dbg(VFS, + "resiliency not supported by server, disabling\n"); + oparms->tcon->use_resilient = false; + } else if (rc) + cifs_dbg(FYI, "error %d setting resiliency\n", rc); + + rc = 0; + } + if (buf) { /* open response does not have IndexNumber field - get it */ rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 18da19f4f811..53ccdde6ff18 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -810,7 +810,6 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, &eof, false); } -#ifdef CONFIG_CIFS_SMB311 static int smb2_duplicate_extents(const unsigned int xid, struct cifsFileInfo *srcfile, @@ -854,8 +853,6 @@ smb2_duplicate_extents(const unsigned int xid, duplicate_extents_out: return rc; } -#endif /* CONFIG_CIFS_SMB311 */ - static int smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, @@ -1703,6 +1700,7 @@ struct smb_version_operations smb30_operations = { .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, + .duplicate_extents = smb2_duplicate_extents, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, @@ -1840,7 +1838,7 @@ struct smb_version_values smb21_values = { struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1860,7 +1858,7 @@ struct smb_version_values smb30_values = { struct smb_version_values smb302_values = { .version_string = SMB302_VERSION_STRING, .protocol_id = SMB302_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1881,7 +1879,7 @@ struct smb_version_values smb302_values = { struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 61276929d139..767555518d40 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1151,13 +1151,130 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, return 0; } +static struct create_durable_v2 * +create_durable_v2_buf(struct cifs_fid *pfid) +{ + struct create_durable_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_durable_v2, dcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct durable_context_v2)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_durable_v2, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Timeout = 0; /* Should this be configurable by workload */ + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + get_random_bytes(buf->dcontext.CreateGuid, 16); + memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'Q'; + return buf; +} + +static struct create_durable_handle_reconnect_v2 * +create_reconnect_durable_v2_buf(struct cifs_fid *fid) +{ + struct create_durable_handle_reconnect_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_handle_reconnect_v2), + GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + dcontext)); + buf->ccontext.DataLength = + cpu_to_le32(sizeof(struct durable_reconnect_context_v2)); + buf->ccontext.NameOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Fid.PersistentFileId = fid->persistent_fid; + buf->dcontext.Fid.VolatileFileId = fid->volatile_fid; + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + memcpy(buf->dcontext.CreateGuid, fid->create_guid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 is "DH2C" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'C'; + return buf; +} + static int -add_durable_context(struct kvec *iov, unsigned int *num_iovec, +add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, struct cifs_open_parms *oparms) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; + iov[num].iov_base = create_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + /* indicate that we don't need to relock the file */ + oparms->reconnect = false; + + iov[num].iov_base = create_reconnect_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, + sizeof(struct create_durable_handle_reconnect_v2)); + inc_rfc1001_len(&req->hdr, + sizeof(struct create_durable_handle_reconnect_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms, bool use_persistent) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + if (use_persistent) { + if (oparms->reconnect) + return add_durable_reconnect_v2_context(iov, num_iovec, + oparms); + else + return add_durable_v2_context(iov, num_iovec, oparms); + } + if (oparms->reconnect) { iov[num].iov_base = create_reconnect_durable_buf(oparms->fid); /* indicate that we don't need to relock the file */ @@ -1275,7 +1392,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ccontext->Next = cpu_to_le32(server->vals->create_lease_size); } - rc = add_durable_context(iov, &num_iovecs, oparms); + + rc = add_durable_context(iov, &num_iovecs, oparms, + tcon->use_persistent); if (rc) { cifs_small_buf_release(req); kfree(copy_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 451108284a2f..4af52780ec35 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -590,6 +590,44 @@ struct create_durable { } Data; } __packed; +/* See MS-SMB2 2.2.13.2.11 */ +/* Flags */ +#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002 +struct durable_context_v2 { + __le32 Timeout; + __le32 Flags; + __u64 Reserved; + __u8 CreateGuid[16]; +} __packed; + +struct create_durable_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_context_v2 dcontext; +} __packed; + +/* See MS-SMB2 2.2.13.2.12 */ +struct durable_reconnect_context_v2 { + struct { + __u64 PersistentFileId; + __u64 VolatileFileId; + } Fid; + __u8 CreateGuid[16]; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +/* See MS-SMB2 2.2.14.2.12 */ +struct durable_reconnect_context_v2_rsp { + __le32 Timeout; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +struct create_durable_handle_reconnect_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_reconnect_context_v2 dcontext; +} __packed; + #define COPY_CHUNK_RES_KEY_SIZE 24 struct resume_key_req { char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; @@ -643,6 +681,13 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* See MS-SMB2 2.2.31.3 */ +struct network_resiliency_req { + __le32 Timeout; + __le32 Reserved; +} __packed; +/* There is no buffer for the response ie no struct network_resiliency_rsp */ + struct validate_negotiate_info_req { __le32 Capabilities; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a639d0dab453..f996daeea271 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 /* Retrieve an opaque file reference for server-side data movement ie copy */ #define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 -#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ +#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 @@ -174,8 +174,10 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, else len = iov_iter_zero(max - pos, iter); - if (!len) + if (!len) { + retval = -EFAULT; break; + } pos += len; addr += len; |