diff options
Diffstat (limited to 'fs')
34 files changed, 429 insertions, 189 deletions
diff --git a/fs/affs/Changes b/fs/affs/Changes index a29409c1ffe0..b41c2c9792ff 100644 --- a/fs/affs/Changes +++ b/fs/affs/Changes @@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel] Version 3.11 ------------ -- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>] +- Converted to use 2.3.x page cache [Dave Jones] - Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>] Version 3.10 diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b50764bef141..131d82800b3a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -333,7 +333,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); static void btrfsic_dump_database(struct btrfsic_state *state); -static void btrfsic_complete_bio_end_io(struct bio *bio, int err); static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages); static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, @@ -1687,7 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, for (i = 0; i < num_pages;) { struct bio *bio; unsigned int j; - DECLARE_COMPLETION_ONSTACK(complete); bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); if (!bio) { @@ -1698,8 +1696,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, } bio->bi_bdev = block_ctx->dev->bdev; bio->bi_sector = dev_bytenr >> 9; - bio->bi_end_io = btrfsic_complete_bio_end_io; - bio->bi_private = &complete; for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], @@ -1712,12 +1708,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, "btrfsic: error, failed to add a single page!\n"); return -1; } - submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (submit_bio_wait(READ, bio)) { printk(KERN_INFO "btrfsic: read error at logical %llu dev %s!\n", block_ctx->start, block_ctx->dev->name); @@ -1740,11 +1731,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, return block_ctx->len; } -static void btrfsic_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static void btrfsic_dump_database(struct btrfsic_state *state) { struct list_head *elem_all; @@ -3008,14 +2994,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) return submit_bh(rw, bh); } -void btrfsic_submit_bio(int rw, struct bio *bio) +static void __btrfsic_submit_bio(int rw, struct bio *bio) { struct btrfsic_dev_state *dev_state; - if (!btrfsic_is_initialized) { - submit_bio(rw, bio); + if (!btrfsic_is_initialized) return; - } mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bio() is also called before @@ -3106,10 +3090,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio) } leave: mutex_unlock(&btrfsic_mutex); +} +void btrfsic_submit_bio(int rw, struct bio *bio) +{ + __btrfsic_submit_bio(rw, bio); submit_bio(rw, bio); } +int btrfsic_submit_bio_wait(int rw, struct bio *bio) +{ + __btrfsic_submit_bio(rw, bio); + return submit_bio_wait(rw, bio); +} + int btrfsic_mount(struct btrfs_root *root, struct btrfs_fs_devices *fs_devices, int including_extent_data, u32 print_mask) diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 8b59175cc502..13b8566c97ab 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -22,9 +22,11 @@ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY int btrfsic_submit_bh(int rw, struct buffer_head *bh); void btrfsic_submit_bio(int rw, struct bio *bio); +int btrfsic_submit_bio_wait(int rw, struct bio *bio); #else #define btrfsic_submit_bh submit_bh #define btrfsic_submit_bio submit_bio +#define btrfsic_submit_bio_wait submit_bio_wait #endif int btrfsic_mount(struct btrfs_root *root, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8e457fca0a0b..ff43802a7c88 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, return err; } -static void repair_io_failure_callback(struct bio *bio, int err) -{ - complete(bio->bi_private); -} - /* * this bypasses the standard btrfs submit functions deliberately, as * the standard behavior is to write all copies in a raid setup. here we only @@ -1973,7 +1968,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, { struct bio *bio; struct btrfs_device *dev; - DECLARE_COMPLETION_ONSTACK(compl); u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; @@ -1990,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_private = &compl; - bio->bi_end_io = repair_io_failure_callback; bio->bi_size = 0; map_length = length; @@ -2012,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start - page_offset(page)); - btrfsic_submit_bio(WRITE_SYNC, bio); - wait_for_completion(&compl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { /* try to remap that extent elsewhere? */ bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 561e2f16ba3e..1fd3f33c330a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, int is_metadata, int have_csum, const u8 *csum, u64 generation, u16 csum_size); -static void scrub_complete_bio_end_io(struct bio *bio, int err); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write); @@ -1294,7 +1293,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; struct scrub_page *page = sblock->pagev[page_num]; - DECLARE_COMPLETION_ONSTACK(complete); if (page->dev->bdev == NULL) { page->io_error = 1; @@ -1311,18 +1309,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } bio->bi_bdev = page->dev->bdev; bio->bi_sector = page->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; bio_add_page(bio, page->page, PAGE_SIZE, 0); - btrfsic_submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); - - page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (btrfsic_submit_bio_wait(READ, bio)) sblock->no_io_error_seen = 0; + bio_put(bio); } @@ -1391,11 +1382,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, sblock->checksum_error = 1; } -static void scrub_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write) @@ -1430,7 +1416,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, sblock_bad->checksum_error || page_bad->io_error) { struct bio *bio; int ret; - DECLARE_COMPLETION_ONSTACK(complete); if (!page_bad->dev->bdev) { printk_ratelimited(KERN_WARNING @@ -1443,19 +1428,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; bio->bi_bdev = page_bad->dev->bdev; bio->bi_sector = page_bad->physical >> 9; - bio->bi_end_io = scrub_complete_bio_end_io; - bio->bi_private = &complete; ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { bio_put(bio); return -EIO; } - btrfsic_submit_bio(WRITE, bio); - /* this will also unplug the queue */ - wait_for_completion(&complete); - if (!bio_flagged(bio, BIO_UPTODATE)) { + if (btrfsic_submit_bio_wait(WRITE, bio)) { btrfs_dev_stat_inc_and_print(page_bad->dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_replace_stats_inc( @@ -3375,7 +3355,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, struct bio *bio; struct btrfs_device *dev; int ret; - DECLARE_COMPLETION_ONSTACK(compl); dev = sctx->wr_ctx.tgtdev; if (!dev) @@ -3392,8 +3371,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, spin_unlock(&sctx->stat_lock); return -ENOMEM; } - bio->bi_private = &compl; - bio->bi_end_io = scrub_complete_bio_end_io; bio->bi_size = 0; bio->bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; @@ -3404,10 +3381,8 @@ leave_with_eio: btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; } - btrfsic_submit_bio(WRITE_SYNC, bio); - wait_for_completion(&compl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) goto leave_with_eio; bio_put(bio); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6df8bd481425..1e561c059539 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) } SetPageUptodate(page); - if (err == 0) + if (err >= 0) ceph_readpage_to_fscache(inode, page); out: diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 7db2e6ca4b8f..8c44fdd4e1c3 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) { struct ceph_inode_info *ci = ceph_inode(inode); + if (!PageFsCache(page)) + return; + fscache_wait_on_page_write(ci->fscache, page); fscache_uncache_page(ci->fscache, page); } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 13976c33332e..3c0a4bd74996 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) * caller should hold i_ceph_lock. * caller will not hold session s_mutex if called from destroy_inode. */ -void __ceph_remove_cap(struct ceph_cap *cap) +void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; @@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap) /* remove from session list */ spin_lock(&session->s_cap_lock); + /* + * s_cap_reconnect is protected by s_cap_lock. no one changes + * s_cap_gen while session is in the reconnect state. + */ + if (queue_release && + (!session->s_cap_reconnect || + cap->cap_gen == session->s_cap_gen)) + __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, + cap->mseq, cap->issue_seq); + if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ dout("__ceph_remove_cap delaying %p removal from session %p\n", @@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session, struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; - spin_lock(&session->s_cap_lock); BUG_ON(!session->s_num_cap_releases); msg = list_first_entry(&session->s_cap_releases, struct ceph_msg, list_head); @@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session, (int)CEPH_CAPS_PER_RELEASE, (int)msg->front.iov_len); } - spin_unlock(&session->s_cap_lock); } /* @@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode) p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); - struct ceph_mds_session *session = cap->session; - - __queue_cap_release(session, ceph_ino(inode), cap->cap_id, - cap->mseq, cap->issue_seq); p = rb_next(p); - __ceph_remove_cap(cap); + __ceph_remove_cap(cap, true); } } @@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, } spin_unlock(&mdsc->cap_dirty_lock); } - __ceph_remove_cap(cap); + __ceph_remove_cap(cap, false); } /* else, we already released it */ @@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (!inode) { dout(" i don't have ino %llx\n", vino.ino); - if (op == CEPH_CAP_OP_IMPORT) + if (op == CEPH_CAP_OP_IMPORT) { + spin_lock(&session->s_cap_lock); __queue_cap_release(session, vino.ino, cap_id, mseq, seq); + spin_unlock(&session->s_cap_lock); + } goto flush_cap_releases; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 868b61d56cac..2a0bcaeb189a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -352,8 +352,18 @@ more: } /* note next offset and last dentry name */ + rinfo = &req->r_reply_info; + if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { + frag = le32_to_cpu(rinfo->dir_dir->frag); + if (ceph_frag_is_leftmost(frag)) + fi->next_offset = 2; + else + fi->next_offset = 0; + off = fi->next_offset; + } fi->offset = fi->next_offset; fi->last_readdir = req; + fi->frag = frag; if (req->r_reply_info.dir_end) { kfree(fi->last_name); @@ -363,7 +373,6 @@ more: else fi->next_offset = 0; } else { - rinfo = &req->r_reply_info; err = note_last_dentry(fi, rinfo->dir_dname[rinfo->dir_nr-1], rinfo->dir_dname_len[rinfo->dir_nr-1]); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8549a48115f7..9a8e396aed89 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode, int issued = 0, implemented; struct timespec mtime, atime, ctime; u32 nsplits; + struct ceph_inode_frag *frag; + struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; int err = 0; int queue_trunc = 0; @@ -751,15 +753,38 @@ no_change: /* FIXME: move me up, if/when version reflects fragtree changes */ nsplits = le32_to_cpu(info->fragtree.nsplits); mutex_lock(&ci->i_fragtree_mutex); + rb_node = rb_first(&ci->i_fragtree); for (i = 0; i < nsplits; i++) { u32 id = le32_to_cpu(info->fragtree.splits[i].frag); - struct ceph_inode_frag *frag = __get_or_create_frag(ci, id); - - if (IS_ERR(frag)) - continue; + frag = NULL; + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (ceph_frag_compare(frag->frag, id) >= 0) { + if (frag->frag != id) + frag = NULL; + else + rb_node = rb_next(rb_node); + break; + } + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + frag = NULL; + } + if (!frag) { + frag = __get_or_create_frag(ci, id); + if (IS_ERR(frag)) + continue; + } frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); dout(" frag %x split by %d\n", frag->frag, frag->split_by); } + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + } mutex_unlock(&ci->i_fragtree_mutex); /* were we issued a capability? */ @@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, int err = 0, i; struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; - u64 frag = le32_to_cpu(rhead->args.readdir.frag); struct ceph_dentry_info *di; + u64 r_readdir_offset = req->r_readdir_offset; + u32 frag = le32_to_cpu(rhead->args.readdir.frag); + + if (rinfo->dir_dir && + le32_to_cpu(rinfo->dir_dir->frag) != frag) { + dout("readdir_prepopulate got new frag %x -> %x\n", + frag, le32_to_cpu(rinfo->dir_dir->frag)); + frag = le32_to_cpu(rinfo->dir_dir->frag); + if (ceph_frag_is_leftmost(frag)) + r_readdir_offset = 2; + else + r_readdir_offset = 0; + } if (req->r_aborted) return readdir_prepopulate_inodes_only(req, session); @@ -1315,7 +1352,7 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); + di->offset = ceph_make_fpos(frag, i + r_readdir_offset); /* inode */ if (dn->d_inode) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b7bda5d9611d..d90861f45210 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -43,6 +43,7 @@ */ struct ceph_reconnect_state { + int nr_caps; struct ceph_pagelist *pagelist; bool flock; }; @@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_waiting); INIT_LIST_HEAD(&s->s_unsafe); s->s_num_cap_releases = 0; + s->s_cap_reconnect = 0; s->s_cap_iterator = NULL; INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_releases_done); @@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, req->r_unsafe_dir = NULL; } + complete_all(&req->r_safe_completion); + ceph_mdsc_put_request(req); } @@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); spin_lock(&ci->i_ceph_lock); - __ceph_remove_cap(cap); + __ceph_remove_cap(cap, false); if (!__ceph_is_any_real_caps(ci)) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; @@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. just remove us */ - __queue_cap_release(session, ceph_ino(inode), cap->cap_id, - cap->mseq, cap->issue_seq); - __ceph_remove_cap(cap); + __ceph_remove_cap(cap, true); } else { /* try to drop referring dentries */ spin_unlock(&ci->i_ceph_lock); @@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, unsigned num; dout("discard_cap_releases mds%d\n", session->s_mds); - spin_lock(&session->s_cap_lock); /* zero out the in-progress message */ msg = list_first_entry(&session->s_cap_releases, @@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, msg->front.iov_len = sizeof(*head); list_add(&msg->list_head, &session->s_cap_releases); } - - spin_unlock(&session->s_cap_lock); } /* @@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = -EAGAIN; - if (req->r_err || req->r_got_result) + if (req->r_err || req->r_got_result) { + if (req->r_aborted) + __unregister_request(mdsc, req); goto out; + } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { @@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { req->r_got_safe = true; __unregister_request(mdsc, req); - complete_all(&req->r_safe_completion); if (req->r_got_unsafe) { /* @@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || - req->r_op == CEPH_MDS_OP_LSSNAP) && - rinfo->dir_nr) + req->r_op == CEPH_MDS_OP_LSSNAP)) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); } @@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ cap->mseq = 0; /* and migrate_seq */ + cap->cap_gen = cap->session->s_cap_gen; if (recon_state->flock) { rec.v2.cap_id = cpu_to_le64(cap->cap_id); @@ -2552,6 +2553,8 @@ encode_again: } else { err = ceph_pagelist_append(pagelist, &rec, reclen); } + + recon_state->nr_caps++; out_free: kfree(path); out_dput: @@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, struct rb_node *p; int mds = session->s_mds; int err = -ENOMEM; + int s_nr_caps; struct ceph_pagelist *pagelist; struct ceph_reconnect_state recon_state; @@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, dout("session %p state %s\n", session, session_state_name(session->s_state)); + spin_lock(&session->s_gen_ttl_lock); + session->s_cap_gen++; + spin_unlock(&session->s_gen_ttl_lock); + + spin_lock(&session->s_cap_lock); + /* + * notify __ceph_remove_cap() that we are composing cap reconnect. + * If a cap get released before being added to the cap reconnect, + * __ceph_remove_cap() should skip queuing cap release. + */ + session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ discard_cap_releases(mdsc, session); + spin_unlock(&session->s_cap_lock); /* traverse this session's caps */ - err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); + s_nr_caps = session->s_nr_caps; + err = ceph_pagelist_encode_32(pagelist, s_nr_caps); if (err) goto fail; + recon_state.nr_caps = 0; recon_state.pagelist = pagelist; recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; err = iterate_session_caps(session, encode_caps_cb, &recon_state); if (err < 0) goto fail; + spin_lock(&session->s_cap_lock); + session->s_cap_reconnect = 0; + spin_unlock(&session->s_cap_lock); + /* * snaprealms. we provide mds with the ino, seq (version), and * parent for all of our realms. If the mds has any newer info, @@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, if (recon_state.flock) reply->hdr.version = cpu_to_le16(2); - if (pagelist->length) { - /* set up outbound data if we have any */ - reply->hdr.data_len = cpu_to_le32(pagelist->length); - ceph_msg_data_add_pagelist(reply, pagelist); + + /* raced with cap release? */ + if (s_nr_caps != recon_state.nr_caps) { + struct page *page = list_first_entry(&pagelist->head, + struct page, lru); + __le32 *addr = kmap_atomic(page); + *addr = cpu_to_le32(recon_state.nr_caps); + kunmap_atomic(addr); } + + reply->hdr.data_len = cpu_to_le32(pagelist->length); + ceph_msg_data_add_pagelist(reply, pagelist); ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c2a19fbbe517..4c053d099ae4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -132,6 +132,7 @@ struct ceph_mds_session { struct list_head s_caps; /* all caps issued by this session */ int s_nr_caps, s_trim_caps; int s_num_cap_releases; + int s_cap_reconnect; struct list_head s_cap_releases; /* waiting cap_release messages */ struct list_head s_cap_releases_done; /* ready to send */ struct ceph_cap *s_cap_iterator; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6014b0a3c405..ef4ac38bb614 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode, int fmode, unsigned issued, unsigned wanted, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap_reservation *caps_reservation); -extern void __ceph_remove_cap(struct ceph_cap *cap); -static inline void ceph_remove_cap(struct ceph_cap *cap) -{ - spin_lock(&cap->ci->i_ceph_lock); - __ceph_remove_cap(cap); - spin_unlock(&cap->ci->i_ceph_lock); -} +extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d9ea7ada1378..f918a998a087 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -384,6 +384,7 @@ struct smb_version_operations { int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); + int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); }; struct smb_version_values { diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 409b45eefe70..77492301cc2b 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -26,13 +26,15 @@ #include <linux/mount.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/btrfs.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +#define CIFS_IOCTL_MAGIC 0xCF +#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) + static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, unsigned long srcfd, u64 off, u64 len, u64 destoff) { @@ -213,7 +215,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_dbg(FYI, "set compress flag rc %d\n", rc); } break; - case BTRFS_IOC_CLONE: + case CIFS_IOC_COPYCHUNK_FILE: rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); break; default: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 11dde4b24f8a..757da3e54d3d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -532,7 +532,10 @@ smb2_clone_range(const unsigned int xid, int rc; unsigned int ret_data_len; struct copychunk_ioctl *pcchunk; - char *retbuf = NULL; + struct copychunk_ioctl_rsp *retbuf = NULL; + struct cifs_tcon *tcon; + int chunks_copied = 0; + bool chunk_sizes_updated = false; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); @@ -547,27 +550,96 @@ smb2_clone_range(const unsigned int xid, /* Note: request_res_key sets res_key null only if rc !=0 */ if (rc) - return rc; + goto cchunk_out; /* For now array only one chunk long, will make more flexible later */ pcchunk->ChunkCount = __constant_cpu_to_le32(1); pcchunk->Reserved = 0; - pcchunk->SourceOffset = cpu_to_le64(src_off); - pcchunk->TargetOffset = cpu_to_le64(dest_off); - pcchunk->Length = cpu_to_le32(len); pcchunk->Reserved2 = 0; - /* Request that server copy to target from src file identified by key */ - rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink), - trgtfile->fid.persistent_fid, - trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, (char *)pcchunk, - sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len); + tcon = tlink_tcon(trgtfile->tlink); - /* BB need to special case rc = EINVAL to alter chunk size */ + while (len > 0) { + pcchunk->SourceOffset = cpu_to_le64(src_off); + pcchunk->TargetOffset = cpu_to_le64(dest_off); + pcchunk->Length = + cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); - cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len); + /* Request server copy to target from src identified by key */ + rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, + true /* is_fsctl */, (char *)pcchunk, + sizeof(struct copychunk_ioctl), (char **)&retbuf, + &ret_data_len); + if (rc == 0) { + if (ret_data_len != + sizeof(struct copychunk_ioctl_rsp)) { + cifs_dbg(VFS, "invalid cchunk response size\n"); + rc = -EIO; + goto cchunk_out; + } + if (retbuf->TotalBytesWritten == 0) { + cifs_dbg(FYI, "no bytes copied\n"); + rc = -EIO; + goto cchunk_out; + } + /* + * Check if server claimed to write more than we asked + */ + if (le32_to_cpu(retbuf->TotalBytesWritten) > + le32_to_cpu(pcchunk->Length)) { + cifs_dbg(VFS, "invalid copy chunk response\n"); + rc = -EIO; + goto cchunk_out; + } + if (le32_to_cpu(retbuf->ChunksWritten) != 1) { + cifs_dbg(VFS, "invalid num chunks written\n"); + rc = -EIO; + goto cchunk_out; + } + chunks_copied++; + + src_off += le32_to_cpu(retbuf->TotalBytesWritten); + dest_off += le32_to_cpu(retbuf->TotalBytesWritten); + len -= le32_to_cpu(retbuf->TotalBytesWritten); + + cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n", + le32_to_cpu(retbuf->ChunksWritten), + le32_to_cpu(retbuf->ChunkBytesWritten), + le32_to_cpu(retbuf->TotalBytesWritten)); + } else if (rc == -EINVAL) { + if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) + goto cchunk_out; + + cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n", + le32_to_cpu(retbuf->ChunksWritten), + le32_to_cpu(retbuf->ChunkBytesWritten), + le32_to_cpu(retbuf->TotalBytesWritten)); + + /* + * Check if this is the first request using these sizes, + * (ie check if copy succeed once with original sizes + * and check if the server gave us different sizes after + * we already updated max sizes on previous request). + * if not then why is the server returning an error now + */ + if ((chunks_copied != 0) || chunk_sizes_updated) + goto cchunk_out; + + /* Check that server is not asking us to grow size */ + if (le32_to_cpu(retbuf->ChunkBytesWritten) < + tcon->max_bytes_chunk) + tcon->max_bytes_chunk = + le32_to_cpu(retbuf->ChunkBytesWritten); + else + goto cchunk_out; /* server gave us bogus size */ + + /* No need to change MaxChunks since already set to 1 */ + chunk_sizes_updated = true; + } + } +cchunk_out: kfree(pcchunk); return rc; } @@ -1247,6 +1319,7 @@ struct smb_version_operations smb30_operations = { .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, + .validate_negotiate = smb3_validate_negotiate, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d65270c290a1..2013234b73ad 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -454,6 +454,81 @@ neg_exit: return rc; } +int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) +{ + int rc = 0; + struct validate_negotiate_info_req vneg_inbuf; + struct validate_negotiate_info_rsp *pneg_rsp; + u32 rsplen; + + cifs_dbg(FYI, "validate negotiate\n"); + + /* + * validation ioctl must be signed, so no point sending this if we + * can not sign it. We could eventually change this to selectively + * sign just this, the first and only signed request on a connection. + * This is good enough for now since a user who wants better security + * would also enable signing on the mount. Having validation of + * negotiate info for signed connections helps reduce attack vectors + */ + if (tcon->ses->server->sign == false) + return 0; /* validation requires signing */ + + vneg_inbuf.Capabilities = + cpu_to_le32(tcon->ses->server->vals->req_capabilities); + memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); + + if (tcon->ses->sign) + vneg_inbuf.SecurityMode = + cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); + else if (global_secflags & CIFSSEC_MAY_SIGN) + vneg_inbuf.SecurityMode = + cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); + else + vneg_inbuf.SecurityMode = 0; + + vneg_inbuf.DialectCount = cpu_to_le16(1); + vneg_inbuf.Dialects[0] = + cpu_to_le16(tcon->ses->server->vals->protocol_id); + + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, + (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), + (char **)&pneg_rsp, &rsplen); + + if (rc != 0) { + cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); + return -EIO; + } + + if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { + cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); + return -EIO; + } + + /* check validate negotiate info response matches what we got earlier */ + if (pneg_rsp->Dialect != + cpu_to_le16(tcon->ses->server->vals->protocol_id)) + goto vneg_out; + + if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) + goto vneg_out; + + /* do not validate server guid because not saved at negprot time yet */ + + if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND | + SMB2_LARGE_FILES) != tcon->ses->server->capabilities) + goto vneg_out; + + /* validate negotiate successful */ + cifs_dbg(FYI, "validate negotiate info successful\n"); + return 0; + +vneg_out: + cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); + return -EIO; +} + int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -829,6 +904,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); init_copy_chunk_defaults(tcon); + if (tcon->ses->server->ops->validate_negotiate) + rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); tcon_exit: free_rsp_buf(resp_buftype, rsp); kfree(unc_path); @@ -1214,10 +1291,17 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; - if (rc != 0) { + if ((rc != 0) && (rc != -EINVAL)) { if (tcon) cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; + } else if (rc == -EINVAL) { + if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && + (opcode != FSCTL_SRV_COPYCHUNK)) { + if (tcon) + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + goto ioctl_exit; + } } /* check if caller wants to look at return data or just return rc */ @@ -2154,11 +2238,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; - if (rc != 0) { + if (rc != 0) cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); - goto out; - } -out: + free_rsp_buf(resp_buftype, rsp); kfree(iov); return rc; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f88320bbb477..2022c542ea3a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -577,13 +577,19 @@ struct copychunk_ioctl_rsp { __le32 TotalBytesWritten; } __packed; -/* Response and Request are the same format */ -struct validate_negotiate_info { +struct validate_negotiate_info_req { __le32 Capabilities; __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialect[1]; + __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ +} __packed; + +struct validate_negotiate_info_rsp { + __le32 Capabilities; + __u8 Guid[SMB2_CLIENT_GUID_SIZE]; + __le16 SecurityMode; + __le16 Dialect; /* Dialect in use for the connection */ } __packed; #define RSS_CAPABLE 0x00000001 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index b4eea105b08c..93adc64666f3 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -162,5 +162,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_lock_element *buf); extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state); +extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a4b2391fe66e..0e538b5c9622 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ -#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ +#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* Perform server-side data movement */ #define FSCTL_SRV_COPYCHUNK 0x001440F2 #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 79b65c3b9e87..8b5e2584c840 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1852,8 +1852,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) - epds.events &= ~EPOLLWAKEUP; + ep_take_care_of_epollwakeup(&epds); /* * We have to check that the file structure underneath the file descriptor diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index b51a6079108d..e9a97a0d4314 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,13 +24,6 @@ struct hfsplus_wd { u16 embed_count; }; -static void hfsplus_end_io_sync(struct bio *bio, int err) -{ - if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - complete(bio->bi_private); -} - /* * hfsplus_submit_bio - Perfrom block I/O * @sb: super block of volume for I/O @@ -53,7 +46,6 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, int rw) { - DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; u64 io_size; @@ -73,8 +65,6 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; bio->bi_bdev = sb->s_bdev; - bio->bi_end_io = hfsplus_end_io_sync; - bio->bi_private = &wait; if (!(rw & WRITE) && data) *data = (u8 *)buf + offset; @@ -93,12 +83,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, buf = (u8 *)buf + len; } - submit_bio(rw, bio); - wait_for_completion(&wait); - - if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - + ret = submit_bio_wait(rw, bio); out: bio_put(bio); return ret < 0 ? ret : 0; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 550475ca6a0e..0f95f0d0b313 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -14,16 +14,10 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) -static void request_complete(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static int sync_request(struct page *page, struct block_device *bdev, int rw) { struct bio bio; struct bio_vec bio_vec; - struct completion complete; bio_init(&bio); bio.bi_max_vecs = 1; @@ -35,13 +29,8 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) bio.bi_size = PAGE_SIZE; bio.bi_bdev = bdev; bio.bi_sector = page->index * (PAGE_SIZE >> 9); - init_completion(&complete); - bio.bi_private = &complete; - bio.bi_end_io = request_complete; - submit_bio(rw, &bio); - wait_for_completion(&complete); - return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; + return submit_bio_wait(rw, &bio); } static int bdev_readpage(void *_sb, struct page *page) diff --git a/fs/namei.c b/fs/namei.c index 8f77a8cea289..c53d3a9547f9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -513,8 +513,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) if (!lockref_get_not_dead(&parent->d_lockref)) { nd->path.dentry = NULL; - rcu_read_unlock(); - return -ECHILD; + goto out; } /* diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 8485978993e8..9838fb020473 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -36,6 +36,7 @@ #include <linux/nfs_fs.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include "../nfs4_fs.h" #include "../pnfs.h" #include "../netns.h" diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed1..4d0161442565 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - do_div(tmp, base); + return s - sector_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index fc0f95ec7358..d25f10fb4926 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18ab2da4eeb6..00ad1c2b217d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -312,7 +312,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bca6a3e3c49c..8b5cc04a8611 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); +static inline void nfs4_label_free(struct nfs4_label *label) +{ + if (label) { + kfree(label->label); + kfree(label); + } + return; +} +#else +static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } +static inline void nfs4_label_free(void *label) {} +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ + /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3ce79b04522e..5609edc742a0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,6 +9,14 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H +#if defined(CONFIG_NFS_V4_2) +#define NFS4_MAX_MINOR_VERSION 2 +#elif defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MINOR_VERSION 1 +#else +#define NFS4_MAX_MINOR_VERSION 0 +#endif + #if IS_ENABLED(CONFIG_NFS_V4) #define NFS4_MAX_LOOP_ON_RECOVER (10) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 659990c0109e..15052b81df42 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->roc_barrier); nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - nfs4_close_clear_stateid_flags(state, - calldata->arg.fmode); break; + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: @@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) break; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { rpc_restart_call_prepare(task); + goto out_release; + } } + nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); +out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); @@ -4802,7 +4805,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto restart_call; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); @@ -4987,11 +4990,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { @@ -7589,7 +7598,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; server = NFS_SERVER(lrp->args.inode); - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + switch (task->tk_status) { + default: + task->tk_status = 0; + case 0: + break; + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) + break; rpc_restart_call_prepare(task); return; } diff --git a/fs/pipe.c b/fs/pipe.c index d2c45e14e6d8..0e0752ef2715 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } +static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) +{ + int kill = 0; + + spin_lock(&inode->i_lock); + if (!--pipe->files) { + inode->i_pipe = NULL; + kill = 1; + } + spin_unlock(&inode->i_lock); + + if (kill) + free_pipe_info(pipe); +} + static int pipe_release(struct inode *inode, struct file *file) { - struct pipe_inode_info *pipe = inode->i_pipe; - int kill = 0; + struct pipe_inode_info *pipe = file->private_data; __pipe_lock(pipe); if (file->f_mode & FMODE_READ) @@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); - + put_pipe_info(inode, pipe); return 0; } @@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp) { struct pipe_inode_info *pipe; bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; - int kill = 0; int ret; filp->f_version = 0; @@ -1130,15 +1135,9 @@ err_wr: goto err; err: - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); + + put_pipe_info(inode, pipe); return ret; } diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 2943b2bfae48..62a0de6632e1 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) */ res = squashfs_read_cache(target_page, block, bsize, pages, page); + if (res < 0) + goto mark_errored; + goto out; } @@ -119,7 +122,7 @@ mark_errored: * dealt with by the caller */ for (i = 0; i < pages; i++) { - if (page[i] == target_page) + if (page[i] == NULL || page[i] == target_page) continue; flush_dcache_page(page[i]); SetPageError(page[i]); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 79b5da2acbe1..b94f93685093 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; struct sysfs_open_file *of; - bool has_read, has_write; + bool has_read, has_write, has_mmap; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ @@ -621,6 +621,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = battr->read || battr->mmap; has_write = battr->write || battr->mmap; + has_mmap = battr->mmap; } else { const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); @@ -632,6 +633,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = ops->show; has_write = ops->store; + has_mmap = false; } /* check perms and supported operations */ @@ -649,7 +651,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file) if (!of) goto err_out; - mutex_init(&of->mutex); + /* + * The following is done to give a different lockdep key to + * @of->mutex for files which implement mmap. This is a rather + * crude way to avoid false positive lockdep warning around + * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and + * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under + * which mm->mmap_sem nests, while holding @of->mutex. As each + * open file has a separate mutex, it's okay as long as those don't + * happen on the same file. At this point, we can't easily give + * each file a separate locking class. Let's differentiate on + * whether the file has mmap or not for now. + */ + if (has_mmap) + mutex_init(&of->mutex); + else + mutex_init(&of->mutex); + of->sd = attr_sd; of->file = file; |