author      Joel Stanley <joel@jms.id.au>    2020-07-22 12:42:41 +0300
committer   Joel Stanley <joel@jms.id.au>    2020-07-22 12:42:46 +0300
commit      8a9b346382056b52cd7ff141ae9f15a0fcfeb13d (patch)
tree        7b855ed138c412bc27713ea8d3feef8939c954a0 /fs
parent      2b4829edfc1c225c717652153097470529d171db (diff)
parent      d811d29517d1ea05bc159579231652d3ca1c2a01 (diff)
download    linux-8a9b346382056b52cd7ff141ae9f15a0fcfeb13d.tar.xz
Merge tag 'v5.4.53' into dev-5.4
This is the 5.4.53 stable release
Signed-off-by: Joel Stanley <joel@jms.id.au>
Diffstat (limited to 'fs')
61 files changed, 749 insertions(+), 441 deletions(-)
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 78ba5f932287..296b489861a9 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -154,10 +154,17 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, return ERR_PTR(-ENOMEM); } + cell->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!cell->name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + cell->net = net; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); + cell->name[i] = 0; atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); @@ -203,6 +210,7 @@ parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: + kfree(cell->name); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -483,6 +491,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); key_put(cell->anonymous_key); + kfree(cell->name); kfree(cell); _leave(" [destroyed]"); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index d1e1caa23c8b..3c486340b220 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -658,7 +658,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->ctx.actor = afs_lookup_filldir; cookie->name = dentry->d_name; - cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ + cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want + * and slot 1 for the directory */ read_seqlock_excl(&dvnode->cb_lock); dcbi = rcu_dereference_protected(dvnode->cb_interest, @@ -709,7 +710,11 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, if (!cookie->inodes) goto out_s; - for (i = 1; i < cookie->nr_fids; i++) { + cookie->fids[1] = dvnode->fid; + cookie->statuses[1].cb_break = afs_calc_vnode_cb_break(dvnode); + cookie->inodes[1] = igrab(&dvnode->vfs_inode); + + for (i = 2; i < cookie->nr_fids; i++) { scb = &cookie->statuses[i]; /* Find any inodes that already exist and get their diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 0a4fed9e706b..5c2729fc07e5 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -56,16 +56,15 @@ static void xdr_dump_bad(const __be32 *bp) /* * decode an AFSFetchStatus block */ -static int xdr_decode_AFSFetchStatus(const __be32 **_bp, - struct afs_call *call, - struct afs_status_cb *scb) +static void xdr_decode_AFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; - int ret; abort_code = ntohl(xdr->abort_code); @@ -79,7 +78,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - goto good; + goto advance; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -89,7 +88,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; scb->have_error = true; - goto good; + goto advance; } type = ntohl(xdr->type); @@ -125,15 +124,13 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; -good: - ret = 0; advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return ret; + return; bad: xdr_dump_bad(*_bp); - ret = afs_protocol_error(call, -EBADMSG, 
afs_eproto_bad_status); + afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); goto advance; } @@ -254,9 +251,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -419,9 +414,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -579,12 +572,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->out_fid); - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -693,9 +682,7 @@ static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -786,12 +773,8 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -880,12 +863,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->out_fid); - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -988,16 +967,12 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; + bp = call->buffer; /* If the two dirs are the same, we have two copies of the same status * report, so we just decode it twice. 
*/ - bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -1105,9 +1080,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -1285,9 +1258,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -1956,9 +1927,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSCallBack(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); @@ -2064,10 +2033,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; scb = &call->out_scb[call->count]; - ret = xdr_decode_AFSFetchStatus(&bp, call, scb); - if (ret < 0) - return ret; - + xdr_decode_AFSFetchStatus(&bp, call, scb); call->count++; if (call->count < call->count2) goto more_counts; @@ -2245,9 +2211,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); call->unmarshall++; @@ -2328,9 +2292,7 @@ static int afs_deliver_fs_file_status_and_vol(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 46d2d7cb461d..a74e8e209454 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -171,6 +171,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, struct timespec64 t; umode_t mode; bool data_changed = false; + bool change_size = false; BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); @@ -226,6 +227,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, } else { set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } + change_size = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a @@ -233,11 +235,19 @@ static void afs_apply_status(struct afs_fs_cursor *fc, */ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) data_changed = false; + change_size = true; } if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - afs_set_i_size(vnode, status->size); + + /* Only update the size if the data version jumped. 
If the + * file is being modified locally, then we might have our own + * idea of what the size should be that's not the same as + * what's on the server. + */ + if (change_size) + afs_set_i_size(vnode, status->size); } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 485cc3b2aaa8..7fe88d918b23 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -161,6 +161,7 @@ struct afs_call { bool upgrade; /* T to request service upgrade */ bool have_reply_time; /* T if have got reply_time */ bool intr; /* T if interruptible */ + bool unmarshalling_error; /* T if an unmarshalling error occurred */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -396,7 +397,7 @@ struct afs_cell { struct afs_vlserver_list __rcu *vl_servers; u8 name_len; /* Length of name */ - char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ + char *name; /* Cell name, case-flattened and NUL-padded */ }; /* diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 52b19e9c1535..5334f1bd2bca 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -83,6 +83,7 @@ int afs_abort_to_error(u32 abort_code) case UAENOLCK: return -ENOLCK; case UAENOTEMPTY: return -ENOTEMPTY; case UAELOOP: return -ELOOP; + case UAEOVERFLOW: return -EOVERFLOW; case UAENOMEDIUM: return -ENOMEDIUM; case UAEDQUOT: return -EDQUOT; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index fba2ec3a3a9c..106b27011f6d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -562,6 +562,7 @@ void afs_put_sysnames(struct afs_sysnames *sysnames) if (sysnames->subs[i] != afs_init_sysname && sysnames->subs[i] != sysnames->blank) kfree(sysnames->subs[i]); + kfree(sysnames); } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 52aa90fb4fbd..6adab30a8399 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -540,6 +540,8 @@ static void afs_deliver_to_call(struct afs_call *call) ret = call->type->deliver(call); state = READ_ONCE(call->state); + if (ret == 0 && call->unmarshalling_error) + ret = -EBADMSG; switch (ret) { case 0: afs_queue_call_work(call); @@ -963,5 +965,7 @@ noinline int afs_protocol_error(struct afs_call *call, int error, enum afs_eproto_cause cause) { trace_afs_protocol_error(call, error, cause); + if (call) + call->unmarshalling_error = true; return error; } diff --git a/fs/afs/write.c b/fs/afs/write.c index cb76566763db..96b042af6248 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -194,11 +194,11 @@ int afs_write_end(struct file *file, struct address_space *mapping, i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { - spin_lock(&vnode->wb_lock); + write_seqlock(&vnode->cb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->wb_lock); + write_sequnlock(&vnode->cb_lock); } if (!PageUptodate(page)) { @@ -811,6 +811,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) vmf->page->index, priv); SetPagePrivate(vmf->page); set_page_private(vmf->page, priv); + file_update_time(file); sb_end_pagefault(inode->i_sb); return VM_FAULT_LOCKED; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 8af7f093305d..d21cf61d86b9 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -179,21 +179,20 @@ static void xdr_dump_bad(const __be32 *bp) /* * Decode a YFSFetchStatus block */ -static int xdr_decode_YFSFetchStatus(const __be32 **_bp, - struct afs_call *call, - struct afs_status_cb *scb) +static void xdr_decode_YFSFetchStatus(const __be32 **_bp, + 
struct afs_call *call, + struct afs_status_cb *scb) { const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; u32 type; - int ret; status->abort_code = ntohl(xdr->abort_code); if (status->abort_code != 0) { if (status->abort_code == VNOVNODE) status->nlink = 0; scb->have_error = true; - goto good; + goto advance; } type = ntohl(xdr->type); @@ -221,15 +220,13 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, status->size = xdr_to_u64(xdr->size); status->data_version = xdr_to_u64(xdr->data_version); scb->have_status = true; -good: - ret = 0; advance: *_bp += xdr_size(xdr); - return ret; + return; bad: xdr_dump_bad(*_bp); - ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); goto advance; } @@ -348,9 +345,7 @@ static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); xdr_decode_YFSCallBack(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); @@ -372,9 +367,7 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -534,9 +527,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); xdr_decode_YFSCallBack(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); @@ -645,12 +636,8 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_YFSFid(&bp, call->out_fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_YFSCallBack(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); @@ -803,14 +790,9 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_YFSFid(&bp, &fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); /* Was deleted if vnode->status.abort_code == VNOVNODE. 
*/ xdr_decode_YFSVolSync(&bp, call->out_volsync); @@ -890,10 +872,7 @@ static int yfs_deliver_fs_remove(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); return 0; } @@ -975,12 +954,8 @@ static int yfs_deliver_fs_link(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1062,12 +1037,8 @@ static int yfs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_YFSFid(&bp, call->out_fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); @@ -1155,13 +1126,11 @@ static int yfs_deliver_fs_rename(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. 
+ */ + xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); return 0; @@ -1846,9 +1815,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; scb = &call->out_scb[call->count]; - ret = xdr_decode_YFSFetchStatus(&bp, call, scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, scb); call->count++; if (call->count < call->count2) @@ -2068,9 +2035,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) bp = call->buffer; yacl->inherit_flag = ntohl(*bp++); yacl->num_cleaned = ntohl(*bp++); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); xdr_decode_YFSVolSync(&bp, call->out_volsync); call->unmarshall++; diff --git a/fs/block_dev.c b/fs/block_dev.c index 34644ce4b502..2dc9c73a4cb2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1546,10 +1546,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) */ if (!for_part) { ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) { - bdput(bdev); + if (ret != 0) return ret; - } } restart: @@ -1618,8 +1616,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, mode, 1); - if (ret) + if (ret) { + bdput(whole); goto out_clear; + } bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || @@ -1669,7 +1669,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_unblock_events(disk); put_disk_and_module(disk); out: - bdput(bdev); return ret; } @@ -1736,6 +1735,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) bdput(whole); } + if (res) + bdput(bdev); + return res; } EXPORT_SYMBOL(blkdev_get); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c2dd94e1b274..42d69e77f89d 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -910,7 +910,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out_put_group; + goto out; } /* @@ -948,7 +948,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out_put_group; + goto out; } clear_nlink(inode); /* One for the block groups ref */ @@ -971,13 +971,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out_put_group; + goto out; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out_put_group; + goto out; btrfs_release_path(path); } @@ -986,6 +986,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->key.objectid) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1094,10 +1097,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, block_group); if (ret) - goto out_put_group; - - /* Once for the block groups 
rbtree */ - btrfs_put_block_group(block_group); + goto out; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) @@ -1120,10 +1120,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, free_extent_map(em); } -out_put_group: +out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); -out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d2c277c6e0a..36cd210ee2ef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -940,6 +940,8 @@ enum { BTRFS_ROOT_DEAD_RELOC_TREE, /* Mark dead root stored on device whose cleanup needs to be resumed */ BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used only for subvolume roots. */ + BTRFS_ROOT_HAS_LOG_TREE, }; /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8aab286f2028..9b214b14a3aa 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5025,25 +5025,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -5493,6 +5496,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. 
+ */ + check_buffer_tree_ref(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 127cdecbe872..e408181a5eba 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -975,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode, u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@ -1025,10 +1026,26 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(BTRFS_I(inode), start, start + num_bytes - 1, 0); + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; + while (num_bytes > 0) { cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; @@ -1328,6 +1345,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode)); + const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 range_start = start; @@ -1358,18 +1377,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, * data space info, which we incremented in the step above. * * If we need to fallback to cow and the inode corresponds to a free - * space cache inode, we must also increment bytes_may_use of the data - * space_info for the same reason. Space caches always get a prealloc + * space cache inode or an inode of the data relocation tree, we must + * also increment bytes_may_use of the data space_info for the same + * reason. Space caches and relocated data extents always get a prealloc * extent for them, however scrub or balance may have set the block - * group that contains that extent to RO mode. + * group that contains that extent to RO mode and therefore force COW + * when starting writeback. */ count = count_range_bits(io_tree, &range_start, end, range_bytes, EXTENT_NORESERVE, 0); - if (count > 0 || is_space_ino) { - const u64 bytes = is_space_ino ? 
range_bytes : count; + if (count > 0 || is_space_ino || is_reloc_ino) { + u64 bytes = count; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_space_info *sinfo = fs_info->data_sinfo; + if (is_space_ino || is_reloc_ino) + bytes = range_bytes; + spin_lock(&sinfo->lock); btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); spin_unlock(&sinfo->lock); @@ -1633,12 +1657,8 @@ out_check: ret = fallback_to_cow(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written); - if (ret) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); + if (ret) goto error; - } cow_start = (u64)-1; } @@ -1654,9 +1674,6 @@ out_check: ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); ret = PTR_ERR(em); goto error; } @@ -8833,9 +8850,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } ret = btrfs_delalloc_reserve_space(inode, &data_reserved, offset, count); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7d464b049507..f46afbff668e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -167,6 +167,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ret) goto out; + set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); root->log_start_pid = current->pid; } @@ -193,6 +194,9 @@ static int join_running_log_trans(struct btrfs_root *root) { int ret = -ENOENT; + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state)) + return ret; + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; @@ -3327,6 +3331,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) if (root->log_root) { free_log_tree(trans, root->log_root); root->log_root = NULL; + clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); } return 0; } diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 79dc06881e78..e088843a7734 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -172,9 +172,16 @@ struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino) static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) { struct inode *inode = __lookup_inode(sb, ino); + int err; + if (IS_ERR(inode)) return ERR_CAST(inode); - if (inode->i_nlink == 0) { + /* We need LINK caps to reliably check i_nlink */ + err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false); + if (err) + return ERR_PTR(err); + /* -ESTALE if inode as been unlinked and no file is open */ + if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) { iput(inode); return ERR_PTR(-ESTALE); } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 721b2560caa7..f5df2a4195c2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -614,26 +614,26 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); mutex_lock(&server->srv_mutex); +#ifdef CONFIG_CIFS_DFS_UPCALL /* * Set up next DFS target server (if any) for reconnect. If DFS * feature is disabled, then we will retry last server we * connected to before. 
*/ + reconn_inval_dfs_target(server, cifs_sb, &tgt_list, &tgt_it); +#endif + rc = reconn_set_ipaddr(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } + if (cifs_rdma_enabled(server)) rc = smbd_reconnect(server); else rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); -#ifdef CONFIG_CIFS_DFS_UPCALL - reconn_inval_dfs_target(server, cifs_sb, &tgt_list, - &tgt_it); -#endif - rc = reconn_set_ipaddr(server); - if (rc) { - cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", - __func__, rc); - } mutex_unlock(&server->srv_mutex); msleep(3000); } else { @@ -5281,9 +5281,15 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) vol_info->nocase = master_tcon->nocase; vol_info->nohandlecache = master_tcon->nohandlecache; vol_info->local_lease = master_tcon->local_lease; + vol_info->no_lease = master_tcon->no_lease; + vol_info->resilient = master_tcon->use_resilient; + vol_info->persistent = master_tcon->use_persistent; + vol_info->handle_timeout = master_tcon->handle_timeout; vol_info->no_linux_ext = !master_tcon->unix_ext; + vol_info->linux_ext = master_tcon->posix_extensions; vol_info->sectype = master_tcon->ses->sectype; vol_info->sign = master_tcon->ses->sign; + vol_info->seal = master_tcon->seal; rc = cifs_set_vol_auth(vol_info, master_tcon->ses); if (rc) { @@ -5309,10 +5315,6 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; - if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, vol_info); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5e6bc8fa4e46..5ae458505f63 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1791,6 +1791,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; + bool new_target = d_really_is_negative(target_dentry); if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -1867,8 +1868,13 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, */ unlink_target: - /* Try unlinking the target dentry if it's not negative */ - if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) { + /* + * If the target dentry was created during the rename, try + * unlinking it if it's not negative + */ + if (new_target && + d_really_is_positive(target_dentry) && + (rc == -EACCES || rc == -EEXIST)) { if (d_is_dir(target_dentry)) tmprc = cifs_rmdir(target_dir, target_dentry); else @@ -2264,6 +2270,15 @@ set_size_out: if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); + + /* + * The man page of truncate says if the size changed, + * then the st_ctime and st_mtime fields for the file + * are updated. 
+ */ + attrs->ia_ctime = attrs->ia_mtime = current_time(inode); + attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME; + cifs_truncate_page(inode->i_mapping, inode->i_size); } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 58915d882285..7ccbfc656478 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -736,6 +736,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) /* close extra handle outside of crit sec */ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } + rc = 0; goto oshr_free; } @@ -2969,6 +2970,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + /* + * We zero the range through ioctl, so we need remove the page caches + * first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ if (!CIFS_CACHE_READ(cifsi)) @@ -3035,6 +3041,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } + /* + * We implement the punch hole through ioctl, so we need remove the page + * caches first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); fsctl_buf.FileOffset = cpu_to_le64(offset); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fe1552cc8a0a..eafc49de4d7f 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -528,7 +528,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, const int timeout, const int flags, unsigned int *instance) { - int rc; + long rc; int *credits; int optype; long int t; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 416d9de35679..4311d01b02a8 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -97,7 +97,6 @@ do { \ __LINE__, __FILE__, #x, jiffies); \ {do} \ printk("\n"); \ - BUG(); \ panic("DLM: Record message above and reboot.\n"); \ } \ } diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index faf950189bd7..568d5a493876 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -148,22 +148,22 @@ static inline void z_erofs_onlinepage_init(struct page *page) static inline void z_erofs_onlinepage_fixup(struct page *page, uintptr_t index, bool down) { - unsigned long *p, o, v, id; -repeat: - p = &page_private(page); - o = READ_ONCE(*p); + union z_erofs_onlinepage_converter u = { .v = &page_private(page) }; + int orig, orig_index, val; - id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; - if (id) { +repeat: + orig = atomic_read(u.o); + orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (orig_index) { if (!index) return; - DBG_BUGON(id != index); + DBG_BUGON(orig_index != index); } - v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | - ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); - if (cmpxchg(p, o, v) != o) + val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); + if (atomic_cmpxchg(u.o, orig, val) != orig) goto repeat; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2743c6f8a457..0589e914663f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -677,6 +677,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, struct qstr qstr = {.name = str, .len = len }; const struct dentry *parent = READ_ONCE(dentry->d_parent); const struct inode *inode = 
READ_ONCE(parent->d_inode); + char strbuf[DNAME_INLINE_LEN]; if (!inode || !IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { @@ -685,6 +686,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, return memcmp(str, name->name, len); } + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. + */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + qstr.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); + } + return ext4_ci_compare(inode, name, &qstr, false); } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9bd44588eb77..3193f0b4a02d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3010,7 +3010,7 @@ again: * in use to avoid freeing it when removing blocks. */ if (sbi->s_cluster_ratio > 1) { - pblk = ext4_ext_pblock(ex) + end - ee_block + 2; + pblk = ext4_ext_pblock(ex) + end - ee_block + 1; partial.pclu = EXT4_B2C(sbi, pblk); partial.state = nofree; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d3500eaf900e..f7c20bb20da3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2034,6 +2034,16 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, #endif } else if (token == Opt_dax) { #ifdef CONFIG_FS_DAX + if (is_remount && test_opt(sb, DAX)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -1; + } + if (is_remount && !(sbi->s_mount_opt & EXT4_MOUNT_DAX)) { + ext4_msg(sb, KERN_ERR, "can't change " + "dax mount option while remounting"); + return -1; + } ext4_msg(sb, KERN_WARNING, "DAX enabled. 
Warning: EXPERIMENTAL, use at your own risk"); sbi->s_mount_opt |= m->mount_opt; @@ -2294,6 +2304,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_msg(sb, KERN_ERR, "revision level too high, " "forcing read-only mode"); err = -EROFS; + goto done; } if (read_only) goto done; @@ -5366,12 +5377,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; goto restore_opts; } - if (test_opt(sb, DAX)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and dax"); - err = -EINVAL; - goto restore_opts; - } } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) { if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " @@ -5387,12 +5392,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { - ext4_msg(sb, KERN_WARNING, "warning: refusing change of " - "dax flag with busy inodes while remounting"); - sbi->s_mount_opt ^= EXT4_MOUNT_DAX; - } - if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a28ffecc0f95..bbd07fe8a492 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -892,8 +892,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) int i; int err; - sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), - GFP_KERNEL); + sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 84280ad3786c..e9af46dc06f7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -107,36 +107,28 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, /* * Test whether a case-insensitive directory entry matches the filename * being searched for. - * - * Returns: 0 if the directory entry matches, more than 0 if it - * doesn't match or less than zero on error. */ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, + const struct qstr *entry, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); + const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); const struct unicode_map *um = sbi->s_encoding; - int ret; + int res; if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); + res = utf8_strncasecmp_folded(um, name, entry); else - ret = utf8_strncasecmp(um, name, entry); - - if (ret < 0) { - /* Handle invalid character sequence as either an error - * or as an opaque byte sequence. + res = utf8_strncasecmp(um, name, entry); + if (res < 0) { + /* + * In strict mode, ignore invalid names. In non-strict mode, + * fall back to treating them as opaque byte sequences. 
*/ - if (f2fs_has_strict_mode(sbi)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + if (f2fs_has_strict_mode(sbi) || name->len != entry->len) + return false; + return !memcmp(name->name, entry->name, name->len); } - - return ret; + return res == 0; } static void f2fs_fname_setup_ci_filename(struct inode *dir, @@ -188,10 +180,10 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); + return f2fs_match_ci_name(parent, &cf, &entry, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, - false); + return f2fs_match_ci_name(parent, fname->usr_fname, &entry, + false); } #endif if (fscrypt_match_name(fname, d->filename[bit_pos], @@ -1067,17 +1059,41 @@ const struct file_operations f2fs_dir_operations = { static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - struct qstr qstr = {.name = str, .len = len }; const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); + const struct inode *dir = READ_ONCE(parent->d_inode); + const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct qstr entry = QSTR_INIT(str, len); + char strbuf[DNAME_INLINE_LEN]; + int res; + + if (!dir || !IS_CASEFOLDED(dir)) + goto fallback; - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. 
+ */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + entry.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); } - return f2fs_ci_compare(inode, name, &qstr, false); + res = utf8_strncasecmp(sbi->s_encoding, name, &entry); + if (res >= 0) + return res; + + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a26ea1e6ba88..03693d6b1c10 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2790,18 +2790,12 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { - void *ret; - if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } - ret = kmalloc(size, flags); - if (ret) - return ret; - - return kvmalloc(size, flags); + return kmalloc(size, flags); } static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, @@ -2960,11 +2954,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c3a9da79ac99..5d94abe467a4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2056,8 +2056,15 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (in != F2FS_GOING_DOWN_FULLSYNC) { ret = mnt_want_write_file(filp); - if (ret) + if (ret) { + if (ret == -EROFS) { + ret = 0; + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + trace_f2fs_shutdown(sbi, in, ret); + } return ret; + } } switch (in) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f14401a77d60..90a20bd12961 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2933,7 +2933,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); - nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits = f2fs_kvzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3066,9 +3066,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) int i; nm_i->free_nid_bitmap = - f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), - nm_i->nat_blocks), - GFP_KERNEL); + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e36543c9f2b7..f4b882ee48dd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1230,7 +1230,8 @@ static int f2fs_statfs_project(struct super_block *sb, limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { - curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + curblock = (dquot->dq_dqb.dqb_curspace + + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; buf->f_blocks = limit; buf->f_bfree = buf->f_bavail = (buf->f_blocks > curblock) ? 
@@ -2900,7 +2901,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_seq = f2fs_kzalloc(sbi, + FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi, BITS_TO_LONGS(FDEV(devi).nr_blkz) * sizeof(unsigned long), GFP_KERNEL); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3dd37a998ea9..f8d8a8e34b80 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/falloc.h> #include <linux/uio.h> +#include <linux/fs.h> static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, struct fuse_page_desc **desc) @@ -712,6 +713,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc, spin_unlock(&io->lock); ia->ap.args.end = fuse_aio_complete_req; + ia->ap.args.may_block = io->should_dirty; err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); if (err) fuse_aio_complete_req(fc, &ia->ap.args, err); @@ -2147,10 +2149,8 @@ static int fuse_writepages(struct address_space *mapping, err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); if (data.wpa) { - /* Ignore errors if we can write at least one page */ WARN_ON(!data.wpa->ia.ap.num_pages); fuse_writepages_send(&data); - err = 0; } if (data.ff) fuse_file_put(data.ff, false, false); @@ -2759,7 +2759,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; - iov->iov_len = _IOC_SIZE(cmd); + + switch (cmd) { + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + iov->iov_len = sizeof(int); + break; + default: + iov->iov_len = _IOC_SIZE(cmd); + break; + } if (_IOC_DIR(cmd) & _IOC_WRITE) { in_iov = iov; @@ -3279,13 +3288,11 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (fc->writeback_cache) { - inode_lock(inode_in); - err = fuse_writeback_range(inode_in, pos_in, pos_in + len); - inode_unlock(inode_in); - if (err) - return err; - } + inode_lock(inode_in); + err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1); + inode_unlock(inode_in); + if (err) + return err; inode_lock(inode_out); @@ -3293,11 +3300,27 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (err) goto out; - if (fc->writeback_cache) { - err = fuse_writeback_range(inode_out, pos_out, pos_out + len); - if (err) - goto out; - } + /* + * Write out dirty pages in the destination file before sending the COPY + * request to userspace. After the request is completed, truncate off + * pages (including partial ones) from the cache that have been copied, + * since these contain stale data at that point. + * + * This should be mostly correct, but if the COPY writes to partial + * pages (at the start or end) and the parts not covered by the COPY are + * written through a memory map after calling fuse_writeback_range(), + * then these partial page modifications will be lost on truncation. + * + * It is unlikely that someone would rely on such mixed style + * modifications. Yet this does give less guarantees than if the + * copying was performed with write(2). + * + * To fix this a i_mmap_sem style lock could be used to prevent new + * faults while the copy is ongoing. 
+ */ + err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1); + if (err) + goto out; if (is_unstable) set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); @@ -3318,6 +3341,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (err) goto out; + truncate_inode_pages_range(inode_out->i_mapping, + ALIGN_DOWN(pos_out, PAGE_SIZE), + ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); + if (fc->writeback_cache) { fuse_write_update_size(inode_out, pos_out + outarg.size); file_update_time(file_out); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ca344bf71404..d7cde216fc87 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -249,6 +249,7 @@ struct fuse_args { bool out_argvar:1; bool page_zeroing:1; bool page_replace:1; + bool may_block:1; struct fuse_in_arg in_args[3]; struct fuse_arg out_args[2]; void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 16aec32f7f3d..5dca643a257c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -121,10 +121,12 @@ static void fuse_evict_inode(struct inode *inode) } } -static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) +static int fuse_reconfigure(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; + sync_filesystem(sb); - if (*flags & SB_MANDLOCK) + if (fc->sb_flags & SB_MANDLOCK) return -EINVAL; return 0; @@ -473,6 +475,13 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fuse_fs_context *ctx = fc->fs_private; int opt; + /* + * Ignore options coming from mount(MS_REMOUNT) for backward + * compatibility. + */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) + return 0; + opt = fs_parse(fc, &fuse_fs_parameters, param, &result); if (opt < 0) return opt; @@ -815,7 +824,6 @@ static const struct super_operations fuse_super_operations = { .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, - .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, @@ -1289,6 +1297,7 @@ static int fuse_get_tree(struct fs_context *fc) static const struct fs_context_operations fuse_context_ops = { .free = fuse_free_fc, .parse_param = fuse_parse_param, + .reconfigure = fuse_reconfigure, .get_tree = fuse_get_tree, }; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index a5c86048b96e..7505f8102762 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -55,6 +55,12 @@ struct virtio_fs_forget { struct list_head list; }; +struct virtio_fs_req_work { + struct fuse_req *req; + struct virtio_fs_vq *fsvq; + struct work_struct done_work; +}; + static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight); @@ -443,19 +449,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) } /* Work function for request completion */ +static void virtio_fs_request_complete(struct fuse_req *req, + struct virtio_fs_vq *fsvq) +{ + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct fuse_conn *fc = fsvq->fud->fc; + struct fuse_args *args; + struct fuse_args_pages *ap; + unsigned int len, i, thislen; + struct page *page; + + /* + * TODO verify that server properly follows FUSE protocol + * (oh.uniq, oh.len) + */ + args = req->args; + copy_args_from_argbuf(args, req); + + if (args->out_pages && args->page_zeroing) { + len = args->out_args[args->out_numargs - 1].size; + ap = container_of(args, typeof(*ap), args); + for (i = 0; i < ap->num_pages; 
i++) { + thislen = ap->descs[i].length; + if (len < thislen) { + WARN_ON(ap->descs[i].offset); + page = ap->pages[i]; + zero_user_segment(page, len, thislen); + len = 0; + } else { + len -= thislen; + } + } + } + + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + spin_unlock(&fpq->lock); + + fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_complete_req_work(struct work_struct *work) +{ + struct virtio_fs_req_work *w = + container_of(work, typeof(*w), done_work); + + virtio_fs_request_complete(w->req, w->fsvq); + kfree(w); +} + static void virtio_fs_requests_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, done_work); struct fuse_pqueue *fpq = &fsvq->fud->pq; - struct fuse_conn *fc = fsvq->fud->fc; struct virtqueue *vq = fsvq->vq; struct fuse_req *req; - struct fuse_args_pages *ap; struct fuse_req *next; - struct fuse_args *args; - unsigned int len, i, thislen; - struct page *page; + unsigned int len; LIST_HEAD(reqs); /* Collect completed requests off the virtqueue */ @@ -473,38 +527,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work) /* End requests */ list_for_each_entry_safe(req, next, &reqs, list) { - /* - * TODO verify that server properly follows FUSE protocol - * (oh.uniq, oh.len) - */ - args = req->args; - copy_args_from_argbuf(args, req); - - if (args->out_pages && args->page_zeroing) { - len = args->out_args[args->out_numargs - 1].size; - ap = container_of(args, typeof(*ap), args); - for (i = 0; i < ap->num_pages; i++) { - thislen = ap->descs[i].length; - if (len < thislen) { - WARN_ON(ap->descs[i].offset); - page = ap->pages[i]; - zero_user_segment(page, len, thislen); - len = 0; - } else { - len -= thislen; - } - } - } - - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - spin_unlock(&fpq->lock); - fuse_request_end(fc, req); - spin_lock(&fsvq->lock); - dec_in_flight_req(fsvq); - spin_unlock(&fsvq->lock); + /* blocking async request completes in a worker context */ + if (req->args->may_block) { + struct virtio_fs_req_work *w; + + w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); + INIT_WORK(&w->done_work, virtio_fs_complete_req_work); + w->fsvq = fsvq; + w->req = req; + schedule_work(&w->done_work); + } else { + virtio_fs_request_complete(req, fsvq); + } } } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 110e5c4db819..a4b6a49462a4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -881,8 +881,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) * @new: New transaction to be merged */ -static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) { + struct gfs2_trans *old = sdp->sd_log_tr; + WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags)); old->tr_num_buf_new += new->tr_num_buf_new; @@ -893,6 +895,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); + + spin_lock(&sdp->sd_ail_lock); + list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list); + list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list); + spin_unlock(&sdp->sd_ail_lock); } static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) @@ -904,7 +911,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 
gfs2_log_lock(sdp); if (sdp->sd_log_tr) { - gfs2_merge_trans(sdp->sd_log_tr, tr); + gfs2_merge_trans(sdp, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); sdp->sd_log_tr = tr; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 18daf494abab..e0c55765b06d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -911,7 +911,7 @@ fail: } static const match_table_t nolock_tokens = { - { Opt_jid, "jid=%d\n", }, + { Opt_jid, "jid=%d", }, { Opt_err, NULL }, }; @@ -1168,7 +1168,17 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_per_node; } - if (!sb_rdonly(sb)) { + if (sb_rdonly(sb)) { + struct gfs2_holder freeze_gh; + + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + GL_EXACT, &freeze_gh); + if (error) { + fs_err(sdp, "can't make FS RO: %d\n", error); + goto fail_per_node; + } + gfs2_glock_dq_uninit(&freeze_gh); + } else { error = gfs2_make_fs_rw(sdp); if (error) { fs_err(sdp, "can't make FS RW: %d\n", error); diff --git a/fs/io_uring.c b/fs/io_uring.c index 7fa3cd3fff4d..e0200406765c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -267,6 +267,9 @@ struct io_ring_ctx { #if defined(CONFIG_UNIX) struct socket *ring_sock; #endif + + struct list_head task_list; + spinlock_t task_lock; }; struct sqe_submit { @@ -331,14 +334,18 @@ struct io_kiocb { #define REQ_F_ISREG 2048 /* regular file */ #define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */ #define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */ +#define REQ_F_CANCEL 16384 /* cancel request */ unsigned long fsize; u64 user_data; u32 result; u32 sequence; + struct task_struct *task; struct fs_struct *fs; struct work_struct work; + struct task_struct *work_task; + struct list_head task_list; }; #define IO_PLUG_THRESHOLD 2 @@ -425,6 +432,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cancel_list); INIT_LIST_HEAD(&ctx->defer_list); INIT_LIST_HEAD(&ctx->timeout_list); + INIT_LIST_HEAD(&ctx->task_list); + spin_lock_init(&ctx->task_lock); return ctx; } @@ -492,6 +501,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) static inline void io_queue_async_work(struct io_ring_ctx *ctx, struct io_kiocb *req) { + unsigned long flags; int rw = 0; if (req->submit.sqe) { @@ -503,6 +513,13 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx, } } + req->task = current; + + spin_lock_irqsave(&ctx->task_lock, flags); + list_add(&req->task_list, &ctx->task_list); + req->work_task = NULL; + spin_unlock_irqrestore(&ctx->task_lock, flags); + queue_work(ctx->sqo_wq[rw], &req->work); } @@ -2201,6 +2218,8 @@ static void io_sq_wq_submit_work(struct work_struct *work) old_cred = override_creds(ctx->creds); async_list = io_async_list_from_sqe(ctx, req->submit.sqe); + + allow_kernel_signal(SIGINT); restart: do { struct sqe_submit *s = &req->submit; @@ -2232,6 +2251,12 @@ restart: } if (!ret) { + req->work_task = current; + if (req->flags & REQ_F_CANCEL) { + ret = -ECANCELED; + goto end_req; + } + s->has_user = cur_mm != NULL; s->needs_lock = true; do { @@ -2246,6 +2271,12 @@ restart: break; cond_resched(); } while (1); +end_req: + if (!list_empty(&req->task_list)) { + spin_lock_irq(&ctx->task_lock); + list_del_init(&req->task_list); + spin_unlock_irq(&ctx->task_lock); + } } /* drop submission reference */ @@ -2311,6 +2342,7 @@ restart: } out: + disallow_signal(SIGINT); if (cur_mm) { set_fs(old_fs); unuse_mm(cur_mm); @@ -3675,12 +3707,32 @@ 
static int io_uring_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &ctx->cq_fasync); } +static void io_cancel_async_work(struct io_ring_ctx *ctx, + struct task_struct *task) +{ + if (list_empty(&ctx->task_list)) + return; + + spin_lock_irq(&ctx->task_lock); + while (!list_empty(&ctx->task_list)) { + struct io_kiocb *req; + + req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list); + list_del_init(&req->task_list); + req->flags |= REQ_F_CANCEL; + if (req->work_task && (!task || req->task == task)) + send_sig(SIGINT, req->work_task, 1); + } + spin_unlock_irq(&ctx->task_lock); +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); mutex_unlock(&ctx->uring_lock); + io_cancel_async_work(ctx, NULL); io_kill_timeouts(ctx); io_poll_remove_all(ctx); io_iopoll_reap_events(ctx); @@ -3688,6 +3740,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_ring_ctx_free(ctx); } +static int io_uring_flush(struct file *file, void *data) +{ + struct io_ring_ctx *ctx = file->private_data; + + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) + io_cancel_async_work(ctx, current); + + return 0; +} + static int io_uring_release(struct inode *inode, struct file *file) { struct io_ring_ctx *ctx = file->private_data; @@ -3792,6 +3854,7 @@ out_fput: static const struct file_operations io_uring_fops = { .release = io_uring_release, + .flush = io_uring_flush, .mmap = io_uring_mmap, .poll = io_uring_poll, .fasync = io_uring_fasync, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c1ce2805c563..fa58835668a6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -96,7 +96,6 @@ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); EXPORT_SYMBOL(jbd2_inode_cache); -static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); #ifdef CONFIG_JBD2_DEBUG @@ -805,7 +804,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, "at offset %lu on %s\n", __func__, blocknr, journal->j_devname); err = -EIO; - __journal_abort_soft(journal, err); + jbd2_journal_abort(journal, err); } } else { *retp = blocknr; /* +journal->j_blk_offset */ @@ -2070,64 +2069,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) return err; } -/* - * Journal abort has very specific semantics, which we describe - * for journal abort. - * - * Two internal functions, which provide abort to the jbd layer - * itself are here. - */ - -/* - * Quick version for internal journal use (doesn't lock the journal). - * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, - * and don't attempt to make any other journal updates. - */ -void __jbd2_journal_abort_hard(journal_t *journal) -{ - transaction_t *transaction; - - if (journal->j_flags & JBD2_ABORT) - return; - - printk(KERN_ERR "Aborting journal on device %s.\n", - journal->j_devname); - - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_ABORT; - transaction = journal->j_running_transaction; - if (transaction) - __jbd2_log_start_commit(journal, transaction->t_tid); - write_unlock(&journal->j_state_lock); -} - -/* Soft abort: record the abort error status in the journal superblock, - * but don't do any other IO. 
*/ -static void __journal_abort_soft (journal_t *journal, int errno) -{ - int old_errno; - - write_lock(&journal->j_state_lock); - old_errno = journal->j_errno; - if (!journal->j_errno || errno == -ESHUTDOWN) - journal->j_errno = errno; - - if (journal->j_flags & JBD2_ABORT) { - write_unlock(&journal->j_state_lock); - if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) - jbd2_journal_update_sb_errno(journal); - return; - } - write_unlock(&journal->j_state_lock); - - __jbd2_journal_abort_hard(journal); - - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); -} - /** * void jbd2_journal_abort () - Shutdown the journal immediately. * @journal: the journal to shutdown. @@ -2171,7 +2112,47 @@ static void __journal_abort_soft (journal_t *journal, int errno) void jbd2_journal_abort(journal_t *journal, int errno) { - __journal_abort_soft(journal, errno); + transaction_t *transaction; + + /* + * ESHUTDOWN always takes precedence because a file system check + * caused by any other journal abort error is not required after + * a shutdown triggered. + */ + write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) { + int old_errno = journal->j_errno; + + write_unlock(&journal->j_state_lock); + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) { + journal->j_errno = errno; + jbd2_journal_update_sb_errno(journal); + } + return; + } + + /* + * Mark the abort as occurred and start current running transaction + * to release all journaled buffer. + */ + pr_err("Aborting journal on device %s.\n", journal->j_devname); + + journal->j_flags |= JBD2_ABORT; + journal->j_errno = errno; + transaction = journal->j_running_transaction; + if (transaction) + __jbd2_log_start_commit(journal, transaction->t_tid); + write_unlock(&journal->j_state_lock); + + /* + * Record errno to the journal super block, so that fsck and jbd2 + * layer could realise that a filesystem check is needed. + */ + jbd2_journal_update_sb_errno(journal); + + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6b0bf4ebd812..70cf8c5760c7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -367,8 +367,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode; - inode_dio_end(inode); - if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -380,7 +378,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) complete(&dreq->completion); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); } static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -510,8 +511,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; } @@ -923,8 +926,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? 
result : -EIO; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 95dc90570786..7b3136753205 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); + inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5657b7f2611f..1741d902b0d8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -984,9 +984,8 @@ retry: goto out_mds; /* Use a direct mapping of ds_idx to pgio mirror_idx */ - if (WARN_ON_ONCE(pgio->pg_mirror_count != - FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) - goto out_mds; + if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)) + goto out_eagain; for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); @@ -1008,7 +1007,10 @@ retry: (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; return; - +out_eagain: + pnfs_generic_pg_cleanup(pgio); + pgio->pg_error = -EAGAIN; + return; out_mds: trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_RW, @@ -1018,6 +1020,7 @@ out_mds: pgio->pg_lseg = NULL; pgio->pg_maxretrans = 0; nfs_pageio_reset_write_mds(pgio); + pgio->pg_error = -EAGAIN; } static unsigned int diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3802c88e8372..6de41f741280 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -826,6 +826,8 @@ int nfs_getattr(const struct path *path, struct kstat *stat, do_update |= cache_validity & NFS_INO_INVALID_ATIME; if (request_mask & (STATX_CTIME|STATX_MTIME)) do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE; + if (request_mask & STATX_BLOCKS) + do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; if (do_update) { /* Update the attribute cache */ if (!(server->flags & NFS_MOUNT_NOAC)) @@ -1750,7 +1752,8 @@ out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_MTIME); + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_BLOCKS); return status; } @@ -1857,7 +1860,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_BLOCKS); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -2019,8 +2023,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - else + else { + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_BLOCKS + | NFS_INO_REVAL_FORCED); cache_revalidated = false; + } /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e257653f25ab..1a1bd2fe6e98 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, slot->seq_nr_last_acked = seqnr; } +static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred, + struct nfs4_slot *slot) +{ + struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + static int 
nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task, goto out; session = slot->table->session; + clp = session->clp; trace_nfs4_sequence_done(session, res); @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task, nfs4_slot_sequence_acked(slot, slot->seq_nr); /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; - clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags, @@ -852,10 +860,18 @@ static int nfs41_sequence_process(struct rpc_task *task, /* * Were one or more calls using this slot interrupted? * If the server never received the request, then our - * transmitted slot sequence number may be too high. + * transmitted slot sequence number may be too high. However, + * if the server did receive the request then it might + * accidentally give us a reply with a mismatched operation. + * We can sort this out by sending a lone sequence operation + * to the server on the same slot. */ if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { slot->seq_nr--; + if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) { + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot); + res->sr_slot = NULL; + } goto retry_nowait; } /* @@ -7870,7 +7886,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { - .rpc_call_done = &nfs4_bind_one_conn_to_session_done, + .rpc_call_done = nfs4_bind_one_conn_to_session_done, }; /* diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 10ec5ecdf117..65c331f75e9c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -78,6 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) +int nfsd_drc_slab_create(void); +void nfsd_drc_slab_free(void); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 9a4ef815fb8c..ed53e206a299 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -139,7 +139,6 @@ struct nfsd_net { * Duplicate reply cache */ struct nfsd_drc_bucket *drc_hashtbl; - struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ unsigned int max_drc_entries; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index afca3287184b..efe55d101b0e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1230,6 +1230,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { nfsd4_mark_cb_down(clp, err); + if (c) + svc_xprt_put(c->cn_xprt); return; } } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8650a97e2ba9..9af9b673f292 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7705,9 +7705,14 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - ret = nfs4_state_create_net(net); + ret = get_nfsdfs(net); if (ret) return ret; + ret = nfs4_state_create_net(net); + if (ret) { + mntput(nn->nfsd_mnt); + return ret; + } locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) @@ -7776,6 +7781,7 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); + 
mntput(nn->nfsd_mnt); } void diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96352ab7bd81..4a258065188e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -36,6 +36,8 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; +static struct kmem_cache *drc_slab; + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); @@ -95,7 +97,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, { struct svc_cacherep *rp; - rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -129,7 +131,7 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, atomic_dec(&nn->num_drc_entries); nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(nn->drc_slab, rp); + kmem_cache_free(drc_slab, rp); } static void @@ -141,6 +143,18 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, spin_unlock(&b->cache_lock); } +int nfsd_drc_slab_create(void) +{ + drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + return drc_slab ? 0: -ENOMEM; +} + +void nfsd_drc_slab_free(void) +{ + kmem_cache_destroy(drc_slab); +} + int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -159,18 +173,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) if (status) goto out_nomem; - nn->drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct svc_cacherep), 0, 0, NULL); - if (!nn->drc_slab) - goto out_shrinker; - nn->drc_hashtbl = kcalloc(hashsize, sizeof(*nn->drc_hashtbl), GFP_KERNEL); if (!nn->drc_hashtbl) { nn->drc_hashtbl = vzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl))); if (!nn->drc_hashtbl) - goto out_slab; + goto out_shrinker; } for (i = 0; i < hashsize; i++) { @@ -180,8 +189,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) nn->drc_hashsize = hashsize; return 0; -out_slab: - kmem_cache_destroy(nn->drc_slab); out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: @@ -209,8 +216,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) nn->drc_hashtbl = NULL; nn->drc_hashsize = 0; - kmem_cache_destroy(nn->drc_slab); - nn->drc_slab = NULL; } /* @@ -464,8 +469,7 @@ found_entry: rtn = RC_REPLY; break; default: - printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp, nn); + WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type); } goto out; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d77c5261c03c..be418fccc9d8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1335,6 +1335,7 @@ void nfsd_client_rmdir(struct dentry *dentry) WARN_ON_ONCE(ret); fsnotify_rmdir(dir, dentry); d_delete(dentry); + dput(dentry); inode_unlock(dir); } @@ -1424,6 +1425,18 @@ static struct file_system_type nfsd_fs_type = { }; MODULE_ALIAS_FS("nfsd"); +int get_nfsdfs(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct vfsmount *mnt; + + mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + nn->nfsd_mnt = mnt; + return 0; +} + #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) { @@ -1452,7 +1465,6 @@ unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; - struct vfsmount *mnt; struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); @@ -1479,16 +1491,8 @@ static 
__net_init int nfsd_init_net(struct net *net) init_waitqueue_head(&nn->ntf_wq); seqlock_init(&nn->boot_lock); - mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); - if (IS_ERR(mnt)) { - retval = PTR_ERR(mnt); - goto out_mount_err; - } - nn->nfsd_mnt = mnt; return 0; -out_mount_err: - nfsd_reply_cache_shutdown(nn); out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -1501,7 +1505,6 @@ static __net_exit void nfsd_exit_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - mntput(nn->nfsd_mnt); nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); @@ -1534,6 +1537,9 @@ static int __init init_nfsd(void) goto out_free_slabs; nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ + retval = nfsd_drc_slab_create(); + if (retval) + goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1547,6 +1553,8 @@ out_free_all: remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); nfsd4_exit_pnfs(); @@ -1561,6 +1569,7 @@ out_unregister_pernet: static void __exit exit_nfsd(void) { + nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index af2947551e9c..4ff0c5318a02 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -87,6 +87,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +int get_nfsdfs(struct net *); + struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); @@ -97,6 +99,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); void nfsd_client_rmdir(struct dentry *dentry); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern const struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 005d1802ab40..b6f4b552c9af 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1184,6 +1184,9 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode = 0; iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + if (!IS_POSIXACL(dirp)) + iap->ia_mode &= ~current_umask(); + err = 0; host_err = 0; switch (type) { @@ -1416,6 +1419,9 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } + if (!IS_POSIXACL(dirp)) + iap->ia_mode &= ~current_umask(); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (host_err < 0) { fh_drop_write(fhp); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8a2e284ccfcd..e2c34c704185 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb) +{ + ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + init_rwsem(&osb->nfs_sync_rwlock); +} + void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) { struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; @@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ex) + down_write(&osb->nfs_sync_rwlock); + else + down_read(&osb->nfs_sync_rwlock); + if (ocfs2_mount_local(osb)) return 0; @@ -2873,6 +2884,10 @@ void 
ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) if (!ocfs2_mount_local(osb)) ocfs2_cluster_unlock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE); + if (ex) + up_write(&osb->nfs_sync_rwlock); + else + up_read(&osb->nfs_sync_rwlock); } int ocfs2_trim_fs_lock(struct ocfs2_super *osb, @@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); - ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + ocfs2_nfs_sync_lock_init(osb); ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); osb->cconn = conn; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9150cfa4df7d..9461bd3e1c0c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -394,6 +394,7 @@ struct ocfs2_super struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; + struct rw_semaphore nfs_sync_rwlock; struct ocfs2_lock_res osb_trim_fs_lockres; struct mutex obs_trim_fs_mutex; struct ocfs2_dlm_debug *osb_dlm_debug; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 0db4a7ec58a2..dcef83c8796d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -290,7 +290,7 @@ #define OCFS2_MAX_SLOTS 255 /* Slot map indicator for an empty slot */ -#define OCFS2_INVALID_SLOT -1 +#define OCFS2_INVALID_SLOT ((u16)-1) #define OCFS2_VOL_UUID_LEN 16 #define OCFS2_MAX_VOL_LABEL_LEN 64 @@ -326,8 +326,8 @@ struct ocfs2_system_inode_info { enum { BAD_BLOCK_SYSTEM_INODE = 0, GLOBAL_INODE_ALLOC_SYSTEM_INODE, +#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE, -#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE HEARTBEAT_SYSTEM_INODE, GLOBAL_BITMAP_SYSTEM_INODE, USER_QUOTA_SYSTEM_INODE, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 69c21a3843af..503e724d39f5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2827,9 +2827,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) goto bail; } - inode_alloc_inode = - ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, - suballoc_slot); + if (suballoc_slot == (u16)OCFS2_INVALID_SLOT) + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot); + else + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + INODE_ALLOC_SYSTEM_INODE, suballoc_slot); if (!inode_alloc_inode) { /* the error code could be inaccurate, but we are not able to * get the correct one. 
*/ diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 73c9775215b3..11dd8177770d 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -482,7 +482,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, if (IS_ERR_OR_NULL(this)) return this; - if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + if (ovl_dentry_real_at(this, layer->idx) != real) { dput(this); this = ERR_PTR(-EIO); } diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 15e4fa288475..7a08a576f7b2 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -21,13 +21,16 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) return 'm'; } +/* No atime modificaton nor notify on underlying */ +#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY) + static struct file *ovl_open_realfile(const struct file *file, struct inode *realinode) { struct inode *inode = file_inode(file); struct file *realfile; const struct cred *old_cred; - int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY; + int flags = file->f_flags | OVL_OPEN_FLAGS; old_cred = ovl_override_creds(inode->i_sb); realfile = open_with_fake_path(&file->f_path, flags, realinode, @@ -48,8 +51,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags) struct inode *inode = file_inode(file); int err; - /* No atime modificaton on underlying */ - flags |= O_NOATIME | FMODE_NONOTIFY; + flags |= OVL_OPEN_FLAGS; /* If some flag changed that cannot be changed then something's amiss */ if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK)) @@ -102,7 +104,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real, } /* Did the flags change since open? */ - if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME)) + if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS)) return ovl_change_flags(real->file, file->f_flags); return 0; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7621ff176d15..d6b724beb304 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1258,6 +1258,18 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) if (!ofs->config.nfs_export && !ofs->upper_mnt) return true; + /* + * We allow using single lower with null uuid for index and nfs_export + * for example to support those features with single lower squashfs. + * To avoid regressions in setups of overlay with re-formatted lower + * squashfs, do not allow decoding origin with lower null uuid unless + * user opted-in to one of the new features that require following the + * lower inode of non-dir upper. + */ + if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino && + uuid_is_null(uuid)) + return false; + for (i = 0; i < ofs->numlowerfs; i++) { /* * We use uuid to associate an overlay lower file handle with a @@ -1344,14 +1356,23 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; + /* + * Check if lower root conflicts with this overlay layers before + * checking if it is in-use as upperdir/workdir of "another" + * mount, because we do not bother to check in ovl_is_inuse() if + * the upperdir/workdir is in fact in-use by our + * upperdir/workdir. 
+ */ err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); if (err) goto out; if (ovl_is_inuse(stack[i].dentry)) { err = ovl_report_in_use(ofs, "lowerdir"); - if (err) + if (err) { + iput(trap); goto out; + } } mnt = clone_private_mount(&stack[i]); diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 533b04aaf6f6..0a36f532cf86 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2598,6 +2598,13 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return __this_address; + if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) + return __this_address; + + if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || + be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) + return __this_address; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || @@ -2609,6 +2616,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return __this_address; + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && + be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) + return __this_address; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2623,6 +2634,11 @@ xfs_agf_verify( return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && + be32_to_cpu(agf->agf_refcount_blocks) > + be32_to_cpu(agf->agf_length)) + return __this_address; + + if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return __this_address; |
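
Note on the xfs_agf_verify() hunks above: they all follow the same defensive pattern — read an on-disk big-endian field, convert it, and reject the buffer if the value exceeds a structural bound (the AG length, the filesystem block count, or the maximum btree depth). The standalone user-space sketch below only illustrates that pattern; the struct layout, field names, FAKE_BTREE_MAXLEVELS and the helper names are simplified stand-ins, not the real on-disk xfs_agf definition.

/*
 * Minimal user-space sketch of the bounds-check pattern used by the
 * xfs_agf_verify() additions above. The structure and limits are
 * illustrative stand-ins, not the XFS on-disk AGF layout.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* ntohl()/htonl(): on-disk fields are big-endian */

#define FAKE_BTREE_MAXLEVELS 9   /* stand-in for XFS_BTREE_MAXLEVELS */

struct fake_agf {
	uint32_t agf_length;          /* size of this AG in blocks (BE) */
	uint32_t agf_freeblks;        /* free blocks in this AG (BE) */
	uint32_t agf_longest;         /* longest free extent (BE) */
	uint32_t agf_rmap_blocks;     /* blocks used by the rmap btree (BE) */
	uint32_t agf_refcount_level;  /* refcount btree height (BE) */
};

/* Return 0 if the header looks sane, -1 on the first failed check. */
static int fake_agf_verify(const struct fake_agf *agf, uint64_t fs_blocks)
{
	uint32_t length = ntohl(agf->agf_length);
	uint32_t freeblks = ntohl(agf->agf_freeblks);
	uint32_t refcount_level = ntohl(agf->agf_refcount_level);

	/* An AG cannot be larger than the whole filesystem. */
	if (length > fs_blocks)
		return -1;

	/* Free-space counters must be internally consistent. */
	if (freeblks < ntohl(agf->agf_longest) || freeblks > length)
		return -1;

	/* Per-AG btree block counts cannot exceed the AG size. */
	if (ntohl(agf->agf_rmap_blocks) > length)
		return -1;

	/* Btree heights must stay within the supported maximum. */
	if (refcount_level < 1 || refcount_level > FAKE_BTREE_MAXLEVELS)
		return -1;

	return 0;
}

int main(void)
{
	struct fake_agf good = {
		.agf_length = htonl(1024),
		.agf_freeblks = htonl(512),
		.agf_longest = htonl(128),
		.agf_rmap_blocks = htonl(16),
		.agf_refcount_level = htonl(2),
	};
	struct fake_agf bad = good;

	/* Free-block count larger than the AG itself: must be rejected. */
	bad.agf_freeblks = htonl(4096);

	printf("good header: %d\n", fake_agf_verify(&good, 1u << 20));
	printf("bad header:  %d\n", fake_agf_verify(&bad, 1u << 20));
	return 0;
}

The kernel verifier returns __this_address (a fault-address cookie) instead of a plain error so that corruption reports can pinpoint exactly which check tripped; the sketch collapses that to -1 for brevity.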