Diffstat (limited to 'fs')
62 files changed, 647 insertions, 507 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 6765949b3aab..380ad5ace7cf 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -169,7 +169,7 @@ static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) spin_lock(&server->probe_lock); - if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { + if (!test_and_set_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { server->cm_epoch = call->epoch; server->probe.cm_epoch = call->epoch; goto out; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5c794f4b051a..d1e1caa23c8b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1032,7 +1032,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *inode; struct key *key; - afs_dataversion_t dir_version; + afs_dataversion_t dir_version, invalid_before; long de_version; int ret; @@ -1084,8 +1084,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = dir->invalid_before; - if (de_version - (long)dir_version >= 0) + invalid_before = dir->invalid_before; + if (de_version - (long)invalid_before >= 0) goto out_valid; _debug("dir modified"); @@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFDIR; @@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error_key; } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_create); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, + afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); + } key_put(key); kfree(scb); @@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd}", @@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_remove(dvnode, &dentry->d_name, afs_edit_dir_for_rmdir); + up_write(&dvnode->validate_lock); } } @@ -1544,10 +1553,15 @@ 
static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = afs_end_vnode_operation(&fc); if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) ret = afs_dir_remove_link(dvnode, dentry, key); - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } if (need_rehash && ret < 0 && ret != -ENOENT) @@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; mode |= S_IFREG; @@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_create); + up_write(&dvnode->validate_lock); kfree(scb); key_put(key); @@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%llx:%llu},{%pd}", @@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); @@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, afs_edit_dir_for_link); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, struct afs_status_cb *scb; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + afs_dataversion_t data_version; int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; + data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error_key; } 
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == data_version) afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_for_symlink); + up_write(&dvnode->validate_lock); key_put(key); kfree(scb); @@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *tmp = NULL, *rehash = NULL; struct inode *new_inode; struct key *key; + afs_dataversion_t orig_data_version; + afs_dataversion_t new_data_version; bool new_negative = d_is_negative(new_dentry); int ret; @@ -1890,10 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { - afs_dataversion_t orig_data_version; - afs_dataversion_t new_data_version; - struct afs_status_cb *new_scb = &scb[1]; - orig_data_version = orig_dvnode->status.data_version + 1; if (orig_dvnode != new_dvnode) { @@ -1904,7 +1928,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_data_version = new_dvnode->status.data_version + 1; } else { new_data_version = orig_data_version; - new_scb = &scb[0]; } while (afs_select_fileserver(&fc)) { @@ -1912,7 +1935,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, - &scb[0], new_scb); + &scb[0], &scb[1]); } afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break, @@ -1930,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret == 0) { if (rehash) d_rehash(rehash); - if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) - afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, - afs_edit_dir_for_rename_0); + down_write(&orig_dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && + orig_dvnode->status.data_version == orig_data_version) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename_0); + if (orig_dvnode != new_dvnode) { + up_write(&orig_dvnode->validate_lock); - if (!new_negative && - test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) - afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, - afs_edit_dir_for_rename_1); + down_write(&new_dvnode->validate_lock); + } + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && + orig_dvnode->status.data_version == new_data_version) { + if (!new_negative) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename_1); - if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) afs_edit_dir_add(new_dvnode, &new_dentry->d_name, &vnode->fid, afs_edit_dir_for_rename_2); + } new_inode = d_inode(new_dentry); if (new_inode) { @@ -1957,14 +1987,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, * Note that if we ever implement RENAME_EXCHANGE, we'll have * to update both dentries with opposing dir versions. 
*/ - if (new_dvnode != orig_dvnode) { - afs_update_dentry_version(&fc, old_dentry, &scb[1]); - afs_update_dentry_version(&fc, new_dentry, &scb[1]); - } else { - afs_update_dentry_version(&fc, old_dentry, &scb[0]); - afs_update_dentry_version(&fc, new_dentry, &scb[0]); - } + afs_update_dentry_version(&fc, old_dentry, &scb[1]); + afs_update_dentry_version(&fc, new_dentry, &scb[1]); d_move(old_dentry, new_dentry); + up_write(&new_dvnode->validate_lock); goto error_tmp; } diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 361088a5edb9..d94e2b7cddff 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode { struct afs_fs_cursor fc; struct afs_status_cb *scb; + afs_dataversion_t dir_data_version; int ret = -ERESTARTSYS; _enter("%pd,%pd", old, new); @@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode trace_afs_silly_rename(vnode, false); if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; + dir_data_version = dvnode->status.data_version + 1; while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); @@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode dvnode->silly_key = key_get(key); } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) { afs_edit_dir_remove(dvnode, &old->d_name, afs_edit_dir_for_silly_0); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); + } + up_write(&dvnode->validate_lock); } kfree(scb); @@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + if (ret == 0) { + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dir_data_version) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); + } } kfree(scb); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index e1b9ed679045..a587767b6ae1 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -117,11 +117,8 @@ out: (unsigned int)rtt, ret); have_result |= afs_fs_probe_done(server); - if (have_result) { - server->probe.have_result = true; - wake_up_var(&server->probe.have_result); + if (have_result) wake_up_all(&server->probe_wq); - } } /* diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1f9c5d8e6fe5..68fc46634346 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -65,6 +65,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; + int ret; abort_code = ntohl(xdr->abort_code); @@ -78,7 +79,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - return 0; + goto good; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -87,7 +88,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && 
inline_error) { status->abort_code = abort_code; - return 0; + scb->have_error = true; + goto good; } type = ntohl(xdr->type); @@ -123,13 +125,16 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; - +good: + ret = 0; +advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) @@ -981,16 +986,16 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ bp = call->buffer; ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_AFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ef732dd4e7ef..80255513e230 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -533,12 +533,10 @@ struct afs_server { u32 abort_code; u32 cm_epoch; short error; - bool have_result; bool responded:1; bool is_yfs:1; bool not_yfs:1; bool local_failure:1; - bool no_epoch:1; bool cm_probed:1; bool said_rebooted:1; bool said_inconsistent:1; @@ -1335,7 +1333,7 @@ extern struct afs_volume *afs_create_volume(struct afs_fs_context *); extern void afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *); -extern int afs_check_volume_status(struct afs_volume *, struct key *); +extern int afs_check_volume_status(struct afs_volume *, struct afs_fs_cursor *); /* * write.c diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 172ba569cd60..2a3305e42b14 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -192,7 +192,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) write_unlock(&vnode->volume->servers_lock); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - error = afs_check_volume_status(vnode->volume, fc->key); + error = afs_check_volume_status(vnode->volume, fc); if (error < 0) goto failed_set_error; @@ -281,7 +281,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - error = afs_check_volume_status(vnode->volume, fc->key); + error = afs_check_volume_status(vnode->volume, fc); if (error < 0) goto failed_set_error; @@ -341,7 +341,7 @@ start: /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. 
*/ - error = afs_check_volume_status(vnode->volume, fc->key); + error = afs_check_volume_status(vnode->volume, fc); if (error < 0) goto failed_set_error; diff --git a/fs/afs/server.c b/fs/afs/server.c index b7f3cb2130ca..11b90ac7ea30 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -594,12 +594,9 @@ retry: } ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, - TASK_INTERRUPTIBLE); + (fc->flags & AFS_FS_CURSOR_INTR) ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret == -ERESTARTSYS) { - if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) { - _leave(" = t [intr]"); - return true; - } fc->error = ret; _leave(" = f [intr]"); return false; diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 9a5ce9687779..72eacc14e6e1 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -302,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); - pr_notice("VC: - pr=%lx R=%lx F=%lx\n", - a->probed, a->responded, a->failed); + pr_notice("VC: - R=%lx F=%lx\n", + a->responded, a->failed); if (a == vc->ac.alist) pr_notice("VC: - current\n"); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 92ca5e27573b..4310336b9bb8 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -281,7 +281,7 @@ error: /* * Make sure the volume record is up to date. */ -int afs_check_volume_status(struct afs_volume *volume, struct key *key) +int afs_check_volume_status(struct afs_volume *volume, struct afs_fs_cursor *fc) { time64_t now = ktime_get_real_seconds(); int ret, retries = 0; @@ -299,7 +299,7 @@ retry: } if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { - ret = afs_update_volume_status(volume, key); + ret = afs_update_volume_status(volume, fc->key); clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); @@ -312,7 +312,9 @@ retry: return 0; } - ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE); + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, + (fc->flags & AFS_FS_CURSOR_INTR) ? 
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret == -ERESTARTSYS) { _leave(" = %d", ret); return ret; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index a26126ac7bf1..b5b45c57e1b1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -165,15 +165,15 @@ static void xdr_dump_bad(const __be32 *bp) int i; pr_notice("YFS XDR: Bad status record\n"); - for (i = 0; i < 5 * 4 * 4; i += 16) { + for (i = 0; i < 6 * 4 * 4; i += 16) { memcpy(x, bp, 16); bp += 4; pr_notice("%03x: %08x %08x %08x %08x\n", i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); } - memcpy(x, bp, 4); - pr_notice("0x50: %08x\n", ntohl(x[0])); + memcpy(x, bp, 8); + pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1])); } /* @@ -186,13 +186,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; u32 type; + int ret; status->abort_code = ntohl(xdr->abort_code); if (status->abort_code != 0) { if (status->abort_code == VNOVNODE) status->nlink = 0; scb->have_error = true; - return 0; + goto good; } type = ntohl(xdr->type); @@ -220,13 +221,16 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, status->size = xdr_to_u64(xdr->size); status->data_version = xdr_to_u64(xdr->data_version); scb->have_status = true; - +good: + ret = 0; +advance: *_bp += xdr_size(xdr); - return 0; + return ret; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + goto advance; } /* @@ -1153,11 +1157,9 @@ static int yfs_deliver_fs_rename(struct afs_call *call) ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); if (ret < 0) return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } + ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); + if (ret < 0) + return ret; xdr_decode_YFSVolSync(&bp, call->out_volsync); _leave(" = 0 [done]"); diff --git a/fs/block_dev.c b/fs/block_dev.c index 52b6f646cdbd..93672c3f1c78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -19,7 +19,6 @@ #include <linux/module.h> #include <linux/blkpg.h> #include <linux/magic.h> -#include <linux/dax.h> #include <linux/buffer_head.h> #include <linux/swap.h> #include <linux/pagevec.h> @@ -1893,6 +1892,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; + /* + * Sync early if it looks like we're the last one. If someone else + * opens the block device between now and the decrement of bd_openers + * then we did a sync that we didn't need to, but that's not the end + * of the world and we want to avoid long (could be several minute) + * syncs while holding the mutex. 
+ */ + if (bdev->bd_openers == 1) + sync_blockdev(bdev); + mutex_lock_nested(&bdev->bd_mutex, for_part); if (for_part) bdev->bd_part_count--; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 786849fcc319..47f66c6a7d7f 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3370,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); + WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8a144f9cb7ac..719e68ab552c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2098,6 +2098,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges and races between logging and + * completion of ordered extents for adjancent ranges - both races + * could lead to file extent items in the log with overlapping ranges. + * Do this while holding the inode lock, to avoid races with other + * tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for * any such pages does not start while we are logging the inode, because diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index d1973141d3bb..040009d1cc31 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -264,6 +264,7 @@ copy_inline_extent: size); inode_add_bytes(dst, datal); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags); + ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end); out: if (!ret && !trans) { /* diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f65595602aa8..d35936c934ab 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -611,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous @@ -1527,8 +1527,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; - if (!rc || !rc->create_reloc_tree || - root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + if (!rc) return 0; /* @@ -1538,12 +1537,28 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, if (reloc_root_is_dead(root)) return 0; + /* + * This is subtle but important. We do not do + * record_root_in_transaction for reloc roots, instead we record their + * corresponding fs root, and then here we update the last trans for the + * reloc root. This means that we have to do this for the entire life + * of the reloc root, regardless of which stage of the relocation we are + * in. + */ if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; return 0; } + /* + * We are merging reloc roots, we do not need new reloc trees. Also + * reloc trees never need their own reloc tree. 
+ */ + if (!rc->create_reloc_tree || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + return 0; + if (!trans->reloc_reserved) { rsv = trans->block_rsv; trans->block_rsv = rc->block_rsv; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 8b0fe053a25d..ff17a4420358 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -361,6 +361,16 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; } +static void remove_ticket(struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + if (!list_empty(&ticket->list)) { + list_del_init(&ticket->list); + ASSERT(space_info->reclaim_size >= ticket->bytes); + space_info->reclaim_size -= ticket->bytes; + } +} + /* * This is for space we already have accounted in space_info->bytes_may_use, so * basically when we're returning space from block_rsv's. @@ -388,9 +398,7 @@ again: btrfs_space_info_update_bytes_may_use(fs_info, space_info, ticket->bytes); - list_del_init(&ticket->list); - ASSERT(space_info->reclaim_size >= ticket->bytes); - space_info->reclaim_size -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; space_info->tickets_id++; wake_up(&ticket->wait); @@ -899,7 +907,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, btrfs_info(fs_info, "failing ticket with %llu bytes", ticket->bytes); - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -ENOSPC; wake_up(&ticket->wait); @@ -1063,7 +1071,7 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info, * despite getting an error, resulting in a space leak * (bytes_may_use counter of our space_info). */ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); ticket->error = -EINTR; break; } @@ -1121,7 +1129,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, * either the async reclaim job deletes the ticket from the list * or we delete it ourselves at wait_reserve_ticket(). 
*/ - list_del_init(&ticket->list); + remove_ticket(space_info, ticket); if (!ret) ret = -ENOSPC; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 58c111474ba5..ec36a7c6ba3d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -96,8 +96,8 @@ enum { static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4533,15 +4533,13 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, static int btrfs_log_holes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, - struct btrfs_path *path, - const u64 start, - const u64 end) + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); - u64 prev_extent_end = start; + u64 prev_extent_end = 0; int ret; if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) @@ -4549,21 +4547,14 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = start; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - if (ret > 0 && path->slots[0] > 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); - if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) - path->slots[0]--; - } - while (true) { struct extent_buffer *leaf = path->nodes[0]; - u64 extent_end; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); @@ -4580,18 +4571,9 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) break; - extent_end = btrfs_file_extent_end(path); - if (extent_end <= start) - goto next_slot; - /* We have a hole, log it. */ if (prev_extent_end < key.offset) { - u64 hole_len; - - if (key.offset >= end) - hole_len = end - prev_extent_end; - else - hole_len = key.offset - prev_extent_end; + const u64 hole_len = key.offset - prev_extent_end; /* * Release the path to avoid deadlocks with other code @@ -4621,20 +4603,16 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; } - prev_extent_end = min(extent_end, end); - if (extent_end >= end) - break; -next_slot: + prev_extent_end = btrfs_file_extent_end(path); path->slots[0]++; cond_resched(); } - if (prev_extent_end < end && prev_extent_end < i_size) { + if (prev_extent_end < i_size) { u64 hole_len; btrfs_release_path(path); - hole_len = min(ALIGN(i_size, fs_info->sectorsize), end); - hole_len -= prev_extent_end; + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); ret = btrfs_insert_file_extent(trans, root->log_root, ino, prev_extent_end, 0, 0, hole_len, 0, hole_len, @@ -4971,8 +4949,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, const u64 logged_isize, const bool recursive_logging, const int inode_only, - const u64 start, - const u64 end, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { @@ -4981,21 +4957,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, int ins_nr = 0; int ret; - /* - * We must make sure we don't copy extent items that are entirely out of - * the range [start, end - 1]. 
This is not just an optimization to avoid - * copying but also needed to avoid a corruption where we end up with - * file extent items in the log tree that have overlapping ranges - this - * can happen if we race with ordered extent completion for ranges that - * are outside our target range. For example we copy an extent item and - * when we move to the next leaf, that extent was trimmed and a new one - * covering a subrange of it, but with a higher key, was inserted - we - * would then copy this other extent too, resulting in a log tree with - * 2 extent items that represent overlapping ranges. - * - * We can copy the entire extents at the range bondaries however, even - * if they cover an area outside the target range. That's ok. - */ while (1) { ret = btrfs_search_forward(root, min_key, path, trans->transid); if (ret < 0) @@ -5063,29 +5024,6 @@ again: goto next_slot; } - if (min_key->type == BTRFS_EXTENT_DATA_KEY) { - const u64 extent_end = btrfs_file_extent_end(path); - - if (extent_end <= start) { - if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, - path, ins_start_slot, - ins_nr, inode_only, - logged_isize); - if (ret < 0) - return ret; - ins_nr = 0; - } - goto next_slot; - } - if (extent_end >= end) { - ins_nr++; - if (ins_nr == 1) - ins_start_slot = path->slots[0]; - break; - } - } - if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; goto next_slot; @@ -5151,8 +5089,8 @@ next_key: static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, int inode_only, - u64 start, - u64 end, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -5180,9 +5118,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return -ENOMEM; } - start = ALIGN_DOWN(start, fs_info->sectorsize); - end = ALIGN(end, fs_info->sectorsize); - min_key.objectid = ino; min_key.type = BTRFS_INODE_ITEM_KEY; min_key.offset = 0; @@ -5298,8 +5233,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, err = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, - recursive_logging, inode_only, - start, end, ctx, &need_log_inode_item); + recursive_logging, inode_only, ctx, + &need_log_inode_item); if (err) goto out_unlock; @@ -5312,7 +5247,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_holes(trans, root, inode, path, start, end); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/fs/buffer.c b/fs/buffer.c index f73276d746bb..a60f60396cfa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -967,7 +967,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; sector_t end_block; - int ret = 0; /* Will call free_more_memory() */ + int ret = 0; gfp_t gfp_mask; gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; @@ -1371,6 +1371,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__breadahead); +void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, + gfp_t gfp) +{ + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + if (likely(bh)) { + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + brelse(bh); + } +} +EXPORT_SYMBOL(__breadahead_gfp); + /** * __bread_gfp() - reads a specified block and returns 
the bh * @bdev: the block_device to read from diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d594c2627430..4c4202c93b71 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1051,8 +1051,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* If op failed, mark everyone involved for errors */ if (result) { - int pathlen; - u64 base; + int pathlen = 0; + u64 base = 0; char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4a5ccbb7e808..afdfca965a7f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -527,8 +527,8 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { struct dentry *dentry = req->r_dentry; - int pathlen; - u64 base; + int pathlen = 0; + u64 base = 0; char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e5be79bf080..903d9edfd4bf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -521,7 +521,7 @@ extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); static inline void ceph_mdsc_free_path(char *path, int len) { - if (path) + if (!IS_ERR_OR_NULL(path)) __putname(path - (PATH_MAX - 1 - len)); } diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 22cf04fb32d3..604f65f4b6c5 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -202,7 +202,7 @@ config CIFS_SMB_DIRECT help Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, - say N. + say Y. config CIFS_FSCACHE bool "Provide CIFS client caching support" diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 276e4b5ea8e0..916567d770f5 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -323,10 +323,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->send_credits), atomic_read(&server->smbd_conn->receive_credits), server->smbd_conn->receive_credit_target); - seq_printf(m, "\nPending send_pending: %x " - "send_payload_pending: %x", - atomic_read(&server->smbd_conn->send_pending), - atomic_read(&server->smbd_conn->send_payload_pending)); + seq_printf(m, "\nPending send_pending: %x ", + atomic_read(&server->smbd_conn->send_pending)); seq_printf(m, "\nReceive buffers count_receive_queue: %x " "count_empty_packet_queue: %x", server->smbd_conn->count_receive_queue, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 94e3ed4850b5..c31f362fa098 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1208,6 +1208,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, { unsigned int xid = get_xid(); ssize_t rc; + struct cifsFileInfo *cfile = dst_file->private_data; + + if (cfile->swapfile) + return -EOPNOTSUPP; rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0d956360e984..05dd3dea684b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -426,7 +426,8 @@ struct smb_version_operations { /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *); - int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *); + int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, + bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, @@ -1312,6 +1313,7 @@ struct cifsFileInfo { struct 
tcon_link *tlink; unsigned int f_flags; bool invalidHandle:1; /* file closed via session abend */ + bool swapfile:1; bool oplock_break_cancelled:1; unsigned int oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 140efc1a9374..182b864b3075 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -594,6 +594,8 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) cifs_max_pending); set_credits(server, server->maxReq); server->maxBuf = le16_to_cpu(rsp->MaxBufSize); + /* set up max_read for readpages check */ + server->max_read = server->maxBuf; /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { @@ -755,6 +757,8 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) set_credits(server, server->maxReq); /* probably no need to store and check maxvcs */ server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); + /* set up max_read for readpages check */ + server->max_read = server->maxBuf; server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf); server->capabilities = le32_to_cpu(pSMBr->Capabilities); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5920820bfbd0..0b1528edebcf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4808,6 +4808,60 @@ cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) return -EINVAL; } +static int cifs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct cifsFileInfo *cfile = swap_file->private_data; + struct inode *inode = swap_file->f_mapping->host; + unsigned long blocks; + long long isize; + + cifs_dbg(FYI, "swap activate\n"); + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } + *span = sis->pages; + + printk_once(KERN_WARNING "Swap support over SMB3 is experimental\n"); + + /* + * TODO: consider adding ACL (or documenting how) to prevent other + * users (on this or other systems) from reading it + */ + + + /* TODO: add sk_set_memalloc(inet) or similar */ + + if (cfile) + cfile->swapfile = true; + /* + * TODO: Since file already open, we can't open with DENY_ALL here + * but we could add call to grab a byte range lock to prevent others + * from reading or writing the file + */ + + return 0; +} + +static void cifs_swap_deactivate(struct file *file) +{ + struct cifsFileInfo *cfile = file->private_data; + + cifs_dbg(FYI, "swap deactivate\n"); + + /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ + + if (cfile) + cfile->swapfile = false; + + /* do we need to unpin (or unlock) the file */ +} const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, @@ -4821,6 +4875,13 @@ const struct address_space_operations cifs_addr_ops = { .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, + /* + * TODO: investigate and if useful we could add an cifs_migratePage + * helper (under an CONFIG_MIGRATION) in the future, and also + * investigate and add an is_dirty_writeback helper if needed + */ + .swap_activate = cifs_swap_activate, + .swap_deactivate = cifs_swap_deactivate, }; /* diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 8d01ec2dca66..390d2b15ef6e 100644 
--- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -61,7 +61,7 @@ static void cifs_set_ops(struct inode *inode) } /* check if server can support readpages */ - if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < + if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read < PAGE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else @@ -2026,6 +2026,10 @@ cifs_revalidate_mapping(struct inode *inode) int rc; unsigned long *flags = &CIFS_I(inode)->flags; + /* swapfiles are not supposed to be shared */ + if (IS_SWAPFILE(inode)) + return 0; + rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, TASK_KILLABLE); if (rc) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 19e4a5d3b4ca..50f776a8d4ba 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -246,7 +246,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, */ fattr->cf_mode = le32_to_cpu(info->Mode) & ~S_IFMT; - cifs_dbg(VFS, "XXX dev %d, reparse %d, mode %o", + cifs_dbg(FYI, "posix fattr: dev %d, reparse %d, mode %o", le32_to_cpu(info->DeviceId), le32_to_cpu(info->ReparseTag), le32_to_cpu(info->Mode)); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 0511aaf451d4..497afb0b9960 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -766,6 +766,20 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); + if (tcon->tc_count <= 0) { + struct TCP_Server_Info *server = NULL; + + WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative"); + spin_unlock(&cifs_tcp_ses_lock); + + if (tcon->ses) + server = tcon->ses->server; + + cifs_server_dbg(FYI, "tid=%u: tcon is closing, skipping async close retry of fid %llu %llu\n", + tcon->tid, persistent_fid, volatile_fid); + + return 0; + } tcon->tc_count++; spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 47d3e382ecaa..b30aa3cdd845 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1552,6 +1552,21 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) } rc = SMB2_sess_establish_session(sess_data); +#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS + if (ses->server->dialect < SMB30_PROT_ID) { + cifs_dbg(VFS, "%s: dumping generated SMB2 session keys\n", __func__); + /* + * The session id is opaque in terms of endianness, so we can't + * print it as a long long. 
we dump it as we got it on the wire + */ + cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid), + &ses->Suid); + cifs_dbg(VFS, "Session Key %*ph\n", + SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); + cifs_dbg(VFS, "Signing Key %*ph\n", + SMB3_SIGN_KEY_SIZE, ses->auth_key.response); + } +#endif out: kfree(ntlmssp_blob); SMB2_sess_free_buffer(sess_data); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 4d1ff7b66fdc..087d5f14320b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -55,9 +55,11 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server); + struct TCP_Server_Info *server, + bool allocate_crypto); extern int smb3_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server); + struct TCP_Server_Info *server, + bool allocate_crypto); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 20cc79e5c15d..c0348e3b1695 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -41,14 +41,6 @@ #include "smb2glob.h" static int -smb2_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - return cifs_alloc_hash("hmac(sha256)", - &server->secmech.hmacsha256, - &server->secmech.sdeschmacsha256); -} - -static int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; @@ -219,7 +211,8 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) } int -smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + bool allocate_crypto) { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; @@ -228,6 +221,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; struct shash_desc *shash; + struct crypto_shash *hash; + struct sdesc *sdesc = NULL; struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); @@ -239,24 +234,32 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - rc = smb2_crypto_shash_allocate(server); - if (rc) { - cifs_server_dbg(VFS, "%s: sha256 alloc failed\n", __func__); - return rc; + if (allocate_crypto) { + rc = cifs_alloc_hash("hmac(sha256)", &hash, &sdesc); + if (rc) { + cifs_server_dbg(VFS, + "%s: sha256 alloc failed\n", __func__); + return rc; + } + shash = &sdesc->shash; + } else { + hash = server->secmech.hmacsha256; + shash = &server->secmech.sdeschmacsha256->shash; } - rc = crypto_shash_setkey(server->secmech.hmacsha256, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(hash, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with response\n", __func__); - return rc; + cifs_server_dbg(VFS, + "%s: Could not update with response\n", + __func__); + goto out; } - shash = &server->secmech.sdeschmacsha256->shash; rc = crypto_shash_init(shash); if (rc) { cifs_server_dbg(VFS, "%s: Could not init sha256", __func__); - return rc; + 
goto out; } /* @@ -271,9 +274,10 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_update(shash, iov[0].iov_base, iov[0].iov_len); if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with payload\n", - __func__); - return rc; + cifs_server_dbg(VFS, + "%s: Could not update with payload\n", + __func__); + goto out; } drqst.rq_iov++; drqst.rq_nvec--; @@ -283,6 +287,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); +out: + if (allocate_crypto) + cifs_free_hash(&hash, &sdesc); return rc; } @@ -504,14 +511,17 @@ generate_smb311signingkey(struct cifs_ses *ses) } int -smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) +smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + bool allocate_crypto) { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; - struct shash_desc *shash = &server->secmech.sdesccmacaes->shash; + struct shash_desc *shash; + struct crypto_shash *hash; + struct sdesc *sdesc = NULL; struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; @@ -519,14 +529,24 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) return 0; + if (allocate_crypto) { + rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc); + if (rc) + return rc; + + shash = &sdesc->shash; + } else { + hash = server->secmech.cmacaes; + shash = &server->secmech.sdesccmacaes->shash; + } + memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - rc = crypto_shash_setkey(server->secmech.cmacaes, - key, SMB2_CMACAES_SIZE); + rc = crypto_shash_setkey(hash, key, SMB2_CMACAES_SIZE); if (rc) { cifs_server_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); - return rc; + goto out; } /* @@ -537,7 +557,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_init(shash); if (rc) { cifs_server_dbg(VFS, "%s: Could not init cmac aes\n", __func__); - return rc; + goto out; } /* @@ -554,7 +574,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (rc) { cifs_server_dbg(VFS, "%s: Could not update with payload\n", __func__); - return rc; + goto out; } drqst.rq_iov++; drqst.rq_nvec--; @@ -564,6 +584,9 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); +out: + if (allocate_crypto) + cifs_free_hash(&hash, &sdesc); return rc; } @@ -593,7 +616,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) return 0; } - rc = server->ops->calc_signature(rqst, server); + rc = server->ops->calc_signature(rqst, server, false); return rc; } @@ -631,16 +654,14 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE); - mutex_lock(&server->srv_mutex); - rc = server->ops->calc_signature(rqst, server); - mutex_unlock(&server->srv_mutex); + rc = server->ops->calc_signature(rqst, server, true); if (rc) return rc; if (memcmp(server_response_sig, shdr->Signature, SMB2_SIGNATURE_SIZE)) { - dump_stack(); - cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", shdr->Command, shdr->MessageId); + cifs_dbg(VFS, "sign fail cmd 0x%x message id 0x%llx\n", + shdr->Command, shdr->MessageId); return -EACCES; } else return 0; 
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 8da43a500686..1a5834a5d597 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -284,13 +284,10 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].length, DMA_TO_DEVICE); - if (request->has_payload) { - if (atomic_dec_and_test(&request->info->send_payload_pending)) - wake_up(&request->info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&request->info->send_pending)) - wake_up(&request->info->wait_send_pending); - } + if (atomic_dec_and_test(&request->info->send_pending)) + wake_up(&request->info->wait_send_pending); + + wake_up(&request->info->wait_post_send); mempool_free(request, request->info->request_mempool); } @@ -383,27 +380,6 @@ static bool process_negotiation_response( return true; } -/* - * Check and schedule to send an immediate packet - * This is used to extend credtis to remote peer to keep the transport busy - */ -static void check_and_send_immediate(struct smbd_connection *info) -{ - if (info->transport_status != SMBD_CONNECTED) - return; - - info->send_immediate = true; - - /* - * Promptly send a packet if our peer is running low on receive - * credits - */ - if (atomic_read(&info->receive_credits) < - info->receive_credit_target - 1) - queue_delayed_work( - info->workqueue, &info->send_immediate_work, 0); -} - static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; @@ -453,10 +429,16 @@ static void smbd_post_send_credits(struct work_struct *work) info->new_credits_offered += ret; spin_unlock(&info->lock_new_credits_offered); - atomic_add(ret, &info->receive_credits); - - /* Check if we can post new receive and grant credits to peer */ - check_and_send_immediate(info); + /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ + info->send_immediate = true; + if (atomic_read(&info->receive_credits) < + info->receive_credit_target - 1) { + if (info->keep_alive_requested == KEEP_ALIVE_PENDING || + info->send_immediate) { + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(info); + } + } } /* Called from softirq, when recv is done */ @@ -551,12 +533,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->keep_alive_requested = KEEP_ALIVE_PENDING; } - /* - * Check if we need to send something to remote peer to - * grant more credits or respond to KEEP_ALIVE packet - */ - check_and_send_immediate(info); - return; default: @@ -749,7 +725,6 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].addr, request->sge[0].length, request->sge[0].lkey); - request->has_payload = false; atomic_inc(&info->send_pending); rc = ib_post_send(info->id->qp, &send_wr, NULL); if (!rc) @@ -806,45 +781,96 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) return 0; } -/* - * Build and prepare the SMBD packet header - * This function waits for avaialbe send credits and build a SMBD packet - * header. 
The caller then optional append payload to the packet after - * the header - * intput values - * size: the size of the payload - * remaining_data_length: remaining data to send if this is part of a - * fragmented packet - * output values - * request_out: the request allocated from this function - * return values: 0 on success, otherwise actual error code returned - */ -static int smbd_create_header(struct smbd_connection *info, - int size, int remaining_data_length, - struct smbd_request **request_out) +/* Post the send request */ +static int smbd_post_send(struct smbd_connection *info, + struct smbd_request *request) +{ + struct ib_send_wr send_wr; + int rc, i; + + for (i = 0; i < request->num_sge; i++) { + log_rdma_send(INFO, + "rdma_request sge[%d] addr=%llu length=%u\n", + i, request->sge[i].addr, request->sge[i].length); + ib_dma_sync_single_for_device( + info->id->device, + request->sge[i].addr, + request->sge[i].length, + DMA_TO_DEVICE); + } + + request->cqe.done = send_done; + + send_wr.next = NULL; + send_wr.wr_cqe = &request->cqe; + send_wr.sg_list = request->sge; + send_wr.num_sge = request->num_sge; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + rc = ib_post_send(info->id->qp, &send_wr, NULL); + if (rc) { + log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); + smbd_disconnect_rdma_connection(info); + rc = -EAGAIN; + } else + /* Reset timer for idle connection after packet is sent */ + mod_delayed_work(info->workqueue, &info->idle_timer_work, + info->keep_alive_interval*HZ); + + return rc; +} + +static int smbd_post_send_sgl(struct smbd_connection *info, + struct scatterlist *sgl, int data_length, int remaining_data_length) { + int num_sgs; + int i, rc; + int header_length; struct smbd_request *request; struct smbd_data_transfer *packet; - int header_length; - int rc; + int new_credits; + struct scatterlist *sg; +wait_credit: /* Wait for send credits. 
A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || info->transport_status != SMBD_CONNECTED); if (rc) - return rc; + goto err_wait_credit; + + if (info->transport_status != SMBD_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_credit; + } + if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { + atomic_inc(&info->send_credits); + goto wait_credit; + } + +wait_send_queue: + wait_event(info->wait_post_send, + atomic_read(&info->send_pending) < info->send_credit_target || + info->transport_status != SMBD_CONNECTED); if (info->transport_status != SMBD_CONNECTED) { - log_outgoing(ERR, "disconnected not sending\n"); - return -EAGAIN; + log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); + rc = -EAGAIN; + goto err_wait_send_queue; + } + + if (unlikely(atomic_inc_return(&info->send_pending) > + info->send_credit_target)) { + atomic_dec(&info->send_pending); + goto wait_send_queue; } - atomic_dec(&info->send_credits); request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) { rc = -ENOMEM; - goto err; + goto err_alloc; } request->info = info; @@ -852,8 +878,11 @@ static int smbd_create_header(struct smbd_connection *info, /* Fill in the packet header */ packet = smbd_request_payload(request); packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->credits_granted = - cpu_to_le16(manage_credits_prior_sending(info)); + + new_credits = manage_credits_prior_sending(info); + atomic_add(new_credits, &info->receive_credits); + packet->credits_granted = cpu_to_le16(new_credits); + info->send_immediate = false; packet->flags = 0; @@ -861,11 +890,11 @@ static int smbd_create_header(struct smbd_connection *info, packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); packet->reserved = 0; - if (!size) + if (!data_length) packet->data_offset = 0; else packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(size); + packet->data_length = cpu_to_le32(data_length); packet->remaining_data_length = cpu_to_le32(remaining_data_length); packet->padding = 0; @@ -880,7 +909,7 @@ static int smbd_create_header(struct smbd_connection *info, /* Map the packet to DMA */ header_length = sizeof(struct smbd_data_transfer); /* If this is a packet without payload, don't send padding */ - if (!size) + if (!data_length) header_length = offsetof(struct smbd_data_transfer, padding); request->num_sge = 1; @@ -889,102 +918,15 @@ static int smbd_create_header(struct smbd_connection *info, header_length, DMA_TO_DEVICE); if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { - mempool_free(request, info->request_mempool); rc = -EIO; - goto err; + request->sge[0].addr = 0; + goto err_dma; } request->sge[0].length = header_length; request->sge[0].lkey = info->pd->local_dma_lkey; - *request_out = request; - return 0; - -err: - atomic_inc(&info->send_credits); - return rc; -} - -static void smbd_destroy_header(struct smbd_connection *info, - struct smbd_request *request) -{ - - ib_dma_unmap_single(info->id->device, - request->sge[0].addr, - request->sge[0].length, - DMA_TO_DEVICE); - mempool_free(request, info->request_mempool); - atomic_inc(&info->send_credits); -} - -/* Post the send request */ -static int smbd_post_send(struct smbd_connection *info, - struct smbd_request *request, bool has_payload) -{ - struct ib_send_wr send_wr; - int rc, i; - - for (i = 0; i < request->num_sge; i++) { - log_rdma_send(INFO, 
- "rdma_request sge[%d] addr=%llu length=%u\n", - i, request->sge[i].addr, request->sge[i].length); - ib_dma_sync_single_for_device( - info->id->device, - request->sge[i].addr, - request->sge[i].length, - DMA_TO_DEVICE); - } - - request->cqe.done = send_done; - - send_wr.next = NULL; - send_wr.wr_cqe = &request->cqe; - send_wr.sg_list = request->sge; - send_wr.num_sge = request->num_sge; - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - - if (has_payload) { - request->has_payload = true; - atomic_inc(&info->send_payload_pending); - } else { - request->has_payload = false; - atomic_inc(&info->send_pending); - } - - rc = ib_post_send(info->id->qp, &send_wr, NULL); - if (rc) { - log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - if (has_payload) { - if (atomic_dec_and_test(&info->send_payload_pending)) - wake_up(&info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); - } - smbd_disconnect_rdma_connection(info); - rc = -EAGAIN; - } else - /* Reset timer for idle connection after packet is sent */ - mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); - - return rc; -} - -static int smbd_post_send_sgl(struct smbd_connection *info, - struct scatterlist *sgl, int data_length, int remaining_data_length) -{ - int num_sgs; - int i, rc; - struct smbd_request *request; - struct scatterlist *sg; - - rc = smbd_create_header( - info, data_length, remaining_data_length, &request); - if (rc) - return rc; - + /* Fill in the packet data payload */ num_sgs = sgl ? sg_nents(sgl) : 0; for_each_sg(sgl, sg, num_sgs, i) { request->sge[i+1].addr = @@ -994,25 +936,41 @@ static int smbd_post_send_sgl(struct smbd_connection *info, info->id->device, request->sge[i+1].addr)) { rc = -EIO; request->sge[i+1].addr = 0; - goto dma_mapping_failure; + goto err_dma; } request->sge[i+1].length = sg->length; request->sge[i+1].lkey = info->pd->local_dma_lkey; request->num_sge++; } - rc = smbd_post_send(info, request, data_length); + rc = smbd_post_send(info, request); if (!rc) return 0; -dma_mapping_failure: - for (i = 1; i < request->num_sge; i++) +err_dma: + for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) ib_dma_unmap_single(info->id->device, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); - smbd_destroy_header(info, request); + mempool_free(request, info->request_mempool); + + /* roll back receive credits and credits to be offered */ + spin_lock(&info->lock_new_credits_offered); + info->new_credits_offered += new_credits; + spin_unlock(&info->lock_new_credits_offered); + atomic_sub(new_credits, &info->receive_credits); + +err_alloc: + if (atomic_dec_and_test(&info->send_pending)) + wake_up(&info->wait_send_pending); + +err_wait_send_queue: + /* roll back send credits and pending */ + atomic_inc(&info->send_credits); + +err_wait_credit: return rc; } @@ -1334,25 +1292,6 @@ static void destroy_receive_buffers(struct smbd_connection *info) mempool_free(response, info->response_mempool); } -/* - * Check and send an immediate or keep alive packet - * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1 - * Connection.KeepaliveRequested and Connection.SendImmediate - * The idea is to extend credits to server as soon as it becomes available - */ -static void send_immediate_work(struct work_struct *work) -{ - struct smbd_connection *info = container_of( - work, struct smbd_connection, - send_immediate_work.work); - - if (info->keep_alive_requested == 
KEEP_ALIVE_PENDING || - info->send_immediate) { - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(info); - } -} - /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ static void idle_connection_timer(struct work_struct *work) { @@ -1407,14 +1346,10 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "cancelling send immediate work\n"); - cancel_delayed_work_sync(&info->send_immediate_work); log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); wait_event(info->wait_send_pending, atomic_read(&info->send_pending) == 0); - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); /* It's not posssible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); @@ -1744,15 +1679,13 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); - INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); queue_delayed_work(info->workqueue, &info->idle_timer_work, info->keep_alive_interval*HZ); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); - init_waitqueue_head(&info->wait_send_payload_pending); - atomic_set(&info->send_payload_pending, 0); + init_waitqueue_head(&info->wait_post_send); INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); @@ -2226,8 +2159,8 @@ done: * that means all the I/Os have been out and we are good to return */ - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); return rc; } diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 8ede915f2b24..a87fca82a796 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -114,8 +114,7 @@ struct smbd_connection { /* Activity accoutning */ atomic_t send_pending; wait_queue_head_t wait_send_pending; - atomic_t send_payload_pending; - wait_queue_head_t wait_send_payload_pending; + wait_queue_head_t wait_post_send; /* Receive queue */ struct list_head receive_queue; @@ -154,7 +153,6 @@ struct smbd_connection { struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; - struct delayed_work send_immediate_work; /* Memory pool for preallocating buffers */ /* request pool for RDMA send */ @@ -234,9 +232,6 @@ struct smbd_request { struct smbd_connection *info; struct ib_cqe cqe; - /* true if this request carries upper layer payload */ - bool has_payload; - /* the SGE entries for this packet */ struct ib_sge sge[SMBDIRECT_MAX_SGE]; int num_sge; diff --git a/fs/coredump.c b/fs/coredump.c index f8296a82d01d..408418e6aa13 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -211,6 +211,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, return -ENOMEM; (*argv)[(*argc)++] = 0; ++pat_ptr; + if (!(*pat_ptr)) + return -ENOMEM; } /* Repeat as long as we have more pattern to process and more output diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 6a04cc02565a..6774a5a6ded8 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -91,7 +91,6 @@ static int exfat_allocate_bitmap(struct super_block *sb, } } - sbi->pbr_bh = NULL; return 0; } @@ -137,8 +136,6 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi) { int i; - 
brelse(sbi->pbr_bh); - for (i = 0; i < sbi->map_sectors; i++) __brelse(sbi->vol_amap[i]); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 67d4e46fb810..d67fb8a6f770 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -507,6 +507,7 @@ void exfat_msg(struct super_block *sb, const char *lv, const char *fmt, ...) __printf(3, 4) __cold; void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 tz, __le16 time, __le16 date, u8 time_ms); +void exfat_truncate_atime(struct timespec64 *ts); void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 *tz, __le16 *time, __le16 *date, u8 *time_ms); unsigned short exfat_calc_chksum_2byte(void *data, int len, diff --git a/fs/exfat/file.c b/fs/exfat/file.c index 483f683757aa..4f76764165cf 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -273,6 +273,7 @@ int exfat_getattr(const struct path *path, struct kstat *stat, struct exfat_inode_info *ei = EXFAT_I(inode); generic_fillattr(inode, stat); + exfat_truncate_atime(&stat->atime); stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = ei->i_crtime.tv_sec; stat->btime.tv_nsec = ei->i_crtime.tv_nsec; @@ -339,6 +340,7 @@ int exfat_setattr(struct dentry *dentry, struct iattr *attr) } setattr_copy(inode, attr); + exfat_truncate_atime(&inode->i_atime); mark_inode_dirty(inode); out: diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 14a3300848f6..ebd2cbe3cbc1 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -88,7 +88,8 @@ void exfat_get_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, if (time_ms) { ts->tv_sec += time_ms / 100; ts->tv_nsec = (time_ms % 100) * 10 * NSEC_PER_MSEC; - } + } else + ts->tv_nsec = 0; if (tz & EXFAT_TZ_VALID) /* Adjust timezone to UTC0. */ @@ -124,6 +125,17 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, *tz = EXFAT_TZ_VALID; } +/* + * The timestamp for access_time has double seconds granularity. + * (There is no 10msIncrement field for access_time unlike create/modify_time) + * atime also has only a 2-second resolution. + */ +void exfat_truncate_atime(struct timespec64 *ts) +{ + ts->tv_sec = round_down(ts->tv_sec, 2); + ts->tv_nsec = 0; +} + unsigned short exfat_calc_chksum_2byte(void *data, int len, unsigned short chksum, int type) { diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index a8681d91f569..b72d782568b8 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -595,6 +595,7 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode_inc_iversion(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = EXFAT_I(inode)->i_crtime = current_time(inode); + exfat_truncate_atime(&inode->i_atime); /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ d_instantiate(dentry, inode); @@ -854,6 +855,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); + exfat_truncate_atime(&dir->i_atime); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); else @@ -861,6 +863,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); + exfat_truncate_atime(&inode->i_atime); exfat_unhash_inode(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); unlock: @@ -903,6 +906,7 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode_inc_iversion(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = EXFAT_I(inode)->i_crtime = current_time(inode); + exfat_truncate_atime(&inode->i_atime); /* timestamp is already written, so mark_inode_dirty() is unneeded. */ d_instantiate(dentry, inode); @@ -1019,6 +1023,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); + exfat_truncate_atime(&dir->i_atime); if (IS_DIRSYNC(dir)) exfat_sync_inode(dir); else @@ -1027,6 +1032,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); + exfat_truncate_atime(&inode->i_atime); exfat_unhash_inode(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); unlock: @@ -1387,6 +1393,7 @@ static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry, inode_inc_iversion(new_dir); new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime = EXFAT_I(new_dir)->i_crtime = current_time(new_dir); + exfat_truncate_atime(&new_dir->i_atime); if (IS_DIRSYNC(new_dir)) exfat_sync_inode(new_dir); else diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 16ed202ef527..0565d5539d57 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -49,6 +49,7 @@ static void exfat_put_super(struct super_block *sb) sync_blockdev(sb->s_bdev); exfat_set_vol_flags(sb, VOL_CLEAN); exfat_free_bitmap(sbi); + brelse(sbi->pbr_bh); mutex_unlock(&sbi->s_lock); call_rcu(&sbi->rcu, exfat_delayed_free); @@ -100,7 +101,7 @@ static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) { struct exfat_sb_info *sbi = EXFAT_SB(sb); - struct pbr64 *bpb; + struct pbr64 *bpb = (struct pbr64 *)sbi->pbr_bh->b_data; bool sync = 0; /* flags are not changed */ @@ -115,15 +116,6 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) if (sb_rdonly(sb)) return 0; - if (!sbi->pbr_bh) { - sbi->pbr_bh = sb_bread(sb, 0); - if (!sbi->pbr_bh) { - exfat_msg(sb, KERN_ERR, "failed to read boot sector"); - return -ENOMEM; - } - } - - bpb = (struct pbr64 *)sbi->pbr_bh->b_data; bpb->bsx.vol_flags = cpu_to_le16(new_flag); if (new_flag == VOL_DIRTY && !buffer_dirty(sbi->pbr_bh)) @@ -159,7 +151,6 @@ static int exfat_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",iocharset=utf8"); else if (sbi->nls_io) seq_printf(m, ",iocharset=%s", sbi->nls_io->charset); - seq_printf(m, ",bps=%ld", sb->s_blocksize); if (opts->errors == EXFAT_ERRORS_CONT) seq_puts(m, ",errors=continue"); else if (opts->errors == EXFAT_ERRORS_PANIC) @@ -351,14 +342,15 @@ static int exfat_read_root(struct inode *inode) exfat_save_attr(inode, ATTR_SUBDIR); inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = current_time(inode); + 
exfat_truncate_atime(&inode->i_atime); exfat_cache_init_inode(inode); return 0; } -static struct pbr *exfat_read_pbr_with_logical_sector(struct super_block *sb, - struct buffer_head **prev_bh) +static struct pbr *exfat_read_pbr_with_logical_sector(struct super_block *sb) { - struct pbr *p_pbr = (struct pbr *) (*prev_bh)->b_data; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct pbr *p_pbr = (struct pbr *) (sbi->pbr_bh)->b_data; unsigned short logical_sect = 0; logical_sect = 1 << p_pbr->bsx.f64.sect_size_bits; @@ -378,26 +370,23 @@ static struct pbr *exfat_read_pbr_with_logical_sector(struct super_block *sb, } if (logical_sect > sb->s_blocksize) { - struct buffer_head *bh = NULL; - - __brelse(*prev_bh); - *prev_bh = NULL; + brelse(sbi->pbr_bh); + sbi->pbr_bh = NULL; if (!sb_set_blocksize(sb, logical_sect)) { exfat_msg(sb, KERN_ERR, "unable to set blocksize %u", logical_sect); return NULL; } - bh = sb_bread(sb, 0); - if (!bh) { + sbi->pbr_bh = sb_bread(sb, 0); + if (!sbi->pbr_bh) { exfat_msg(sb, KERN_ERR, "unable to read boot sector (logical sector size = %lu)", sb->s_blocksize); return NULL; } - *prev_bh = bh; - p_pbr = (struct pbr *) bh->b_data; + p_pbr = (struct pbr *)sbi->pbr_bh->b_data; } return p_pbr; } @@ -408,21 +397,20 @@ static int __exfat_fill_super(struct super_block *sb) int ret; struct pbr *p_pbr; struct pbr64 *p_bpb; - struct buffer_head *bh; struct exfat_sb_info *sbi = EXFAT_SB(sb); /* set block size to read super block */ sb_min_blocksize(sb, 512); /* read boot sector */ - bh = sb_bread(sb, 0); - if (!bh) { + sbi->pbr_bh = sb_bread(sb, 0); + if (!sbi->pbr_bh) { exfat_msg(sb, KERN_ERR, "unable to read boot sector"); return -EIO; } /* PRB is read */ - p_pbr = (struct pbr *)bh->b_data; + p_pbr = (struct pbr *)sbi->pbr_bh->b_data; /* check the validity of PBR */ if (le16_to_cpu((p_pbr->signature)) != PBR_SIGNATURE) { @@ -433,7 +421,7 @@ static int __exfat_fill_super(struct super_block *sb) /* check logical sector size */ - p_pbr = exfat_read_pbr_with_logical_sector(sb, &bh); + p_pbr = exfat_read_pbr_with_logical_sector(sb); if (!p_pbr) { ret = -EIO; goto free_bh; @@ -514,7 +502,7 @@ free_alloc_bitmap: free_upcase_table: exfat_free_upcase_table(sbi); free_bh: - brelse(bh); + brelse(sbi->pbr_bh); return ret; } @@ -531,17 +519,18 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) if (opts->discard) { struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) + if (!blk_queue_discard(q)) { exfat_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but the device does not support discard"); - opts->discard = 0; + opts->discard = 0; + } } sb->s_flags |= SB_NODIRATIME; sb->s_magic = EXFAT_SUPER_MAGIC; sb->s_op = &exfat_sops; - sb->s_time_gran = 1; + sb->s_time_gran = 10 * NSEC_PER_MSEC; sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS; sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS; @@ -605,6 +594,7 @@ put_inode: free_table: exfat_free_upcase_table(sbi); exfat_free_bitmap(sbi); + brelse(sbi->pbr_bh); check_nls_io: unload_nls(sbi->nls_io); @@ -717,6 +707,7 @@ static void __exit exit_exfat_fs(void) module_init(init_exfat_fs); module_exit(exit_exfat_fs); +MODULE_ALIAS_FS("exfat"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("exFAT filesystem support"); MODULE_AUTHOR("Samsung Electronics Co., Ltd."); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0e0a4d6209c7..a32e5f7b5385 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -410,7 +410,7 @@ verified: * Read the bitmap for a given block_group,and validate the * bits for 
block/inode/inode tables are set in the bitmaps * - * Return buffer_head on success or NULL in case of failure. + * Return buffer_head on success or an ERR_PTR in case of failure. */ struct buffer_head * ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) @@ -502,7 +502,7 @@ out: return ERR_PTR(err); } -/* Returns 0 on success, 1 on error */ +/* Returns 0 on success, -errno on error */ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh) { diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 7f16e1af8d5c..0c76cdd44d90 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -338,9 +338,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - struct ext4_super_block *es; - - es = EXT4_SB(inode->i_sb)->s_es; ext4_error_inode_err(inode, where, line, bh->b_blocknr, EIO, "IO error syncing itable block"); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 031752cfb6f7..f2b577b315a0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3374,8 +3374,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3627,8 +3627,8 @@ static int ext4_split_convert_extents(handle_t *handle, __func__, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b420c9dc444d..4b8c9a9bdf0c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -113,7 +113,7 @@ verified: * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. * - * Return buffer_head of bitmap on success or NULL. + * Return buffer_head of bitmap on success, or an ERR_PTR on error. */ static struct buffer_head * ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) @@ -662,7 +662,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * block has been written back to disk. (Yes, these values are * somewhat arbitrary...) 
*/ -#define RECENTCY_MIN 5 +#define RECENTCY_MIN 60 #define RECENTCY_DIRTY 300 static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e416096fc081..2a4aae6acdcb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1973,7 +1973,7 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - ext4_invalidatepage(page, 0, PAGE_SIZE); + inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return -EIO; } @@ -4364,7 +4364,7 @@ make_io: if (end > table) end = table; while (b <= end) - sb_breadahead(sb, b++); + sb_breadahead_unmovable(sb, b++); } /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 87c85be4c12e..30d5d97548c4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1943,7 +1943,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, int free; free = e4b->bd_info->bb_free; - BUG_ON(free <= 0); + if (WARN_ON(free <= 0)) + return; i = e4b->bd_info->bb_first_free; @@ -1966,7 +1967,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, } mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); - BUG_ON(ex.fe_len <= 0); + if (WARN_ON(ex.fe_len <= 0)) + break; if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9728e7b0e84f..bf5fcb477f66 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -596,7 +596,6 @@ void __ext4_error_file(struct file *file, const char *function, { va_list args; struct va_format vaf; - struct ext4_super_block *es; struct inode *inode = file_inode(file); char pathname[80], *path; @@ -604,7 +603,6 @@ void __ext4_error_file(struct file *file, const char *function, return; trace_ext4_error(inode->i_sb, function, line); - es = EXT4_SB(inode->i_sb)->s_es; if (ext4_error_ratelimit(inode->i_sb)) { path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -4340,7 +4338,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Pre-read the descriptors into the buffer cache */ for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sb_breadahead(sb, block); + sb_breadahead_unmovable(sb, block); } for (i = 0; i < db_count; i++) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f2dc35c22964..b8d78f393365 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2023,6 +2023,7 @@ lookup_again: goto lookup_again; } + spin_unlock(&ino->i_lock); first = true; status = nfs4_select_rw_stateid(ctx->state, iomode == IOMODE_RW ? 
FMODE_WRITE : FMODE_READ, @@ -2032,12 +2033,12 @@ lookup_again: trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); goto lookup_again; } + spin_lock(&ino->i_lock); } else { nfs4_stateid_copy(&stateid, &lo->plh_stateid); } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 25f135572fc8..e7ddbce48321 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -501,6 +501,7 @@ pnfs_alloc_ds_commits_list(struct list_head *list, rcu_read_lock(); pnfs_put_commit_array(array, cinfo->inode); } + rcu_read_unlock(); return ret; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c3b11a715082..5cf91322de0f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1312,6 +1312,7 @@ nfsd4_run_cb_work(struct work_struct *work) container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; + int flags; if (cb->cb_need_restart) { cb->cb_need_restart = false; @@ -1340,7 +1341,8 @@ nfsd4_run_cb_work(struct work_struct *work) } cb->cb_msg.rpc_cred = clp->cl_cb_cred; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e32ecedece0f..c107caa56525 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -267,6 +267,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, if (!nbl) { nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { + INIT_LIST_HEAD(&nbl->nbl_list); + INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, diff --git a/fs/proc/base.c b/fs/proc/base.c index 6042b646ab27..572898dd16a0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1573,6 +1573,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; + char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ @@ -1584,10 +1585,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, next_line = NULL; } - err = sscanf(pos, "%u %lld %lu", &off->clockid, + err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; + + clock[sizeof(clock) - 1] = 0; + if (strcmp(clock, "monotonic") == 0 || + strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) + off->clockid = CLOCK_MONOTONIC; + else if (strcmp(clock, "boottime") == 0 || + strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) + off->clockid = CLOCK_BOOTTIME; + else + goto out; + noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) diff --git a/fs/proc/root.c b/fs/proc/root.c index 2633f10446c3..cdbe9293ea55 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -196,6 +196,13 @@ static void proc_kill_sb(struct super_block *sb) if (ns->proc_thread_self) dput(ns->proc_thread_self); kill_anon_super(sb); + + /* Make the pid namespace safe for the next mount of proc */ + ns->proc_self = NULL; + ns->proc_thread_self = NULL; + ns->pid_gid = GLOBAL_ROOT_GID; + ns->hide_pid = 0; + put_pid_ns(ns); } 
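The fs/proc/base.c hunk above changes timens_offsets_write() to stop scanning a raw numeric clockid with "%u" and instead read a bounded name token with "%9s", then map either the symbolic name ("monotonic"/"boottime") or the stringified clockid to a clockid_t. A minimal userspace sketch of that parsing pattern, assuming the usual Linux clockid values (CLOCK_MONOTONIC == 1, CLOCK_BOOTTIME == 7); parse_timens_line() and the sample line are invented for illustration and are not part of the patch:

/*
 * Sketch only: mirrors the bounded-token parse done by the patched
 * timens_offsets_write(), in ordinary userspace C.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>

#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7	/* Linux UAPI value; fallback for older headers */
#endif

static int parse_timens_line(const char *line, clockid_t *clockid,
			     long long *sec, unsigned long *nsec)
{
	char clock[10];

	/* "%9s" leaves room for the NUL terminator in clock[10] */
	if (sscanf(line, "%9s %lld %lu", clock, sec, nsec) != 3)
		return -1;
	if (*nsec >= 1000000000UL)	/* NSEC_PER_SEC */
		return -1;

	if (!strcmp(clock, "monotonic") || !strcmp(clock, "1"))
		*clockid = CLOCK_MONOTONIC;
	else if (!strcmp(clock, "boottime") || !strcmp(clock, "7"))
		*clockid = CLOCK_BOOTTIME;
	else
		return -1;	/* unknown clock: reject the whole write */
	return 0;
}

int main(void)
{
	clockid_t id;
	long long sec;
	unsigned long nsec;

	if (parse_timens_line("monotonic 604800 0", &id, &sec, &nsec) == 0)
		printf("clockid=%d offset=%llds %luns\n", (int)id, sec, nsec);
	return 0;
}

The fixed "%9s" width is what keeps sscanf() from overrunning the ten-byte buffer; accepting "1" and "7" corresponds to the kernel comparing against __stringify(CLOCK_MONOTONIC) and __stringify(CLOCK_BOOTTIME).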
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7dc800cce354..c663202da8de 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -266,7 +266,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst, if (start < offset + dump->size) { tsz = min(offset + (u64)dump->size - start, (u64)size); buf = dump->buf + start - offset; - if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) { + if (remap_vmalloc_range_partial(vma, dst, buf, 0, + tsz)) { ret = -EFAULT; goto out_unlock; } @@ -624,7 +625,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a7be7a9e5c1a..8bf1d15be3f6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -911,7 +911,12 @@ xfs_eofblocks_worker( { struct xfs_mount *mp = container_of(to_delayed_work(work), struct xfs_mount, m_eofblocks_work); + + if (!sb_start_write_trylock(mp->m_super)) + return; xfs_icache_free_eofblocks(mp, NULL); + sb_end_write(mp->m_super); + xfs_queue_eofblocks(mp); } @@ -938,7 +943,12 @@ xfs_cowblocks_worker( { struct xfs_mount *mp = container_of(to_delayed_work(work), struct xfs_mount, m_cowblocks_work); + + if (!sb_start_write_trylock(mp->m_super)) + return; xfs_icache_free_cowblocks(mp, NULL); + sb_end_write(mp->m_super); + xfs_queue_cowblocks(mp); } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index cdfb3cd9a25b..309958186d33 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -2363,7 +2363,10 @@ xfs_file_ioctl( if (error) return error; - return xfs_icache_free_eofblocks(mp, &keofb); + sb_start_write(mp->m_super); + error = xfs_icache_free_eofblocks(mp, &keofb); + sb_end_write(mp->m_super); + return error; } default: diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 50c43422fa17..b2e4598fdf7d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -167,8 +167,12 @@ typedef struct xfs_mount { struct xfs_kobj m_error_meta_kobj; struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ - struct ratelimit_state m_flush_inodes_ratelimit; + /* + * Workqueue item so that we can coalesce multiple inode flush attempts + * into a single flush. + */ + struct work_struct m_flush_inodes_work; struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_cil_workqueue; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index b0ce04ffd3cd..107bf2a2f344 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1051,6 +1051,7 @@ xfs_reflink_remap_extent( uirec.br_startblock = irec->br_startblock + rlen; uirec.br_startoff = irec->br_startoff + rlen; uirec.br_blockcount = unmap_len - rlen; + uirec.br_state = irec->br_state; unmap_len = rlen; /* If this isn't a real mapping, we're done. 
*/ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index abf06bf9c3f3..424bb9a2d532 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -516,6 +516,20 @@ xfs_destroy_mount_workqueues( destroy_workqueue(mp->m_buf_workqueue); } +static void +xfs_flush_inodes_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, struct xfs_mount, + m_flush_inodes_work); + struct super_block *sb = mp->m_super; + + if (down_read_trylock(&sb->s_umount)) { + sync_inodes_sb(sb); + up_read(&sb->s_umount); + } +} + /* * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting @@ -526,15 +540,15 @@ void xfs_flush_inodes( struct xfs_mount *mp) { - struct super_block *sb = mp->m_super; - - if (!__ratelimit(&mp->m_flush_inodes_ratelimit)) + /* + * If flush_work() returns true then that means we waited for a flush + * which was already in progress. Don't bother running another scan. + */ + if (flush_work(&mp->m_flush_inodes_work)) return; - if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb); - up_read(&sb->s_umount); - } + queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); + flush_work(&mp->m_flush_inodes_work); } /* Catch misguided souls that try to use this interface on XFS */ @@ -1369,17 +1383,6 @@ xfs_fc_fill_super( if (error) goto out_free_names; - /* - * Cap the number of invocations of xfs_flush_inodes to 16 for every - * quarter of a second. The magic numbers here were determined by - * observation neither to cause stalls in writeback when there are a - * lot of IO threads and the fs is near ENOSPC, nor cause any fstest - * regressions. YMMV. - */ - ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16); - ratelimit_set_flags(&mp->m_flush_inodes_ratelimit, - RATELIMIT_MSG_ON_RELEASE); - error = xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; @@ -1752,6 +1755,7 @@ static int xfs_init_fs_context( spin_lock_init(&mp->m_perag_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); + INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
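The fs/xfs/xfs_super.c hunk above drops the xfs_flush_inodes() ratelimit in favour of a single work item, so a caller arriving while a flush is already in flight simply waits for that flush (flush_work() returning true) instead of starting another scan. A rough userspace analogue of that coalescing idea, written with POSIX threads rather than the kernel workqueue API; request_flush(), do_flush() and flush_gen are invented names, and do_flush() merely stands in for sync_inodes_sb():

/* Build with: cc -pthread coalesce.c — an illustration, not kernel code. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int flush_running;
static unsigned long long flush_gen;	/* count of completed flush passes */

static void do_flush(void)
{
	/* stands in for the expensive full pass (sync_inodes_sb()) */
	printf("flushing...\n");
}

static void request_flush(void)
{
	pthread_mutex_lock(&lock);
	unsigned long long target = flush_gen + 1;

	if (!flush_running) {
		/* nothing in flight: this caller becomes the flusher */
		flush_running = 1;
		pthread_mutex_unlock(&lock);
		do_flush();
		pthread_mutex_lock(&lock);
		flush_running = 0;
		flush_gen++;
		pthread_cond_broadcast(&done);
	} else {
		/* a flush is already in progress: just wait for it, the
		 * analogue of flush_work() returning true in the patch */
		while (flush_gen < target)
			pthread_cond_wait(&done, &lock);
	}
	pthread_mutex_unlock(&lock);
}

static void *caller(void *arg)
{
	(void)arg;
	request_flush();
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, caller, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

However many threads pile in while a pass is running, only one do_flush() executes and the rest return once it completes, which is the behaviour the "Don't bother running another scan" comment in the patch relies on.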