diff options
author | Ingo Molnar <mingo@kernel.org> | 2017-11-02 12:57:24 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-11-02 12:57:24 +0300 |
commit | 3357b0d3c7323d73806571192e9f633bb6ba3d54 (patch) | |
tree | a92fb225bfdda901783ad662a1fea0baddfb81e1 /fs | |
parent | 82c62fa0c49aa305104013cee4468772799bb391 (diff) | |
parent | e27c310af5c05cf876d9cad006928076c27f54d4 (diff) | |
download | linux-3357b0d3c7323d73806571192e9f633bb6ba3d54.tar.xz |
Merge branch 'x86/mpx/prep' into x86/asm
Pick up some of the MPX commits that modify the syscall entry code,
to have a common base and to reduce conflicts.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/super.c | 2 | ||||
-rw-r--r-- | fs/ceph/caps.c | 5 | ||||
-rw-r--r-- | fs/cifs/Kconfig | 5 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 8 | ||||
-rw-r--r-- | fs/cifs/smb2maperror.c | 2 | ||||
-rw-r--r-- | fs/cifs/smb2ops.c | 31 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 33 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.h | 5 | ||||
-rw-r--r-- | fs/cifs/smb2proto.h | 1 | ||||
-rw-r--r-- | fs/cifs/smb2transport.c | 26 | ||||
-rw-r--r-- | fs/crypto/keyinfo.c | 5 | ||||
-rw-r--r-- | fs/direct-io.c | 39 | ||||
-rw-r--r-- | fs/ecryptfs/ecryptfs_kernel.h | 24 | ||||
-rw-r--r-- | fs/ecryptfs/keystore.c | 9 | ||||
-rw-r--r-- | fs/exec.c | 1 | ||||
-rw-r--r-- | fs/ext4/super.c | 4 | ||||
-rw-r--r-- | fs/fscache/object-list.c | 7 | ||||
-rw-r--r-- | fs/fuse/dir.c | 3 | ||||
-rw-r--r-- | fs/fuse/inode.c | 2 | ||||
-rw-r--r-- | fs/iomap.c | 41 | ||||
-rw-r--r-- | fs/namespace.c | 3 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 20 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 32 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 3 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 11 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 11 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 47 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs_fsmap.c | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 2 |
32 files changed, 303 insertions, 152 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 35a128acfbd1..161694b66038 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1135,7 +1135,7 @@ static int btrfs_fill_super(struct super_block *sb, #ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; err = super_setup_bdi(sb); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 157fe59fbabe..1978a8cb1cb1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1991,6 +1991,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) retry: spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + spin_unlock(&ci->i_ceph_lock); dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); goto out; } @@ -2008,8 +2009,10 @@ retry: mutex_lock(&session->s_mutex); goto retry; } - if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) + if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) { + spin_unlock(&ci->i_ceph_lock); goto out; + } flushing = __mark_caps_flushing(inode, session, true, &flush_tid, &oldest_flush_tid); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index f7243617316c..d5b2e12b5d02 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -5,9 +5,14 @@ config CIFS select CRYPTO select CRYPTO_MD4 select CRYPTO_MD5 + select CRYPTO_SHA256 + select CRYPTO_CMAC select CRYPTO_HMAC select CRYPTO_ARC4 + select CRYPTO_AEAD2 + select CRYPTO_CCM select CRYPTO_ECB + select CRYPTO_AES select CRYPTO_DES help This is the client VFS module for the SMB3 family of NAS protocols, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de5b2e1fcce5..e185b2853eab 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -661,7 +661,9 @@ struct TCP_Server_Info { #endif unsigned int max_read; unsigned int max_write; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; /* save initital negprot hash */ +#endif /* 3.1.1 */ struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; @@ -849,7 +851,9 @@ struct cifs_ses { __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; +#endif /* 3.1.1 */ }; static inline bool diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7ca9808a0daa..62c88dfed57b 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_DATATYPE_MISALIGNMENT, -EIO, "STATUS_DATATYPE_MISALIGNMENT"}, {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, - {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, + {STATUS_BUFFER_OVERFLOW, -E2BIG, "STATUS_BUFFER_OVERFLOW"}, {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0dafdbae1f8c..bdb963d0ba32 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -522,6 +522,7 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct smb2_file_full_ea_info *smb2_data; + int ea_buf_size = SMB2_MIN_EA_BUF; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) @@ -541,14 +542,32 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, return rc; } - smb2_data = kzalloc(SMB2_MAX_EA_BUF, GFP_KERNEL); - if (smb2_data == NULL) { - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return -ENOMEM; + while (1) { + smb2_data = kzalloc(ea_buf_size, GFP_KERNEL); + if (smb2_data == NULL) { + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } + + rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, + fid.volatile_fid, + ea_buf_size, smb2_data); + + if (rc != -E2BIG) + break; + + kfree(smb2_data); + ea_buf_size <<= 1; + + if (ea_buf_size > SMB2_MAX_EA_BUF) { + cifs_dbg(VFS, "EA size is too large\n"); + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } } - rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, fid.volatile_fid, - smb2_data); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6f0e6343c15e..5331631386a2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -648,7 +648,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { int rc = 0; struct validate_negotiate_info_req vneg_inbuf; - struct validate_negotiate_info_rsp *pneg_rsp; + struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ @@ -727,8 +727,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if (rsplen > CIFSMaxBufSize) - return -EIO; + if ((rsplen > CIFSMaxBufSize) + || (rsplen < sizeof(struct validate_negotiate_info_rsp))) + goto err_rsp_free; } /* check validate negotiate info response matches what we got earlier */ @@ -747,10 +748,13 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* validate negotiate successful */ cifs_dbg(FYI, "validate negotiate info successful\n"); + kfree(pneg_rsp); return 0; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); +err_rsp_free: + kfree(pneg_rsp); return -EIO; } @@ -1255,7 +1259,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct smb2_tree_connect_req *req; struct smb2_tree_connect_rsp *rsp = NULL; struct kvec iov[2]; - struct kvec rsp_iov; + struct kvec rsp_iov = { NULL, 0 }; int rc = 0; int resp_buftype; int unc_path_len; @@ -1372,7 +1376,7 @@ tcon_exit: return rc; tcon_error_exit: - if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { + if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); } goto tcon_exit; @@ -1975,6 +1979,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, } else iov[0].iov_len = get_rfc1002_length(req) + 4; + /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); @@ -2191,9 +2198,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; req->AdditionalInformation = cpu_to_le32(additional_info); - /* 4 for rfc1002 length field and 1 for Buffer */ - req->InputBufferOffset = - cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + + /* + * We do not use the input buffer (do not send extra byte) + */ + req->InputBufferOffset = 0; + inc_rfc1001_len(req, -1); + req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; @@ -2233,12 +2244,12 @@ qinf_exit: } int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb2_file_full_ea_info *data) + u64 persistent_fid, u64 volatile_fid, + int ea_buf_size, struct smb2_file_full_ea_info *data) { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, - SMB2_MAX_EA_BUF, + ea_buf_size, sizeof(struct smb2_file_full_ea_info), (void **)&data, NULL); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6c9653a130c8..c2ec934be968 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -832,7 +832,7 @@ struct smb2_flush_rsp { /* Channel field for read and write: exactly one of following flags can be set*/ #define SMB2_CHANNEL_NONE 0x00000000 #define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ -#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */ /* SMB2 read request without RFC1001 length at the beginning */ struct smb2_read_plain_req { @@ -1178,7 +1178,8 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MAX_EA_BUF 2048 +#define SMB2_MIN_EA_BUF 2048 +#define SMB2_MAX_EA_BUF 65536 struct smb2_file_full_ea_info { /* encoding of response for level 15 */ __le32 next_entry_offset; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 003217099ef3..e9ab5227e7a8 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -134,6 +134,7 @@ extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, + int ea_buf_size, struct smb2_file_full_ea_info *data); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 67367cf1f8cd..99493946e2f9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -390,6 +390,7 @@ generate_smb30signingkey(struct cifs_ses *ses) return generate_smb3signingkey(ses, &triplet); } +#ifdef CONFIG_CIFS_SMB311 int generate_smb311signingkey(struct cifs_ses *ses) @@ -398,25 +399,26 @@ generate_smb311signingkey(struct cifs_ses *ses) struct derivation *d; d = &triplet.signing; - d->label.iov_base = "SMB2AESCMAC"; - d->label.iov_len = 12; - d->context.iov_base = "SmbSign"; - d->context.iov_len = 8; + d->label.iov_base = "SMBSigningKey"; + d->label.iov_len = 14; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.encryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerIn "; - d->context.iov_len = 10; + d->label.iov_base = "SMBC2SCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.decryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerOut"; - d->context.iov_len = 10; + d->label.iov_base = "SMBS2CCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; return generate_smb3signingkey(ses, &triplet); } +#endif /* 311 */ int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 018c588c7ac3..8e704d12a1cf 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -109,6 +109,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info, goto out; } ukp = user_key_payload_locked(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + goto out; + } if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; goto out; diff --git a/fs/direct-io.c b/fs/direct-io.c index 96415c65bbdc..b53e66d9abd7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -45,6 +45,12 @@ #define DIO_PAGES 64 /* + * Flags for dio_complete() + */ +#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */ +#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */ + +/* * This code generally works in units of "dio_blocks". A dio_block is * somewhere between the hard sector size and the filesystem block size. it * is determined on a per-invocation basis. When talking to the filesystem @@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; @@ -259,14 +265,27 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + if (dio->end_io) { + // XXX: ki_pos?? + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source * of the write was an mmap'ed region of the file we're writing. Either * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. */ - if (ret > 0 && dio->op == REQ_OP_WRITE && + if (flags & DIO_COMPLETE_INVALIDATE && + ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -274,18 +293,10 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) WARN_ON_ONCE(err); } - if (dio->end_io) { - - // XXX: ki_pos?? - err = dio->end_io(dio->iocb, offset, ret, dio->private); - if (err) - ret = err; - } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); - if (is_async) { + if (flags & DIO_COMPLETE_ASYNC) { /* * generic_write_sync expects ki_pos to have been updated * already, but the submission path only does this for @@ -306,7 +317,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE); } static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); @@ -348,7 +359,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC); } } } @@ -1360,7 +1371,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, retval, false); + retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE); } else BUG_ON(retval != -EIOCBQUEUED); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 9c351bf757b2..3fbc0ff79699 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -84,11 +84,16 @@ struct ecryptfs_page_crypt_context { static inline struct ecryptfs_auth_tok * ecryptfs_get_encrypted_key_payload_data(struct key *key) { - if (key->type == &key_type_encrypted) - return (struct ecryptfs_auth_tok *) - (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data); - else + struct encrypted_key_payload *payload; + + if (key->type != &key_type_encrypted) return NULL; + + payload = key->payload.data[0]; + if (!payload) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)payload->payload_data; } static inline struct key *ecryptfs_get_encrypted_key(char *sig) @@ -114,12 +119,17 @@ static inline struct ecryptfs_auth_tok * ecryptfs_get_key_payload_data(struct key *key) { struct ecryptfs_auth_tok *auth_tok; + struct user_key_payload *ukp; auth_tok = ecryptfs_get_encrypted_key_payload_data(key); - if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; - else + if (auth_tok) return auth_tok; + + ukp = user_key_payload_locked(key); + if (!ukp) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)ukp->data; } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3cf1546dca82..fa218cd64f74 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -459,7 +459,8 @@ out: * @auth_tok_key: key containing the authentication token * @auth_tok: authentication token * - * Returns zero on valid auth tok; -EINVAL otherwise + * Returns zero on valid auth tok; -EINVAL if the payload is invalid; or + * -EKEYREVOKED if the key was revoked before we acquired its semaphore. */ static int ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, @@ -468,6 +469,12 @@ ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, int rc = 0; (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key); + if (IS_ERR(*auth_tok)) { + rc = PTR_ERR(*auth_tok); + *auth_tok = NULL; + goto out; + } + if (ecryptfs_verify_version((*auth_tok)->version)) { printk(KERN_ERR "Data structure version mismatch. Userspace " "tools must match eCryptfs kernel module with major " diff --git a/fs/exec.c b/fs/exec.c index 5470d3c1892a..3e14ba25f678 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b104096fce9e..b0915b734a38 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1677,7 +1677,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; return 1; case Opt_i_version: - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; return 1; case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; @@ -2060,7 +2060,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); - if (sb->s_flags & MS_I_VERSION) + if (sb->s_flags & SB_I_VERSION) SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index b5ab06fabc60..0438d4cd91ef 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -331,6 +331,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) rcu_read_lock(); confkey = user_key_payload_rcu(key); + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 622081b97426..24967382a7b1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1308,7 +1308,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, */ over = !dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, dirent->type); - ctx->pos = dirent->off; + if (!over) + ctx->pos = dirent->off; } buf += reclen; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 65c88379a3a1..94a745acaef8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1059,7 +1059,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; - sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); + sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); if (!parse_fuse_opt(data, &d, is_bdev)) goto err; diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..d4801f8dd4fd 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); + loff_t offset = iocb->ki_pos; ssize_t ret; - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (!dio->error && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - iocb->ki_pos >> PAGE_SHIFT, - (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) if (likely(!ret)) { ret = dio->size; /* check for short read */ - if (iocb->ki_pos + ret > dio->i_size && + if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) - ret = dio->i_size - iocb->ki_pos; + ret = dio->i_size - offset; iocb->ki_pos += ret; } + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + int err; + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(err); + } + inode_dio_end(file_inode(iocb->ki_filp)); kfree(dio); diff --git a/fs/namespace.c b/fs/namespace.c index 3b601f115b6c..d18deb4c410b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2825,7 +2825,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_MANDLOCK | SB_DIRSYNC | SB_SILENT | - SB_POSIXACL); + SB_POSIXACL | + SB_I_VERSION); if (flags & MS_REMOUNT) retval = do_remount(&path, flags, sb_flags, mnt_flags, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a619addecafc..321511ed8c42 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -598,18 +598,30 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry) +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index) { struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; + /* Already indexed or could be indexed on copy up? */ + bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + + if (WARN_ON(upperdentry && indexed && !lowerdentry)) + return ERR_PTR(-EIO); if (!realinode) realinode = d_inode(lowerdentry); - if (!S_ISDIR(realinode->i_mode) && - (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) { - struct inode *key = d_inode(lowerdentry ?: upperdentry); + /* + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key in that case. + * Hash inodes that are or could be indexed by origin inode and + * non-indexed upper inodes that could be hard linked by upper inode. + */ + if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { + struct inode *key = d_inode(indexed ? lowerdentry : + upperdentry); unsigned int nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 654bea1a5ac9..a12dc10bf726 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -405,14 +405,13 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, * be treated as stale (i.e. after unlink of the overlay inode). * We don't know the verification rules for directory and whiteout * index entries, because they have not been implemented yet, so return - * EROFS if those entries are found to avoid corrupting an index that - * was created by a newer kernel. + * EINVAL if those entries are found to abort the mount to avoid + * corrupting an index that was created by a newer kernel. */ - err = -EROFS; + err = -EINVAL; if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; - err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; @@ -507,6 +506,10 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); + if (err == -ENOENT) { + index = NULL; + goto out; + } pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, @@ -516,18 +519,9 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { - if (upper && d_inode(origin)->i_nlink > 1) { - pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", - d_inode(origin)->i_ino); - goto fail; - } - - dput(index); - index = NULL; + goto out_dput; } else if (upper && d_inode(upper) != inode) { - pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", - index, inode->i_ino, d_inode(upper)->i_ino); - goto fail; + goto out_dput; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -547,6 +541,11 @@ out: kfree(name.name); return index; +out_dput: + dput(index); + index = NULL; + goto out; + fail: dput(index); index = ERR_PTR(-EIO); @@ -635,6 +634,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } if (d.redirect) { + err = -ENOMEM; upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; @@ -709,7 +709,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry); + inode = ovl_get_inode(dentry, upperdentry, index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c706a6f99928..d9a0edd4e57e 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -286,7 +286,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry); +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0f85ee9c3268..698b74dd750e 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1021,13 +1021,12 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, break; } err = ovl_verify_index(index, lowerstack, numlower); - if (err) { - if (err == -EROFS) - break; + /* Cleanup stale and orphan index entries */ + if (err && (err == -ESTALE || err == -ENOENT)) err = ovl_cleanup(dir, index); - if (err) - break; - } + if (err) + break; + dput(index); index = NULL; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 092d150643c1..f5738e96a052 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -174,6 +174,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) { struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + if (!oi) + return NULL; + oi->cache = NULL; oi->redirect = NULL; oi->version = 0; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index def32fa1c225..89263797cf32 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3852,6 +3852,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a5dfbc..502e0d8fb4ff 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e5932aec4..a3eeaba156c5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -446,6 +446,19 @@ xfs_imap_valid( { offset >>= inode->i_blkbits; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(imap, XFS_I(inode)); + return offset >= imap->br_startoff && offset < imap->br_startoff + imap->br_blockcount; } @@ -735,6 +748,14 @@ xfs_vm_invalidatepage( { trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + + /* + * If we are invalidating the entire page, clear the dirty state from it + * so that we can check for attempts to release dirty cached pages in + * xfs_vm_releasepage(). + */ + if (offset == 0 && length >= PAGE_SIZE) + cancel_dirty_page(page); block_invalidatepage(page, offset, length); } @@ -1190,25 +1211,27 @@ xfs_vm_releasepage( * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty - * but otherwise have invalidated buffers. + * block_invalidatepage() can send pages that are still marked dirty but + * otherwise have invalidated buffers. * * We want to release the latter to avoid unnecessary buildup of the - * LRU, skip the former and warn if we've left any lingering - * delalloc/unwritten buffers on clean pages. Skip pages with delalloc - * or unwritten buffers and warn if the page is not dirty. Otherwise - * try to release the buffers. + * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages + * that are entirely invalidated and need to be released. Hence the + * only time we should get dirty pages here is through + * shrink_active_list() and so we can simply skip those now. + * + * warn if we've left any lingering delalloc/unwritten buffers on clean + * or invalidated pages we are about to release. */ + if (PageDirty(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unwritten); - if (delalloc) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(delalloc)) return 0; - } - if (unwritten) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(unwritten)) return 0; - } return try_to_free_buffers(page); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 56d0e526870c..6526ef0e2a23 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -237,11 +237,13 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } + ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -259,9 +261,10 @@ xfs_file_buffered_aio_read( trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } ret = generic_file_read_iter(iocb, to); @@ -552,9 +555,10 @@ xfs_file_dio_aio_write( iolock = XFS_IOLOCK_SHARED; } - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } @@ -606,9 +610,10 @@ xfs_file_dax_write( size_t count; loff_t pos; - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 560e0b40ac1b..43cfc07996a4 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper( return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); } -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_daddr_t rec_daddr; - - rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); - - irec.rm_startblock = rec->ar_startblock; - irec.rm_blockcount = rec->ar_blockcount; - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); -} - /* Transform a bnobt irec into a fsmap */ STATIC int xfs_getfsmap_datadev_bnobt_helper( @@ -475,6 +452,30 @@ xfs_getfsmap_logdev( return xfs_getfsmap_helper(tp, info, &rmap, 0); } +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + /* Execute a getfsmap query against the realtime device. */ STATIC int __xfs_getfsmap_rtdev( @@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev( return query_fn(tp, info); } -#ifdef CONFIG_XFS_RT /* Actually query the realtime bitmap. */ STATIC int xfs_getfsmap_rtdev_rtbitmap_query( diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 584cf2d573ba..f663022353c0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1637,7 +1637,7 @@ xfs_fs_fill_super( /* version 5 superblocks support inode version counters. */ if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; if (mp->m_flags & XFS_MOUNT_DAX) { xfs_warn(mp, |