diff options
Diffstat (limited to 'fs')
72 files changed, 642 insertions, 375 deletions
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a1b18082991b..183cc5418722 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, trace_afs_notify_call(rxcall, call); call->need_attention = true; - u = __atomic_add_unless(&call->usage, 1, 0); + u = atomic_fetch_add_unless(&call->usage, 1, 0); if (u != 0) { trace_afs_call(call, afs_call_trace_wake, u, atomic_read(&call->net->nr_outstanding_calls), @@ -1896,6 +1896,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, return ret; } +struct __aio_sigset { + const sigset_t __user *sigmask; + size_t sigsetsize; +}; + SYSCALL_DEFINE6(io_pgetevents, aio_context_t, ctx_id, long, min_nr, diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile index 43fedde15c26..1f85d35ec8b7 100644 --- a/fs/autofs/Makefile +++ b/fs/autofs/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux autofs-filesystem routines. # -obj-$(CONFIG_AUTOFS_FS) += autofs.o +obj-$(CONFIG_AUTOFS_FS) += autofs4.o -autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o +autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index ea4ca1445ab7..86eafda4a652 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) cmd); goto out; } + } else { + unsigned int inr = _IOC_NR(cmd); + + if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || + inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || + inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { + err = -EINVAL; + goto out; + } } err = 0; @@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp, dev_t devid; int err, fd; - /* param->path has already been checked */ + /* param->path has been checked in validate_dev_ioctl() */ + if (!param->openmount.devid) return -EINVAL; @@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; @@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; diff --git a/fs/autofs/init.c b/fs/autofs/init.c index cc9447e1903f..79ae07d9592f 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -23,7 +23,7 @@ static struct file_system_type autofs_fs_type = { .kill_sb = autofs_kill_sb, }; MODULE_ALIAS_FS("autofs"); -MODULE_ALIAS("autofs4"); +MODULE_ALIAS("autofs"); static int __init init_autofs_fs(void) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0ac456b52bdd..816cc921cf36 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (error) diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..aba25414231a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, ret = bio_iov_iter_get_pages(&bio, iter); if (unlikely(ret)) - return ret; + goto out; ret = bio.bi_iter.bi_size; if (iov_iter_rw(iter) == READ) { @@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, put_page(bvec->bv_page); } - if (vecs != inline_vecs) - kfree(vecs); - if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); +out: + if (vecs != inline_vecs) + kfree(vecs); + bio_uninit(&bio); return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e55843f536bc..b3e45714d28f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4238,8 +4238,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) struct extent_map *em; u64 start = page_offset(page); u64 end = start + PAGE_SIZE - 1; - struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree; - struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree; + struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host); + struct extent_io_tree *tree = &btrfs_inode->io_tree; + struct extent_map_tree *map = &btrfs_inode->extent_tree; if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > SZ_16M) { @@ -4262,6 +4263,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) extent_map_end(em) - 1, EXTENT_LOCKED | EXTENT_WRITEBACK, 0, NULL)) { + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &btrfs_inode->runtime_flags); remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 43ecbe620dea..b077544b5232 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3327,11 +3327,13 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) if (pg) { unlock_page(pg); put_page(pg); + cmp->src_pages[i] = NULL; } pg = cmp->dst_pages[i]; if (pg) { unlock_page(pg); put_page(pg); + cmp->dst_pages[i] = NULL; } } } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 572306036477..6702896cdb8f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1151,11 +1151,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) return ret; } - if (sctx->is_dev_replace && !is_metadata && !have_csum) { - sblocks_for_recheck = NULL; - goto nodatasum_case; - } - /* * read all mirrors one after the other. This includes to * re-read the extent or metadata block that failed (that was @@ -1268,13 +1263,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) goto out; } - if (!is_metadata && !have_csum) { + /* + * NOTE: Even for nodatasum case, it's still possible that it's a + * compressed data extent, thus scrub_fixup_nodatasum(), which write + * inode page cache onto disk, could cause serious data corruption. + * + * So here we could only read from disk, and hope our recovery could + * reach disk before the newer write. + */ + if (0 && !is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; WARN_ON(sctx->is_dev_replace); -nodatasum_case: - /* * !is_metadata and !have_csum, this means that the data * might not be COWed, that it might be modified diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e034ad9e23b4..1da162928d1a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1146,6 +1146,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { int ret; + mutex_lock(&uuid_mutex); mutex_lock(&fs_devices->device_list_mutex); if (fs_devices->opened) { fs_devices->opened++; @@ -1155,6 +1156,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = open_fs_devices(fs_devices, flags, holder); } mutex_unlock(&fs_devices->device_list_mutex); + mutex_unlock(&uuid_mutex); return ret; } diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d9f001078e08..4a717d400807 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -218,7 +218,8 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) "%s", fsdef->dentry->d_sb->s_id); - fscache_object_init(&fsdef->fscache, NULL, &cache->cache); + fscache_object_init(&fsdef->fscache, &fscache_fsdef_index, + &cache->cache); ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); if (ret < 0) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ab0bbe93b398..af2b17b21b94 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -186,12 +186,12 @@ try_again: * need to wait for it to be destroyed */ wait_for_old_object: trace_cachefiles_wait_active(object, dentry, xobject); + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); if (fscache_object_is_live(&xobject->fscache)) { pr_err("\n"); pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); - BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); @@ -248,7 +248,6 @@ wait_for_old_object: goto try_again; requeue: - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo); _leave(" = -ETIMEDOUT"); return -ETIMEDOUT; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5082c8a49686..40f7595aad10 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_one_read *monitor = container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; + struct fscache_retrieval *op = monitor->op; struct wait_bit_key *key = _key; struct page *page = wait->private; @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); + ASSERT(op); - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); + /* We need to temporarily bump the usage count as we don't own a ref + * here otherwise cachefiles_read_copier() may free the op between the + * monitor being enqueued on the op->to_do list and the op getting + * enqueued on the work queue. + */ + fscache_get_retrieval(op); + object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); + list_add_tail(&monitor->op_link, &op->to_do); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(monitor->op); + fscache_enqueue_retrieval(op); + fscache_put_retrieval(op); return 0; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bd78da59a4fd..c923c7854027 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -423,7 +423,7 @@ struct smb_version_operations { void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, bool *); /* create lease context buffer for CREATE request */ - char * (*create_lease_buf)(u8 *, u8); + char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, @@ -1416,6 +1416,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server, /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ + struct kref refcount; struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u32 pid; /* process id */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 03018be17283..1890f534c88b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -82,6 +82,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern void cifs_delete_mid(struct mid_q_entry *mid); +extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d352da325de3..93408eab92e7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -157,8 +157,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * greater than cifs socket timeout which is 7 seconds */ while (server->tcpStatus == CifsNeedReconnect) { - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a57da1b88bdf..5df2c0698cda 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -924,6 +924,7 @@ next_pdu: server->pdu_size = next_offset; } + mid_entry = NULL; if (server->ops->is_transform_hdr && server->ops->receive_transform && server->ops->is_transform_hdr(buf)) { @@ -938,8 +939,11 @@ next_pdu: length = mid_entry->receive(server, mid_entry); } - if (length < 0) + if (length < 0) { + if (mid_entry) + cifs_mid_q_entry_release(mid_entry); continue; + } if (server->large_buf) buf = server->bigbuf; @@ -956,6 +960,8 @@ next_pdu: if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); + + cifs_mid_q_entry_release(mid_entry); } else if (server->ops->is_oplock_break && server->ops->is_oplock_break(buf, server)) { cifs_dbg(FYI, "Received oplock break\n"); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index aff8ce8ba34d..646dcd149de1 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -107,6 +107,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 788412675723..4ed10dd086e6 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -41,7 +41,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, int rc; __le16 *smb2_path; struct smb2_file_all_info *smb2_data = NULL; - __u8 smb2_oplock[17]; + __u8 smb2_oplock; struct cifs_fid *fid = oparms->fid; struct network_resiliency_req nr_ioctl_req; @@ -59,12 +59,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, } oparms->desired_access |= FILE_READ_ATTRIBUTES; - *smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; + smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; - if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) - memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - - rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL, + rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, NULL); if (rc) goto out; @@ -101,7 +98,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, move_smb2_info_to_cifs(buf, smb2_data); } - *oplock = *smb2_oplock; + *oplock = smb2_oplock; out: kfree(smb2_data); kfree(smb2_path); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0356b5559c71..ea92a38b2f08 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -203,6 +203,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } @@ -855,6 +856,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea, len); + kfree(ea); + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; @@ -2219,8 +2222,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2246,8 +2248,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2284,8 +2285,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) return SMB2_OPLOCK_LEVEL_NOCHANGE; if (lease_key) - memcpy(lease_key, &lc->lcontext.LeaseKeyLow, - SMB2_LEASE_KEY_SIZE); + memcpy(lease_key, &lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); return le32_to_cpu(lc->lcontext.LeaseState); } @@ -2521,7 +2521,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, if (!tr_hdr) goto err_free_iov; - orig_len = smb2_rqst_len(old_rq, false); + orig_len = smb_rqst_len(server, old_rq); /* fill the 2nd iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 810b85787c91..3c92678cb45b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -155,7 +155,7 @@ out: static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { - int rc = 0; + int rc; struct nls_table *nls_codepage; struct cifs_ses *ses; struct TCP_Server_Info *server; @@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) * for those three - in the calling routine. */ if (tcon == NULL) - return rc; + return 0; if (smb2_command == SMB2_TREE_CONNECT) - return rc; + return 0; if (tcon->tidStatus == CifsExiting) { /* @@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) return -EAGAIN; } - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) @@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } if (!tcon->ses->need_reconnect && !tcon->need_reconnect) - return rc; + return 0; nls_codepage = load_nls_default(); @@ -340,7 +346,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; /* BB eventually switch this to SMB2 specific small buf size */ - *request_buf = cifs_small_buf_get(); + if (smb2_command == SMB2_SET_INFO) + *request_buf = cifs_buf_get(); + else + *request_buf = cifs_small_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -1707,12 +1716,12 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, static int add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int *num_iovec, __u8 *oplock) + unsigned int *num_iovec, u8 *lease_key, __u8 *oplock) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock); + iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = server->vals->create_lease_size; @@ -2172,7 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - rc = add_lease_context(server, iov, &n_iov, oplock); + rc = add_lease_context(server, iov, &n_iov, + oparms->fid->lease_key, oplock); if (rc) { cifs_small_buf_release(req); kfree(copy_path); @@ -3720,7 +3730,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); + cifs_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; if (rc != 0) { diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 824dddeee3f2..a671adcc44a6 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -678,16 +678,14 @@ struct create_context { #define SMB2_LEASE_KEY_SIZE 16 struct lease_context { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; } __packed; struct lease_context_v2 { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 3ae208ac2a77..6e6a4f2ec890 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -113,8 +113,8 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile, extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); extern void smb2_reconnect_server(struct work_struct *work); extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server); -extern unsigned long -smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker); +extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, + struct smb_rqst *rqst); /* * SMB2 Worker functions - most of protocol specific implementation details diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 51b9437c3c7b..719d55e63d88 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -173,6 +173,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -190,21 +192,39 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } rc = crypto_shash_setkey(server->secmech.hmacsha256, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } - rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sha256", __func__); return rc; } - rc = __cifs_calc_signature(rqst, server, sigptr, - &server->secmech.sdeschmacsha256->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). + */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); @@ -408,12 +428,14 @@ generate_smb311signingkey(struct cifs_ses *ses) int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; + int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdesccmacaes->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -425,8 +447,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); rc = crypto_shash_setkey(server->secmech.cmacaes, - ses->smb3signingkey, SMB2_CMACAES_SIZE); - + ses->smb3signingkey, SMB2_CMACAES_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); return rc; @@ -437,15 +458,33 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) * so unlike smb2 case we do not have to check here if secmech are * initialized */ - rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); return rc; } - rc = __cifs_calc_signature(rqst, server, sigptr, - &server->secmech.sdesccmacaes->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). + */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); @@ -548,6 +587,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = le64_to_cpu(shdr->MessageId); temp->pid = current->pid; temp->command = shdr->Command; /* Always LE */ diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 6fd94d9ffac2..c55ea4e6201b 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2083,8 +2083,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * rqst: the data to write * return value: 0 if successfully write, otherwise error code */ -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) { + struct smbd_connection *info = server->smbd_conn; struct kvec vec; int nvecs; int size; @@ -2118,7 +2119,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and * ends at page boundary */ - buflen = smb2_rqst_len(rqst, true); + buflen = smb_rqst_len(server, rqst); if (buflen + sizeof(struct smbd_data_transfer) > info->max_fragmented_send_size) { diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 1e419c21dc60..a11096254f29 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -292,7 +292,7 @@ void smbd_destroy(struct smbd_connection *info); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst); +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst); enum mr_state { MR_READY, @@ -332,7 +332,7 @@ static inline void *smbd_get_connection( static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } static inline void smbd_destroy(struct smbd_connection *info) {} static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } -static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; } +static inline int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) {return -1; } #endif #endif diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fb57dfbfb749..a341ec839c83 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -61,6 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -82,6 +83,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } +static void _cifs_mid_q_entry_release(struct kref *refcount) +{ + struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, + refcount); + + mempool_free(mid, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + void DeleteMidQEntry(struct mid_q_entry *midEntry) { @@ -110,7 +126,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif - mempool_free(midEntry, cifs_mid_poolp); + cifs_mid_q_entry_release(midEntry); } void @@ -202,14 +218,15 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, } unsigned long -smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker) +smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) { unsigned int i; struct kvec *iov; int nvec; unsigned long buflen = 0; - if (skip_rfc1002_marker && rqst->rq_iov[0].iov_len == 4) { + if (server->vals->header_preamble_size == 0 && + rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; } else { @@ -260,7 +277,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, __be32 rfc1002_marker; if (cifs_rdma_enabled(server) && server->smbd_conn) { - rc = smbd_send(server->smbd_conn, rqst); + rc = smbd_send(server, rqst); goto smbd_done; } if (ssocket == NULL) @@ -271,7 +288,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, (char *)&val, sizeof(val)); for (j = 0; j < num_rqst; j++) - send_length += smb2_rqst_len(&rqst[j], true); + send_length += smb_rqst_len(server, &rqst[j]); rfc1002_marker = cpu_to_be32(send_length); /* Generate a rfc1002 marker for SMB2+ */ diff --git a/fs/dcache.c b/fs/dcache.c index 0e8e5de3c48a..ceb7b491d1b9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - bool hashed = !d_unhashed(dentry); - if (hashed) - raw_write_seqcount_begin(&dentry->d_seq); + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - if (hashed) - raw_write_seqcount_end(&dentry->d_seq); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -1932,10 +1929,12 @@ struct dentry *d_make_root(struct inode *root_inode) if (root_inode) { res = d_alloc_anon(root_inode->i_sb); - if (res) + if (res) { + res->d_flags |= DCACHE_RCUACCESS; d_instantiate(res, root_inode); - else + } else { iput(root_inode); + } } return res; } diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 71fccccf317e..8c6ab6c95727 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; - uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (err) + goto out; if (efivar_variable_is_removable(var->var.VendorGuid, dentry->d_name.name, namelen)) diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..bdd0eacefdf5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,15 +290,15 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; + vma_set_anonymous(vma); if (down_write_killable(&mm->mmap_sem)) { err = -EINTR; goto err_free; } - vma->vm_mm = mm; /* * Place the stack at the largest stack address the architecture @@ -311,7 +311,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) @@ -326,7 +325,7 @@ err: up_write(&mm->mmap_sem); err_free: bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return err; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b00481c475cb..aa52d87985aa 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - int flex_bg = 0; J_ASSERT_BH(bh, buffer_locked(bh)); @@ -207,22 +206,19 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (ext4_has_feature_flex_bg(sb)) - flex_bg = 1; - /* Set bits for block and inode bitmaps, and inode table */ tmp = ext4_block_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_table(sb, gdp); for (; tmp < ext4_inode_table(sb, gdp) + sbi->s_itb_per_group; tmp++) { - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); } @@ -372,6 +368,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, desc, bh))) { ext4_unlock_group(sb, block_group); @@ -390,6 +388,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -442,7 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) goto verify; } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Block bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0b127853c584..7c7123f265c2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1114,6 +1114,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ @@ -1507,11 +1508,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || - ino == EXT4_USR_QUOTA_INO || - ino == EXT4_GRP_QUOTA_INO || - ino == EXT4_BOOT_LOADER_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } @@ -3018,9 +3014,6 @@ extern int ext4_inline_data_fiemap(struct inode *inode, struct iomap; extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 98fb0c119c68..adf6668b596f 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -91,6 +91,7 @@ struct ext4_extent_header { }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) +#define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0057fe3f248d..8ce6fd5b10dd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -869,6 +869,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_inode_hdr(inode); depth = ext_depth(inode); + if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) { + EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d", + depth); + ret = -EFSCORRUPTED; + goto err; + } if (path) { ext4_ext_drop_refs(path); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f525f909b559..f336cbc6e932 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -90,6 +90,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8)) { @@ -101,6 +103,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSBADCRC; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -150,7 +153,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Inode bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); @@ -994,7 +1006,8 @@ got: /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); @@ -1375,7 +1388,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ext4_itable_unused_count(sb, gdp)), sbi->s_inodes_per_block); - if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || + ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp)) < + EXT4_FIRST_INO(sb)))) { ext4_error(sb, "Something is wrong with group %u: " "used itable blocks: %d; " "itable unused count: %u", diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 285ed1588730..3543fe80a3c4 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -437,6 +437,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); + memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || @@ -681,6 +682,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, goto convert; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out; + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, 0, flags); @@ -709,7 +714,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: - if (handle) + if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; @@ -751,6 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); + mark_inode_dirty(inode); out: return copied; } @@ -886,18 +892,17 @@ retry_journal: flags |= AOP_FLAG_NOFS; if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; goto out; } - page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) { ret = -ENOMEM; @@ -915,6 +920,9 @@ retry_journal: if (ret < 0) goto out_release_page; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *pagep = page; @@ -935,7 +943,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int i_size_changed = 0; int ret; ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -953,10 +960,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos+copied > inode->i_size) { + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); - i_size_changed = 1; - } unlock_page(page); put_page(page); @@ -966,8 +971,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) - mark_inode_dirty(inode); + mark_inode_dirty(inode); return copied; } @@ -1890,42 +1894,6 @@ out: return (error < 0 ? error : 0); } -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2ea07efbe016..4efe77286ecd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -402,9 +402,9 @@ static int __check_block_validity(struct inode *inode, const char *func, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, - "lblock %lu mapped to illegal pblock " + "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, - map->m_len); + map->m_pblk, map->m_len); return -EFSCORRUPTED; } return 0; @@ -1389,9 +1389,10 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1419,7 +1420,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) + if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && ext4_can_truncate(inode)) @@ -1493,6 +1494,7 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1500,7 +1502,7 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1531,7 +1533,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); - if (size_changed) { + if (size_changed || inline_data) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -2028,11 +2030,7 @@ static int __ext4_journalled_writepage(struct page *page, } if (inline_data) { - BUFFER_TRACE(inode_bh, "get write access"); - ret = ext4_journal_get_write_access(handle, inode_bh); - - err = ext4_handle_dirty_metadata(handle, inode, inode_bh); - + ret = ext4_mark_inode_dirty(handle, inode); } else { ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -4506,7 +4504,8 @@ static int __ext4_get_inode_loc(struct inode *inode, int inodes_per_block, inode_offset; iloc->bh = NULL; - if (!ext4_valid_inum(sb, inode->i_ino)) + if (inode->i_ino < EXT4_ROOT_INO || + inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6eae2b91aafa..f7ab34088162 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2423,7 +2423,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, * initialize bb_free to be able to skip * empty groups without initialization */ - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { meta_group_info[i]->bb_free = ext4_free_clusters_after_init(sb, group, desc); } else { @@ -2989,7 +2990,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, #endif ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..638ad4743477 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -186,11 +186,8 @@ static int kmmpd(void *data) goto exit_thread; } - if (sb_rdonly(sb)) { - ext4_warning(sb, "kmmpd being stopped since filesystem " - "has been remounted as readonly."); - goto exit_thread; - } + if (sb_rdonly(sb)) + break; diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c4c2201b3aa..b7f7922061be 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -405,6 +405,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) static void ext4_handle_error(struct super_block *sb) { + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (sb_rdonly(sb)) return; @@ -740,6 +743,9 @@ __acquires(bitlock) va_end(args); } + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); return; @@ -1371,7 +1377,8 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, - Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, + Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, @@ -1438,6 +1445,8 @@ static const match_table_t tokens = { {Opt_dax, "dax"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, + {Opt_warn_on_error, "warn_on_error"}, + {Opt_nowarn_on_error, "nowarn_on_error"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, @@ -1602,6 +1611,8 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_CLEAR}, + {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, + {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -2331,6 +2342,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0); ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -2363,6 +2375,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (block_bitmap >= sb_block + 1 && + block_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " @@ -2377,6 +2397,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_bitmap >= sb_block + 1 && + inode_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " @@ -2391,6 +2419,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_table >= sb_block + 1 && + inode_table <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -3097,6 +3133,9 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL; + if (!ext4_has_group_desc_csum(sb)) + return ngroups; + for (group = 0; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) @@ -3742,6 +3781,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, @@ -3806,6 +3852,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { @@ -3882,13 +3933,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = @@ -3909,10 +3953,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } else { if (clustersize != blocksize) { - ext4_warning(sb, "fragment/cluster size (%d) != " - "block size (%d)", clustersize, - blocksize); - clustersize = blocksize; + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%d)", clustersize, blocksize); + goto failed_mount; } if (sbi->s_blocks_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, @@ -3966,6 +4010,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_blocks_count(es)); goto failed_mount; } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + goto failed_mount; + } + blocks_count = (ext4_blocks_count(es) - le32_to_cpu(es->s_first_data_block) + EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -4001,6 +4052,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } bgl_lock_init(sbi->s_blockgroup_lock); @@ -4020,14 +4079,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } + sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; } - sbi->s_gdb_count = db_count; - timer_setup(&sbi->s_err_report, print_daily_error_info, 0); /* Register extent status tree shrinker */ @@ -4736,6 +4794,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; + + /* + * The superblock bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(sbh)) + return error; + /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock @@ -5140,6 +5206,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc4ced59c565..723df14f4084 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -230,12 +230,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, { int error = -EFSCORRUPTED; - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) goto errout; + if (buffer_verified(bh)) + return 0; + error = -EFSBADCRC; if (!ext4_xattr_block_csum_verify(inode, bh)) goto errout; @@ -1560,7 +1560,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, handle_t *handle, struct inode *inode, bool is_block) { - struct ext4_xattr_entry *last; + struct ext4_xattr_entry *last, *next; struct ext4_xattr_entry *here = s->here; size_t min_offs = s->end - s->base, name_len = strlen(i->name); int in_inode = i->in_inode; @@ -1595,7 +1595,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Compute min_offs and last. */ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = next) { + next = EXT4_XATTR_NEXT(last); + if ((void *)next >= s->end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + ret = -EFSCORRUPTED; + goto out; + } if (!last->e_value_inum && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) @@ -2206,23 +2212,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, handle, inode, - false /* is_block */); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); @@ -2651,6 +2642,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, last = IFIRST(header); /* Find the entry best suited to be pushed into EA block */ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + /* never move system.data out of the inode */ + if ((last->e_name_len == 4) && + (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && + !memcmp(last->e_name, "data", 4)) + continue; total_size = EXT4_XATTR_LEN(last->e_name_len); if (!last->e_value_inum) total_size += EXT4_XATTR_SIZE( diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 065dc919a0ce..bfd589ea74c0 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -707,13 +707,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1132,7 +1140,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1289,8 +1297,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1881,8 +1888,7 @@ out_fail: iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index c184c5a356ff..cdcb376ef8df 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -220,6 +220,7 @@ int fscache_add_cache(struct fscache_cache *cache, { struct fscache_cache_tag *tag; + ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index); BUG_ON(!cache->ops); BUG_ON(!ifsdef); @@ -248,7 +249,6 @@ int fscache_add_cache(struct fscache_cache *cache, if (!cache->kobj) goto error; - ifsdef->cookie = &fscache_fsdef_index; ifsdef->cache = cache; cache->fsdef = ifsdef; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 97137d7ec5ee..83bfe04456b6 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -516,6 +516,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, goto error; } + ASSERTCMP(object->cookie, ==, cookie); fscache_stat(&fscache_n_object_alloc); object->debug_id = atomic_inc_return(&fscache_object_debug_id); @@ -571,6 +572,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie, _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); + ASSERTCMP(object->cookie, ==, cookie); + spin_lock(&cookie->lock); /* there may be multiple initial creations of this object, but we only @@ -610,9 +613,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, spin_unlock(&cache->object_list_lock); } - /* attach to the cookie */ - object->cookie = cookie; - fscache_cookie_get(cookie, fscache_cookie_get_attach_object); + /* Attach to the cookie. The object already has a ref on it. */ hlist_add_head(&object->cookie_link, &cookie->backing_objects); fscache_objlist_add(object); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 20e0d0a4dc8c..9edc920f651f 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -327,6 +327,7 @@ void fscache_object_init(struct fscache_object *object, object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; + fscache_cookie_get(cookie, fscache_cookie_get_attach_object); object->parent = NULL; #ifdef CONFIG_FSCACHE_OBJECT_LIST RB_CLEAR_NODE(&object->objlist_link); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e30c5975ea58..8d265790374c 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, + op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op) struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + op->object ? op->object->debug_id : 0, + op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d508c7844681..40d4c66c7751 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, bool truncate_op = (lend == LLONG_MAX); memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, current->mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pagevec_init(&pvec); next = start; @@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * as input to create an allocation policy. */ memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pseudo_vma.vm_file = file; diff --git a/fs/inode.c b/fs/inode.c index 2c300e981796..8c86c809ca17 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_uid = current_fsuid(); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; + + /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(dir, CAP_FSETID)) + mode &= ~S_ISGID; } else inode->i_gid = current_fsgid(); inode->i_mode = mode; diff --git a/fs/internal.h b/fs/internal.h index 980d005b21b4..5645b4ebf494 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -127,7 +127,6 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); -extern struct file *filp_clone_open(struct file *); /* * inode.c diff --git a/fs/iomap.c b/fs/iomap.c index 77397b5a96ef..0d0bd8845586 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops) { struct inode *inode = mapping->host; - loff_t pos = bno >> inode->i_blkbits; + loff_t pos = bno << inode->i_blkbits; unsigned blocksize = i_blocksize(inode); if (filemap_write_and_wait(mapping)) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 51dd68e67b0f..c0b66a7a795b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1361,6 +1361,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) { jbd_lock_bh_state(bh); + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) + pr_err("JBD2: assertion failure: h_type=%u " + "h_line_no=%u block_no=%llu jlist=%u\n", + handle->h_type, handle->h_line_no, + (unsigned long long) bh->b_blocknr, + jh->b_jlist); J_ASSERT_JH(jh, jh->b_transaction != transaction || jh->b_jlist == BJ_Metadata); jbd_unlock_bh_state(bh); @@ -1380,11 +1387,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * of the transaction. This needs to be done * once a transaction -bzzz */ - jh->b_modified = 1; if (handle->h_buffer_credits <= 0) { ret = -ENOSPC; goto out_unlock_bh; } + jh->b_modified = 1; handle->h_buffer_credits--; } diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 395c4c0d0f06..1682a87c00b2 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -115,6 +115,13 @@ struct dinode { dxd_t _dxd; /* 16: */ union { __le32 _rdev; /* 4: */ + /* + * The fast symlink area + * is expected to overflow + * into _inlineea when + * needed (which will clear + * INLINEEA). + */ u8 _fastsymlink[128]; } _u; u8 _inlineea[128]; diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1f26d1910409..9940a1e04cbf 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -87,6 +87,7 @@ struct jfs_inode_info { struct { unchar _unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ + /* _inline may overflow into _inline_ea when needed */ unchar _inline[128]; /* 128: inline symlink */ /* _inline_ea may overlay the last part of * file._xtroot if maxentry = XTROOTINITSLOT diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b6..f08571433aba 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -967,8 +967,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - offsetof(struct jfs_inode_info, i_inline), - sizeof_field(struct jfs_inode_info, i_inline), + offsetof(struct jfs_inode_info, i_inline), IDATASIZE, init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/namespace.c b/fs/namespace.c index 8ddd14806799..bd2f4c68506a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. + */ + smp_mb(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dd146885da9..f6c4ccd693f4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) - break; + goto out_restart; } - if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - goto out_done; - } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) - goto out_done; - + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (nfs4_stateid_match(&data->arg.open_stateid, + if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - goto out_done; - } else if (nfs4_stateid_match(&data->arg.lock_stateid, + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - goto out_done; + goto out_restart; } - if (!data->cancelled) - rpc_restart_call_prepare(task); out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) @@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e9679016271f..dfd73a4616ce 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); - SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nLocked: ", + mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } if (!rollup_mode) { diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 7e288d97adcb..9fed1c05f1f4 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key) } /* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) +static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } /* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) +static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, + reiserfs_cpu_offset(key), cpu_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +static int scnprintf_de_head(char *buf, size_t size, + struct reiserfs_de_head *deh) { if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); + return scnprintf(buf, size, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), + deh_state(deh)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_item_head(char *buf, struct item_head *ih) +static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + char *p = buf; + char * const end = buf + size; + + p += scnprintf(p, end - p, "%s", + (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + + p += scnprintf_le_key(p, end - p, &ih->ih_key); + + p += scnprintf(p, end - p, + ", item_len %d, item_location %d, free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), + ih_free_space(ih)); + return p - buf; } else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +static int scnprintf_direntry(char *buf, size_t size, + struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + return scnprintf(buf, size, "\"%s\"==>[%d %d]", + name, de->de_dir_id, de->de_objectid); } -static void sprintf_block_head(char *buf, struct buffer_head *bh) +static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); + return scnprintf(buf, size, + "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) +static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); + return scnprintf(buf, size, + "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bh->b_bdev, bh->b_size, + (unsigned long long)bh->b_blocknr, + atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } -static void sprintf_disk_child(char *buf, struct disk_child *dc) +static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); + return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) @@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args) char *fmt1 = fmt_buf; char *k; char *p = error_buf; + char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); - strcpy(fmt1, fmt); + if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { + strscpy(error_buf, "format string too long", end - error_buf); + goto out_unlock; + } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; - p += vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + p += scnprintf_le_key(p, end - p, + va_arg(args, struct reiserfs_key *)); break; case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + p += scnprintf_cpu_key(p, end - p, + va_arg(args, struct cpu_key *)); break; case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); + p += scnprintf_item_head(p, end - p, + va_arg(args, struct item_head *)); break; case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); + p += scnprintf_direntry(p, end - p, + va_arg(args, struct reiserfs_dir_entry *)); break; case 'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); + p += scnprintf_disk_child(p, end - p, + va_arg(args, struct disk_child *)); break; case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_block_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_buffer_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); + p += scnprintf_de_head(p, end - p, + va_arg(args, struct reiserfs_de_head *)); break; } - p += strlen(p); fmt1 = k + 2; } - vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); +out_unlock: spin_unlock(&error_lock); } diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2751476e6b6e..f098b9f1c396 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 23813c078cc9..0839efa720b3 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + if (unlikely(length < 0)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 13d80947bf9e..f1c1430ae721 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n, } for (i = 0; i < blocks; i++) { - int size = le32_to_cpu(blist[i]); + int size = squashfs_block_size(blist[i]); + if (size < 0) { + err = size; + goto failure; + } block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); } n -= blocks; @@ -367,7 +371,24 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) sizeof(size)); if (res < 0) return res; - return le32_to_cpu(size); + return squashfs_block_size(size); +} + +void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +{ + int copied; + void *pageaddr; + + pageaddr = kmap_atomic(page); + copied = squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + copied, 0, PAGE_SIZE - copied); + kunmap_atomic(pageaddr); + + flush_dcache_page(page); + if (copied == avail) + SetPageUptodate(page); + else + SetPageError(page); } /* Copy data into page cache */ @@ -376,7 +397,6 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - void *pageaddr; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; @@ -402,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(push_page); - SetPageUptodate(push_page); + squashfs_fill_page(push_page, buffer, offset, avail); skip_page: unlock_page(push_page); if (i != page->index) @@ -416,10 +431,9 @@ skip_page: } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) +static int squashfs_readpage_fragment(struct page *page, int expected) { struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -430,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, i_size_read(inode) & - (msblk->block_size - 1), + squashfs_copy_cache(page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +static int squashfs_readpage_sparse(struct page *page, int expected) { - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - - squashfs_copy_cache(page, NULL, bytes, 0); + squashfs_copy_cache(page, NULL, expected, 0); return 0; } @@ -456,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page) struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; + int expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; int res; void *pageaddr; @@ -474,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page) goto error_out; if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize); + res = squashfs_readpage_block(page, block, bsize, expected); } else - res = squashfs_readpage_fragment(page); + res = squashfs_readpage_fragment(page, expected); if (!res) return 0; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index f2310d2a2019..a9ba8d96776a 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -20,7 +20,7 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) { struct inode *i = page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, @@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize) ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, buffer->length, 0); + squashfs_copy_cache(page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index cb485d8e0e91..80db1b86a27c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -21,10 +21,11 @@ #include "page_actor.h" static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); + int pages, struct page **page, int bytes); /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) { struct inode *inode = target_page->mapping->host; @@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * using an intermediate buffer. */ res = squashfs_read_cache(target_page, block, bsize, pages, - page); + page, expected); if (res < 0) goto mark_errored; @@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (res < 0) goto mark_errored; + if (res != expected) { + res = -EIO; + goto mark_errored; + } + /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { @@ -138,13 +144,12 @@ out: static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) + int pages, struct page **page, int bytes) { struct inode *i = target_page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; + int res = buffer->error, n, offset = 0; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, @@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, if (page[n] == NULL) continue; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); + squashfs_fill_page(page[n], buffer, offset, avail); unlock_page(page[n]); if (page[n] != target_page) put_page(page[n]); diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0ed6edbc5c71..0681feab4a84 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if (fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); @@ -61,9 +66,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); - size = le32_to_cpu(fragment_entry.size); - - return size; + return squashfs_block_size(fragment_entry.size); } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..f89f8a74c6ce 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ +void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +extern int squashfs_readpage_block(struct page *, u64, int, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 24d12fd14177..4e6853f084d0 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -129,6 +129,12 @@ #define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) +static inline int squashfs_block_size(__le32 raw) +{ + u32 size = le32_to_cpu(raw); + return (size >> 25) ? -EIO : size; +} + /* * Inode number ops. Inodes consist of a compressed block number, and an * uncompressed offset within that block diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..ef69c31947bf 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 8a73b97217c8..40e657386fa5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -272,7 +273,7 @@ allocate_id_index_table: sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 123bf7d516fc..bad9cea37f12 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -222,24 +222,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, unsigned long reason) { struct mm_struct *mm = ctx->mm; - pte_t *pte; + pte_t *ptep, pte; bool ret = true; VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); - if (!pte) + ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); + + if (!ptep) goto out; ret = false; + pte = huge_ptep_get(ptep); /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ - if (huge_pte_none(*pte)) + if (huge_pte_none(pte)) ret = true; - if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP)) + if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) ret = true; out: return ret; @@ -631,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index eef466260d43..75dbdc14c45f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -223,12 +223,13 @@ xfs_alloc_get_rec( error = xfs_btree_get_rec(cur, &rec, stat); if (error || !(*stat)) return error; - if (rec->alloc.ar_blockcount == 0) - goto out_bad_rec; *bno = be32_to_cpu(rec->alloc.ar_startblock); *len = be32_to_cpu(rec->alloc.ar_blockcount); + if (*len == 0) + goto out_bad_rec; + /* check for valid extent range, including overflow */ if (!xfs_verify_agbno(mp, agno, *bno)) goto out_bad_rec; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 33dc34655ac3..30d1d60f1d46 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -731,7 +731,8 @@ xfs_inode_validate_extsize( if ((hint_flag || inherit_flag) && extsize == 0) return __this_address; - if (!(hint_flag || inherit_flag) && extsize != 0) + /* free inodes get flags set to zero but extsize remains */ + if (mode && !(hint_flag || inherit_flag) && extsize != 0) return __this_address; if (extsize_bytes % blocksize_bytes) @@ -777,7 +778,8 @@ xfs_inode_validate_cowextsize( if (hint_flag && cowextsize == 0) return __this_address; - if (!hint_flag && cowextsize != 0) + /* free inodes get flags set to zero but cowextsize remains */ + if (mode && !hint_flag && cowextsize != 0) return __this_address; if (hint_flag && rt_flag) |