diff options
author | Olof Johansson <olof@lixom.net> | 2013-07-12 21:59:39 +0400 |
---|---|---|
committer | Olof Johansson <olof@lixom.net> | 2013-07-12 21:59:39 +0400 |
commit | f4b96f5e4ff8d86699c851c10245e102809b0331 (patch) | |
tree | f766102263bed71738431cabb4d4f6f086005cd8 /fs | |
parent | 9d8812df35be58a5da0c44182c1e4ba2507cc6a7 (diff) | |
parent | c24a6ae18abde53b048372b066b93b71b1b91154 (diff) | |
download | linux-f4b96f5e4ff8d86699c851c10245e102809b0331.tar.xz |
Merge tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
Omap fixes and minor defconfig updates that would be good to
get in before -rc1.
* tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap:
ARM: OMAP2+: omap2plus_defconfig: Enable appended DTB support
ARM: OMAP2+: Enable TI_EDMA in omap2plus_defconfig
ARM: OMAP2+: omap2plus_defconfig: enable DRA752 thermal support by default
ARM: OMAP2+: omap2plus_defconfig: enable TI bandgap driver
ARM: OMAP2+: devices: remove duplicated include from devices.c
ARM: OMAP3: igep0020: Set DSS pins in correct mux mode.
ARM: OMAP2+: N900: enable N900-specific drivers even if device tree is enabled
ARM: OMAP2+: Cocci spatch "ptr_ret.spatch"
ARM: OMAP2+: Remove obsolete Makefile line
ARM: OMAP5: Enable Cortex A15 errata 798181
ARM: scu: provide inline dummy functions when SCU is not present
ARM: OMAP4: sleep: build OMAP4 specific functions only for OMAP4
ARM: OMAP2+: timer: initialize before using oh_name
Signed-off-by: Olof Johansson <olof@lixom.net>
Add/move/change conflicts in arch/arm/mach-omap2/Kconfig resolved.
Diffstat (limited to 'fs')
135 files changed, 2896 insertions, 1577 deletions
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index ade28bb058e3..0d138c0de293 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -191,8 +191,7 @@ const struct file_operations adfs_dir_operations = { }; static int -adfs_hash(const struct dentry *parent, const struct inode *inode, - struct qstr *qstr) +adfs_hash(const struct dentry *parent, struct qstr *qstr) { const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; const unsigned char *name; @@ -228,8 +227,7 @@ adfs_hash(const struct dentry *parent, const struct inode *inode, * requirements of the underlying filesystem. */ static int -adfs_compare(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +adfs_compare(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { int i; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index ff65884a7839..c36cbb4537a2 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -13,18 +13,12 @@ typedef int (*toupper_t)(int); static int affs_toupper(int ch); -static int affs_hash_dentry(const struct dentry *, - const struct inode *, struct qstr *); -static int affs_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int affs_hash_dentry(const struct dentry *, struct qstr *); +static int affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); static int affs_intl_toupper(int ch); -static int affs_intl_hash_dentry(const struct dentry *, - const struct inode *, struct qstr *); -static int affs_intl_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int affs_intl_hash_dentry(const struct dentry *, struct qstr *); +static int affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); const struct dentry_operations affs_dentry_operations = { @@ -86,14 +80,12 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper) } static int -affs_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) { return __affs_hash_dentry(qstr, affs_toupper); } static int -affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr) { return __affs_hash_dentry(qstr, affs_intl_toupper); } @@ -131,15 +123,13 @@ static inline int __affs_compare_dentry(unsigned int len, } static int -affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return __affs_compare_dentry(len, str, name, affs_toupper); } static int -affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return __affs_compare_dentry(len, str, name, affs_intl_toupper); diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 2497bf306c70..a8cf2cff836c 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) */ static int afs_do_setlk(struct file *file, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); + struct inode *inode = file_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); afs_lock_type_t type; struct key *key = file->private_data; int ret; @@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; - lock_flocks(); + spin_lock(&inode->i_lock); /* make sure we've got a callback on this file and that our view of the * data version is up to date */ @@ -420,7 +421,7 @@ given_lock: afs_vnode_fetch_status(vnode, NULL, key); error: - unlock_flocks(); + spin_unlock(&inode->i_lock); _leave(" = %d", ret); return ret; @@ -39,6 +39,8 @@ #include <asm/kmap_types.h> #include <asm/uaccess.h> +#include "internal.h" + #define AIO_RING_MAGIC 0xa10a10a1 #define AIO_RING_COMPAT_FEATURES 1 #define AIO_RING_INCOMPAT_FEATURES 0 @@ -623,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* * Add a completion event to the ring buffer. Must be done holding - * ctx->ctx_lock to prevent other code from messing with the tail + * ctx->completion_lock to prevent other code from messing with the tail * pointer since we might be called from irq context. */ spin_lock_irqsave(&ctx->completion_lock, flags); diff --git a/fs/block_dev.c b/fs/block_dev.c index 2091db8cdd78..bb43ce081d6e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -325,31 +325,10 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, static loff_t block_llseek(struct file *file, loff_t offset, int whence) { struct inode *bd_inode = file->f_mapping->host; - loff_t size; loff_t retval; mutex_lock(&bd_inode->i_mutex); - size = i_size_read(bd_inode); - - retval = -EINVAL; - switch (whence) { - case SEEK_END: - offset += size; - break; - case SEEK_CUR: - offset += file->f_pos; - case SEEK_SET: - break; - default: - goto out; - } - if (offset >= 0 && offset <= size) { - if (offset != file->f_pos) { - file->f_pos = offset; - } - retval = offset; - } -out: + retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); mutex_unlock(&bd_inode->i_mutex); return retval; } @@ -1583,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = { .writepages = generic_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, + .is_dirty_writeback = buffer_check_dirty_writeback, }; const struct file_operations def_blk_fops = { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4205ba752d40..89da56a58b63 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2425,20 +2425,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) } } - if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { - offset = -EINVAL; - goto out; - } - if (offset > inode->i_sb->s_maxbytes) { - offset = -EINVAL; - goto out; - } - - /* Special lock needed here? */ - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out: mutex_unlock(&inode->i_mutex); return offset; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f81d67cdc8d..cd7e96c73cb7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3881,7 +3881,7 @@ drop_write: static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_quota_rescan_args *qsa; int ret; @@ -3914,7 +3914,7 @@ drop_write: static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_quota_rescan_args *qsa; int ret = 0; @@ -4020,7 +4020,7 @@ out: static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; const char *label = root->fs_info->super_copy->label; size_t len = strnlen(label, BTRFS_LABEL_SIZE); int ret; @@ -4039,7 +4039,7 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_super_block *super_block = root->fs_info->super_copy; struct btrfs_trans_handle *trans; char label[BTRFS_LABEL_SIZE]; diff --git a/fs/buffer.c b/fs/buffer.c index f93392e2df12..4d7433534f5c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh) EXPORT_SYMBOL(unlock_buffer); /* + * Returns if the page has dirty or writeback buffers. If all the buffers + * are unlocked and clean then the PageDirty information is stale. If + * any of the pages are locked, it is assumed they are locked for IO. + */ +void buffer_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct buffer_head *head, *bh; + *dirty = false; + *writeback = false; + + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + return; + + if (PageWriteback(page)) + *writeback = true; + + head = page_buffers(page); + bh = head; + do { + if (buffer_locked(bh)) + *writeback = true; + + if (buffer_dirty(bh)) + *dirty = true; + + bh = bh->b_this_page; + } while (bh != head); +} +EXPORT_SYMBOL(buffer_check_dirty_writeback); + +/* * Block until a buffer comes unlocked. This doesn't stop it * from becoming locked again - you have to lock it yourself * if you want to preserve its state. diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 317f9ee9c991..ebaff368120d 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -12,6 +12,7 @@ #include <linux/mount.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/swap.h> #include "internal.h" /* @@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) */ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, struct fscache_retrieval *op, - struct page *netpage, - struct pagevec *pagevec) + struct page *netpage) { struct cachefiles_one_read *monitor; struct address_space *bmapping; @@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, _enter(""); - pagevec_reinit(pagevec); - _debug("read back %p{%lu,%d}", netpage, netpage->index, page_count(netpage)); @@ -283,9 +281,7 @@ installed_new_backing_page: backpage = newpage; newpage = NULL; - page_cache_get(backpage); - pagevec_add(pagevec, backpage); - __pagevec_lru_add_file(pagevec); + lru_cache_add_file(backpage); read_backing_page: ret = bmapping->a_ops->readpage(NULL, backpage); @@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, if (block) { /* submit the apparently valid page to the backing fs to be * read from disk */ - ret = cachefiles_read_backing_file_one(object, op, page, - &pagevec); + ret = cachefiles_read_backing_file_one(object, op, page); } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ fscache_mark_page_cached(op, page); @@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, { struct cachefiles_one_read *monitor = NULL; struct address_space *bmapping = object->backer->d_inode->i_mapping; - struct pagevec lru_pvec; struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; int ret = 0; _enter(""); - pagevec_init(&lru_pvec, 0); - list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); @@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, backpage = newpage; newpage = NULL; - page_cache_get(backpage); - if (!pagevec_add(&lru_pvec, backpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(backpage); reread_backing_page: ret = bmapping->a_ops->readpage(NULL, backpage); @@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, goto nomem; } - page_cache_get(netpage); - if (!pagevec_add(&lru_pvec, netpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(netpage); /* install a monitor */ page_cache_get(netpage); @@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, fscache_mark_page_cached(op, netpage); - page_cache_get(netpage); - if (!pagevec_add(&lru_pvec, netpage)) - __pagevec_lru_add_file(&lru_pvec); + lru_cache_add_file(netpage); /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); @@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, out: /* tidy up */ - pagevec_lru_add_file(&lru_pvec); - if (newpage) page_cache_release(newpage); if (netpage) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 656e16907430..16c989d3e23c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -866,16 +866,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) break; } - if (offset < 0 || offset > inode->i_sb->s_maxbytes) { - offset = -EINVAL; - goto out; - } - - /* Special lock needed here? */ - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out: mutex_unlock(&inode->i_mutex); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ebbf680378e2..690f73f42425 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) /** * Encode the flock and fcntl locks for the given inode into the ceph_filelock - * array. Must be called with lock_flocks() already held. + * array. Must be called with inode->i_lock already held. * If we encounter more of a specific lock type than expected, return -ENOSPC. */ int ceph_encode_locks_to_buffer(struct inode *inode, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4d2920304be8..74fd2898b2ab 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2481,20 +2481,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_filelock *flocks; encode_again: - lock_flocks(); + spin_lock(&inode->i_lock); ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - unlock_flocks(); + spin_unlock(&inode->i_lock); flocks = kmalloc((num_fcntl_locks+num_flock_locks) * sizeof(struct ceph_filelock), GFP_NOFS); if (!flocks) { err = -ENOMEM; goto out_free; } - lock_flocks(); + spin_lock(&inode->i_lock); err = ceph_encode_locks_to_buffer(inode, flocks, num_fcntl_locks, num_flock_locks); - unlock_flocks(); + spin_unlock(&inode->i_lock); if (err) { kfree(flocks); if (err == -ENOSPC) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 2906ee276408..603f18a65c12 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -10,6 +10,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_DES select CRYPTO_SHA256 + select CRYPTO_CMAC help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index d59748346020..f3ac4154cbb6 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) tcon->nativeFileSystem); } seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" - "\nPathComponentMax: %d Status: 0x%d", + "\n\tPathComponentMax: %d Status: 0x%d", le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), @@ -224,6 +224,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_puts(m, " type: CDROM "); else seq_printf(m, " type: %d ", dev_type); + if (server->ops->dump_share_caps) + server->ops->dump_share_caps(m, tcon); if (tcon->need_reconnect) seq_puts(m, "\tDISCONNECTED "); @@ -595,9 +597,36 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) return single_open(file, cifs_security_flags_proc_show, NULL); } +/* + * Ensure that if someone sets a MUST flag, that we disable all other MAY + * flags except for the ones corresponding to the given MUST flag. If there are + * multiple MUST flags, then try to prefer more secure ones. + */ +static void +cifs_security_flags_handle_must_flags(unsigned int *flags) +{ + unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + + if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) + *flags = CIFSSEC_MUST_KRB5; + else if ((*flags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) + *flags = CIFSSEC_MUST_NTLMSSP; + else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) + *flags = CIFSSEC_MUST_NTLMV2; + else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM) + *flags = CIFSSEC_MUST_NTLM; + else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) + *flags = CIFSSEC_MUST_LANMAN; + else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) + *flags = CIFSSEC_MUST_PLNTXT; + + *flags |= signflags; +} + static ssize_t cifs_security_flags_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { + int rc; unsigned int flags; char flags_string[12]; char c; @@ -620,26 +649,35 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, global_secflags = CIFSSEC_MAX; return count; } else if (!isdigit(c)) { - cifs_dbg(VFS, "invalid flag %c\n", c); + cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", + flags_string); return -EINVAL; } } - /* else we have a number */ - flags = simple_strtoul(flags_string, NULL, 0); + /* else we have a number */ + rc = kstrtouint(flags_string, 0, &flags); + if (rc) { + cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", + flags_string); + return rc; + } cifs_dbg(FYI, "sec flags 0x%x\n", flags); - if (flags <= 0) { - cifs_dbg(VFS, "invalid security flags %s\n", flags_string); + if (flags == 0) { + cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", flags_string); return -EINVAL; } if (flags & ~CIFSSEC_MASK) { - cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n", + cifs_dbg(VFS, "Unsupported security flags: 0x%x\n", flags & ~CIFSSEC_MASK); return -EINVAL; } + + cifs_security_flags_handle_must_flags(&flags); + /* flags look ok - update the global security flags for cifs module */ global_secflags = flags; if (global_secflags & CIFSSEC_MUST_SIGN) { diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 4fb097468e21..fe8d6276410a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -327,14 +327,14 @@ UniToupper(register wchar_t uc) /* * UniStrupr: Upper case a unicode string */ -static inline wchar_t * -UniStrupr(register wchar_t *upin) +static inline __le16 * +UniStrupr(register __le16 *upin) { - register wchar_t *up; + register __le16 *up; up = upin; while (*up) { /* For all characters */ - *up = UniToupper(*up); + *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); up++; } return upin; /* Return input pointer */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 71436d1fca13..3d8bf941d126 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -276,7 +276,6 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { - memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE); return 0; @@ -414,7 +413,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; - wchar_t *user; + __le16 *user; wchar_t *domain; wchar_t *server; @@ -439,7 +438,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return rc; } - /* convert ses->user_name to unicode and uppercase */ + /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { @@ -448,7 +447,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, } if (len) { - len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); + len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { memset(user, '\0', 2); @@ -536,7 +535,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) return rc; } - if (ses->server->secType == RawNTLMSSP) + if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) memcpy(ses->auth_key.response + offset, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else @@ -568,7 +567,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ - if (ses->server->secType == RawNTLMSSP) { + if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { if (!ses->domainName) { rc = find_domain_name(ses, nls_cp); if (rc) { @@ -706,6 +705,9 @@ calc_seckey(struct cifs_ses *ses) void cifs_crypto_shash_release(struct TCP_Server_Info *server) { + if (server->secmech.cmacaes) + crypto_free_shash(server->secmech.cmacaes); + if (server->secmech.hmacsha256) crypto_free_shash(server->secmech.hmacsha256); @@ -715,6 +717,8 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server) if (server->secmech.hmacmd5) crypto_free_shash(server->secmech.hmacmd5); + kfree(server->secmech.sdesccmacaes); + kfree(server->secmech.sdeschmacsha256); kfree(server->secmech.sdeschmacmd5); @@ -748,6 +752,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) goto crypto_allocate_hmacsha256_fail; } + server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0); + if (IS_ERR(server->secmech.cmacaes)) { + cifs_dbg(VFS, "could not allocate crypto cmac-aes"); + rc = PTR_ERR(server->secmech.cmacaes); + goto crypto_allocate_cmacaes_fail; + } + size = sizeof(struct shash_desc) + crypto_shash_descsize(server->secmech.hmacmd5); server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); @@ -778,8 +789,22 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; server->secmech.sdeschmacsha256->shash.flags = 0x0; + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.cmacaes); + server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdesccmacaes) { + cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__); + rc = -ENOMEM; + goto crypto_allocate_cmacaes_sdesc_fail; + } + server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes; + server->secmech.sdesccmacaes->shash.flags = 0x0; + return 0; +crypto_allocate_cmacaes_sdesc_fail: + kfree(server->secmech.sdeschmacsha256); + crypto_allocate_hmacsha256_sdesc_fail: kfree(server->secmech.sdescmd5); @@ -787,6 +812,9 @@ crypto_allocate_md5_sdesc_fail: kfree(server->secmech.sdeschmacmd5); crypto_allocate_hmacmd5_sdesc_fail: + crypto_free_shash(server->secmech.cmacaes); + +crypto_allocate_cmacaes_fail: crypto_free_shash(server->secmech.hmacsha256); crypto_allocate_hmacsha256_fail: diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 540c1ccfcdb2..4bdd547dbf6f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -312,11 +312,14 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) } static void -cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) +cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { + if (ses->sectype == Unspecified) + return; + seq_printf(s, ",sec="); - switch (server->secType) { + switch (ses->sectype) { case LANMAN: seq_printf(s, "lanman"); break; @@ -338,7 +341,7 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) break; } - if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->sign) seq_printf(s, "i"); } @@ -369,7 +372,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); - cifs_show_security(s, tcon->ses->server); + cifs_show_security(s, tcon->ses); cifs_show_cache_flavor(s, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) @@ -765,7 +768,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) { - /* note that this is called by vfs setlease with lock_flocks held + /* note that this is called by vfs setlease with i_lock held to protect *lease from going away */ struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d05b3028e3b9..ea723a5e8226 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.0" +#define CIFS_VERSION "2.01" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4f07f6fbe494..e66b08882548 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -101,20 +101,14 @@ enum statusEnum { }; enum securityEnum { - LANMAN = 0, /* Legacy LANMAN auth */ + Unspecified = 0, /* not specified */ + LANMAN, /* Legacy LANMAN auth */ NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ -/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ Kerberos, /* Kerberos via SPNEGO */ }; -enum protocolEnum { - TCP = 0, - SCTP - /* Netbios frames protocol not supported at this time */ -}; - struct session_key { unsigned int len; char *response; @@ -131,9 +125,11 @@ struct cifs_secmech { struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ struct crypto_shash *md5; /* md5 hash function */ struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ + struct crypto_shash *cmacaes; /* block-cipher based MAC function */ struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ + struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */ }; /* per smb session structure/fields */ @@ -181,6 +177,7 @@ enum smb_version { Smb_20, Smb_21, Smb_30, + Smb_302, }; struct mid_q_entry; @@ -228,6 +225,7 @@ struct smb_version_operations { void (*dump_detail)(void *); void (*clear_stats)(struct cifs_tcon *); void (*print_stats)(struct seq_file *m, struct cifs_tcon *); + void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); /* verify the message */ int (*check_message)(char *, unsigned int); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); @@ -367,6 +365,8 @@ struct smb_version_operations { void (*set_lease_key)(struct inode *, struct cifs_fid *fid); /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *fid); + /* The next two functions will need to be changed to per smb session */ + void (*generate_signingkey)(struct TCP_Server_Info *server); int (*calc_signature)(struct smb_rqst *rqst, struct TCP_Server_Info *server); }; @@ -387,6 +387,8 @@ struct smb_version_values { unsigned int cap_nt_find; unsigned int cap_large_files; unsigned int oplock_read; + __u16 signing_enabled; + __u16 signing_required; }; #define HEADER_SIZE(server) (server->vals->header_size) @@ -407,7 +409,8 @@ struct smb_vol { kgid_t backupgid; umode_t file_mode; umode_t dir_mode; - unsigned secFlg; + enum securityEnum sectype; /* sectype requested via mnt opts */ + bool sign; /* was signing requested via mnt opts? */ bool retry:1; bool intr:1; bool setuids:1; @@ -441,6 +444,7 @@ struct smb_vol { bool mfsymlinks:1; /* use Minshall+French Symlinks */ bool multiuser:1; bool rwpidforward:1; /* pid forward for read/write operations */ + bool nosharesock; unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -514,6 +518,7 @@ struct TCP_Server_Info { struct task_struct *tsk; char server_GUID[16]; __u16 sec_mode; + bool sign; /* is signing enabled on this connection? */ bool session_estab; /* mark when very first sess is established */ #ifdef CONFIG_CIFS_SMB2 int echo_credits; /* echo reserved slots */ @@ -521,7 +526,6 @@ struct TCP_Server_Info { bool echoes:1; /* enable echoes */ #endif u16 dialect; /* dialect index that server chose */ - enum securityEnum secType; bool oplocks:1; /* enable oplocks */ unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ @@ -540,12 +544,17 @@ struct TCP_Server_Info { int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ + char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* for signing, protected by srv_mutex */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ +#define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */ +#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ +#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ + char negflavor; /* NEGOTIATE response flavor */ /* extended security flavors that server supports */ bool sec_ntlmssp; /* supports NTLMSSP */ bool sec_kerberosu2u; /* supports U2U Kerberos */ @@ -697,7 +706,6 @@ struct cifs_ses { enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ __u16 ipc_tid; /* special tid for connection to IPC share */ - __u16 flags; __u16 vcnum; char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ @@ -714,21 +722,14 @@ struct cifs_ses { char *password; struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ + enum securityEnum sectype; /* what security flavor was specified? */ + bool sign; /* is signing required? */ bool need_reconnect:1; /* connection reset, uid now invalid */ #ifdef CONFIG_CIFS_SMB2 __u16 session_flags; #endif /* CONFIG_CIFS_SMB2 */ }; -/* no more than one of the following three session flags may be set */ -#define CIFS_SES_NT4 1 -#define CIFS_SES_OS2 2 -#define CIFS_SES_W9X 4 -/* following flag is set for old servers such as OS2 (and Win95?) - which do not negotiate NTLM or POSIX dialects, but instead - negotiate one of the older LANMAN dialects */ -#define CIFS_SES_LANMAN 8 - static inline bool cap_unix(struct cifs_ses *ses) { @@ -816,7 +817,7 @@ struct cifs_tcon { #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ - __u32 capabilities; + __le32 capabilities; __u32 share_flags; __u32 maximal_access; __u32 vol_serial_number; @@ -1348,7 +1349,7 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ -#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMSSP) +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* @@ -1494,4 +1495,7 @@ extern struct smb_version_values smb21_values; #define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; +#define SMB302_VERSION_STRING "3.02" +/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ +extern struct smb_version_values smb302_values; #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index e996ff6b26d1..11ca24a8e054 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -142,6 +142,11 @@ */ #define CIFS_SESS_KEY_SIZE (16) +/* + * Size of the smb3 signing key + */ +#define SMB3_SIGN_KEY_SIZE (16) + #define CIFS_CLIENT_CHALLENGE_SIZE (8) #define CIFS_SERVER_CHALLENGE_SIZE (8) #define CIFS_HMAC_MD5_HASH_SIZE (16) @@ -531,7 +536,7 @@ typedef struct lanman_neg_rsp { #define READ_RAW_ENABLE 1 #define WRITE_RAW_ENABLE 2 #define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE) - +#define SMB1_CLIENT_GUID_SIZE (16) typedef struct negotiate_rsp { struct smb_hdr hdr; /* wct = 17 */ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ @@ -553,7 +558,7 @@ typedef struct negotiate_rsp { /* followed by 16 bytes of server GUID */ /* then security blob if cap_extended_security negotiated */ struct { - unsigned char GUID[16]; + unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; unsigned char SecurityBlob[1]; } __attribute__((packed)) extended_response; } __attribute__((packed)) u; @@ -1315,6 +1320,14 @@ typedef struct smb_com_ntransact_rsp { /* parms and data follow */ } __attribute__((packed)) NTRANSACT_RSP; +/* See MS-SMB 2.2.7.2.1.1 */ +struct srv_copychunk { + __le64 SourceOffset; + __le64 DestinationOffset; + __le32 CopyLength; + __u32 Reserved; +} __packed; + typedef struct smb_com_transaction_ioctl_req { struct smb_hdr hdr; /* wct = 23 */ __u8 MaxSetupCount; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index dda188a94332..c8ff018fae68 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -118,6 +118,8 @@ extern void header_assemble(struct smb_hdr *, char /* command */ , extern int small_smb_init_no_tc(const int smb_cmd, const int wct, struct cifs_ses *ses, void **request_buf); +extern enum securityEnum select_sectype(struct TCP_Server_Info *server, + enum securityEnum requested); extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); @@ -212,6 +214,7 @@ extern int cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses); extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, struct nls_table *nls_info); +extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required); extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, @@ -433,6 +436,7 @@ extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); extern void cifs_crypto_shash_release(struct TCP_Server_Info *); extern int calc_seckey(struct cifs_ses *); +extern void generate_smb3signingkey(struct TCP_Server_Info *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern int calc_lanman_hash(const char *password, const char *cryptkey, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a58dc77cc443..a89c4cb4e6cf 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -367,6 +367,185 @@ vt2_err: return -EINVAL; } +static int +decode_ext_sec_blob(struct cifs_ses *ses, NEGOTIATE_RSP *pSMBr) +{ + int rc = 0; + u16 count; + char *guid = pSMBr->u.extended_response.GUID; + struct TCP_Server_Info *server = ses->server; + + count = get_bcc(&pSMBr->hdr); + if (count < SMB1_CLIENT_GUID_SIZE) + return -EIO; + + spin_lock(&cifs_tcp_ses_lock); + if (server->srv_count > 1) { + spin_unlock(&cifs_tcp_ses_lock); + if (memcmp(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE) != 0) { + cifs_dbg(FYI, "server UID changed\n"); + memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); + } + } else { + spin_unlock(&cifs_tcp_ses_lock); + memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); + } + + if (count == SMB1_CLIENT_GUID_SIZE) { + server->sec_ntlmssp = true; + } else { + count -= SMB1_CLIENT_GUID_SIZE; + rc = decode_negTokenInit( + pSMBr->u.extended_response.SecurityBlob, count, server); + if (rc != 1) + return -EINVAL; + } + + return 0; +} + +int +cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required) +{ + bool srv_sign_required = server->sec_mode & server->vals->signing_required; + bool srv_sign_enabled = server->sec_mode & server->vals->signing_enabled; + bool mnt_sign_enabled = global_secflags & CIFSSEC_MAY_SIGN; + + /* + * Is signing required by mnt options? If not then check + * global_secflags to see if it is there. + */ + if (!mnt_sign_required) + mnt_sign_required = ((global_secflags & CIFSSEC_MUST_SIGN) == + CIFSSEC_MUST_SIGN); + + /* + * If signing is required then it's automatically enabled too, + * otherwise, check to see if the secflags allow it. + */ + mnt_sign_enabled = mnt_sign_required ? mnt_sign_required : + (global_secflags & CIFSSEC_MAY_SIGN); + + /* If server requires signing, does client allow it? */ + if (srv_sign_required) { + if (!mnt_sign_enabled) { + cifs_dbg(VFS, "Server requires signing, but it's disabled in SecurityFlags!"); + return -ENOTSUPP; + } + server->sign = true; + } + + /* If client requires signing, does server allow it? */ + if (mnt_sign_required) { + if (!srv_sign_enabled) { + cifs_dbg(VFS, "Server does not support signing!"); + return -ENOTSUPP; + } + server->sign = true; + } + + return 0; +} + +#ifdef CONFIG_CIFS_WEAK_PW_HASH +static int +decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) +{ + __s16 tmp; + struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; + + if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT) + return -EOPNOTSUPP; + + server->sec_mode = le16_to_cpu(rsp->SecurityMode); + server->maxReq = min_t(unsigned int, + le16_to_cpu(rsp->MaxMpxCount), + cifs_max_pending); + set_credits(server, server->maxReq); + server->maxBuf = le16_to_cpu(rsp->MaxBufSize); + server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); + /* even though we do not use raw we might as well set this + accurately, in case we ever find a need for it */ + if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { + server->max_rw = 0xFF00; + server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; + } else { + server->max_rw = 0;/* do not need to use raw anyway */ + server->capabilities = CAP_MPX_MODE; + } + tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); + if (tmp == -1) { + /* OS/2 often does not set timezone therefore + * we must use server time to calc time zone. + * Could deviate slightly from the right zone. + * Smallest defined timezone difference is 15 minutes + * (i.e. Nepal). Rounding up/down is done to match + * this requirement. + */ + int val, seconds, remain, result; + struct timespec ts, utc; + utc = CURRENT_TIME; + ts = cnvrtDosUnixTm(rsp->SrvTime.Date, + rsp->SrvTime.Time, 0); + cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", + (int)ts.tv_sec, (int)utc.tv_sec, + (int)(utc.tv_sec - ts.tv_sec)); + val = (int)(utc.tv_sec - ts.tv_sec); + seconds = abs(val); + result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; + remain = seconds % MIN_TZ_ADJ; + if (remain >= (MIN_TZ_ADJ / 2)) + result += MIN_TZ_ADJ; + if (val < 0) + result = -result; + server->timeAdj = result; + } else { + server->timeAdj = (int)tmp; + server->timeAdj *= 60; /* also in seconds */ + } + cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); + + + /* BB get server time for time conversions and add + code to use it and timezone since this is not UTC */ + + if (rsp->EncryptionKeyLength == + cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { + memcpy(server->cryptkey, rsp->EncryptionKey, + CIFS_CRYPTO_KEY_SIZE); + } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { + return -EIO; /* need cryptkey unless plain text */ + } + + cifs_dbg(FYI, "LANMAN negotiated\n"); + return 0; +} +#else +static inline int +decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) +{ + cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); + return -EOPNOTSUPP; +} +#endif + +static bool +should_set_ext_sec_flag(enum securityEnum sectype) +{ + switch (sectype) { + case RawNTLMSSP: + case Kerberos: + return true; + case Unspecified: + if (global_secflags & + (CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)) + return true; + /* Fallthrough */ + default: + return false; + } +} + int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) { @@ -375,41 +554,24 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) int rc = 0; int bytes_returned; int i; - struct TCP_Server_Info *server; + struct TCP_Server_Info *server = ses->server; u16 count; - unsigned int secFlags; - if (ses->server) - server = ses->server; - else { - rc = -EIO; - return rc; + if (!server) { + WARN(1, "%s: server is NULL!\n", __func__); + return -EIO; } + rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , (void **) &pSMB, (void **) &pSMBr); if (rc) return rc; - /* if any of auth flags (ie not sign or seal) are overriden use them */ - if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) - secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ - else /* if override flags set only sign/seal OR them with global auth */ - secFlags = global_secflags | ses->overrideSecFlg; - - cifs_dbg(FYI, "secFlags 0x%x\n", secFlags); - pSMB->hdr.Mid = get_next_mid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); - if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) - pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { - cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n"); - pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) - pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { - cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n"); + if (should_set_ext_sec_flag(ses->sectype)) { + cifs_dbg(FYI, "Requesting extended security."); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; } @@ -436,127 +598,21 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) could not negotiate a common dialect */ rc = -EOPNOTSUPP; goto neg_err_exit; -#ifdef CONFIG_CIFS_WEAK_PW_HASH - } else if ((pSMBr->hdr.WordCount == 13) - && ((server->dialect == LANMAN_PROT) - || (server->dialect == LANMAN2_PROT))) { - __s16 tmp; - struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; - - if ((secFlags & CIFSSEC_MAY_LANMAN) || - (secFlags & CIFSSEC_MAY_PLNTXT)) - server->secType = LANMAN; - else { - cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n"); - rc = -EOPNOTSUPP; - goto neg_err_exit; - } - server->sec_mode = le16_to_cpu(rsp->SecurityMode); - server->maxReq = min_t(unsigned int, - le16_to_cpu(rsp->MaxMpxCount), - cifs_max_pending); - set_credits(server, server->maxReq); - server->maxBuf = le16_to_cpu(rsp->MaxBufSize); - server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); - /* even though we do not use raw we might as well set this - accurately, in case we ever find a need for it */ - if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { - server->max_rw = 0xFF00; - server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; - } else { - server->max_rw = 0;/* do not need to use raw anyway */ - server->capabilities = CAP_MPX_MODE; - } - tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); - if (tmp == -1) { - /* OS/2 often does not set timezone therefore - * we must use server time to calc time zone. - * Could deviate slightly from the right zone. - * Smallest defined timezone difference is 15 minutes - * (i.e. Nepal). Rounding up/down is done to match - * this requirement. - */ - int val, seconds, remain, result; - struct timespec ts, utc; - utc = CURRENT_TIME; - ts = cnvrtDosUnixTm(rsp->SrvTime.Date, - rsp->SrvTime.Time, 0); - cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec)); - val = (int)(utc.tv_sec - ts.tv_sec); - seconds = abs(val); - result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; - remain = seconds % MIN_TZ_ADJ; - if (remain >= (MIN_TZ_ADJ / 2)) - result += MIN_TZ_ADJ; - if (val < 0) - result = -result; - server->timeAdj = result; - } else { - server->timeAdj = (int)tmp; - server->timeAdj *= 60; /* also in seconds */ - } - cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); - - - /* BB get server time for time conversions and add - code to use it and timezone since this is not UTC */ - - if (rsp->EncryptionKeyLength == - cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { - memcpy(ses->server->cryptkey, rsp->EncryptionKey, - CIFS_CRYPTO_KEY_SIZE); - } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { - rc = -EIO; /* need cryptkey unless plain text */ - goto neg_err_exit; - } - - cifs_dbg(FYI, "LANMAN negotiated\n"); - /* we will not end up setting signing flags - as no signing - was in LANMAN and server did not return the flags on */ - goto signing_check; -#else /* weak security disabled */ } else if (pSMBr->hdr.WordCount == 13) { - cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); - rc = -EOPNOTSUPP; -#endif /* WEAK_PW_HASH */ - goto neg_err_exit; + server->negflavor = CIFS_NEGFLAVOR_LANMAN; + rc = decode_lanman_negprot_rsp(server, pSMBr); + goto signing_check; } else if (pSMBr->hdr.WordCount != 17) { /* unknown wct */ rc = -EOPNOTSUPP; goto neg_err_exit; } - /* else wct == 17 NTLM */ + /* else wct == 17, NTLM or better */ + server->sec_mode = pSMBr->SecurityMode; if ((server->sec_mode & SECMODE_USER) == 0) cifs_dbg(FYI, "share mode security\n"); - if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0) -#ifdef CONFIG_CIFS_WEAK_PW_HASH - if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) -#endif /* CIFS_WEAK_PW_HASH */ - cifs_dbg(VFS, "Server requests plain text password but client support disabled\n"); - - if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) - server->secType = NTLMv2; - else if (secFlags & CIFSSEC_MAY_NTLM) - server->secType = NTLM; - else if (secFlags & CIFSSEC_MAY_NTLMV2) - server->secType = NTLMv2; - else if (secFlags & CIFSSEC_MAY_KRB5) - server->secType = Kerberos; - else if (secFlags & CIFSSEC_MAY_NTLMSSP) - server->secType = RawNTLMSSP; - else if (secFlags & CIFSSEC_MAY_LANMAN) - server->secType = LANMAN; - else { - rc = -EOPNOTSUPP; - cifs_dbg(VFS, "Invalid security type\n"); - goto neg_err_exit; - } - /* else ... any others ...? */ - /* one byte, so no need to convert this or EncryptionKeyLen from little endian */ server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), @@ -569,90 +625,26 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) server->capabilities = le32_to_cpu(pSMBr->Capabilities); server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); server->timeAdj *= 60; + if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { + server->negflavor = CIFS_NEGFLAVOR_UNENCAP; memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, CIFS_CRYPTO_KEY_SIZE); } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || server->capabilities & CAP_EXTENDED_SECURITY) && (pSMBr->EncryptionKeyLength == 0)) { - /* decode security blob */ - count = get_bcc(&pSMBr->hdr); - if (count < 16) { - rc = -EIO; - goto neg_err_exit; - } - spin_lock(&cifs_tcp_ses_lock); - if (server->srv_count > 1) { - spin_unlock(&cifs_tcp_ses_lock); - if (memcmp(server->server_GUID, - pSMBr->u.extended_response. - GUID, 16) != 0) { - cifs_dbg(FYI, "server UID changed\n"); - memcpy(server->server_GUID, - pSMBr->u.extended_response.GUID, - 16); - } - } else { - spin_unlock(&cifs_tcp_ses_lock); - memcpy(server->server_GUID, - pSMBr->u.extended_response.GUID, 16); - } - - if (count == 16) { - server->secType = RawNTLMSSP; - } else { - rc = decode_negTokenInit(pSMBr->u.extended_response. - SecurityBlob, count - 16, - server); - if (rc == 1) - rc = 0; - else - rc = -EINVAL; - if (server->secType == Kerberos) { - if (!server->sec_kerberos && - !server->sec_mskerberos) - rc = -EOPNOTSUPP; - } else if (server->secType == RawNTLMSSP) { - if (!server->sec_ntlmssp) - rc = -EOPNOTSUPP; - } else - rc = -EOPNOTSUPP; - } + server->negflavor = CIFS_NEGFLAVOR_EXTENDED; + rc = decode_ext_sec_blob(ses, pSMBr); } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { rc = -EIO; /* no crypt key only if plain text pwd */ - goto neg_err_exit; - } else - server->capabilities &= ~CAP_EXTENDED_SECURITY; - -#ifdef CONFIG_CIFS_WEAK_PW_HASH -signing_check: -#endif - if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { - /* MUST_SIGN already includes the MAY_SIGN FLAG - so if this is zero it means that signing is disabled */ - cifs_dbg(FYI, "Signing disabled\n"); - if (server->sec_mode & SECMODE_SIGN_REQUIRED) { - cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); - rc = -EOPNOTSUPP; - } - server->sec_mode &= - ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); - } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { - /* signing required */ - cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags); - if ((server->sec_mode & - (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { - cifs_dbg(VFS, "signing required but server lacks support\n"); - rc = -EOPNOTSUPP; - } else - server->sec_mode |= SECMODE_SIGN_REQUIRED; } else { - /* signing optional ie CIFSSEC_MAY_SIGN */ - if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0) - server->sec_mode &= - ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); + server->negflavor = CIFS_NEGFLAVOR_UNENCAP; + server->capabilities &= ~CAP_EXTENDED_SECURITY; } +signing_check: + if (!rc) + rc = cifs_enable_signing(server, ses->sign); neg_err_exit: cifs_buf_release(pSMB); @@ -777,9 +769,8 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) pSMB->hdr.Mid = get_next_mid(ses->server); - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + if (ses->server->sign) + pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; pSMB->hdr.Uid = ses->Suid; @@ -1540,8 +1531,7 @@ cifs_readv_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: /* result already set, check signature */ - if (server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (server->sign) { int rc = 0; rc = cifs_verify_signature(&rqst, server, @@ -3940,6 +3930,7 @@ QFileInfoRetry: pSMB->Pad = 0; pSMB->Fid = netfid; inc_rfc1001_len(pSMB, byte_count); + pSMB->t2.ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); @@ -4108,6 +4099,7 @@ UnixQFileInfoRetry: pSMB->Pad = 0; pSMB->Fid = netfid; inc_rfc1001_len(pSMB, byte_count); + pSMB->t2.ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); @@ -4794,11 +4786,8 @@ getDFSRetry: strncpy(pSMB->RequestFileName, search_name, name_len); } - if (ses->server) { - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - } + if (ses->server && ses->server->sign) + pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; pSMB->hdr.Uid = ses->Suid; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e3bc39bb9d12..afcb8a1a33b7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -85,7 +85,7 @@ enum { Opt_acl, Opt_noacl, Opt_locallease, Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, - Opt_multiuser, Opt_sloppy, + Opt_multiuser, Opt_sloppy, Opt_nosharesock, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -165,6 +165,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_mfsymlinks, "mfsymlinks" }, { Opt_multiuser, "multiuser" }, { Opt_sloppy, "sloppy" }, + { Opt_nosharesock, "nosharesock" }, { Opt_backupuid, "backupuid=%s" }, { Opt_backupgid, "backupgid=%s" }, @@ -275,6 +276,7 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_20, SMB20_VERSION_STRING}, { Smb_21, SMB21_VERSION_STRING }, { Smb_30, SMB30_VERSION_STRING }, + { Smb_302, SMB302_VERSION_STRING }, }; static int ip_connect(struct TCP_Server_Info *server); @@ -1024,44 +1026,48 @@ static int cifs_parse_security_flavors(char *value, substring_t args[MAX_OPT_ARGS]; + /* + * With mount options, the last one should win. Reset any existing + * settings back to default. + */ + vol->sectype = Unspecified; + vol->sign = false; + switch (match_token(value, cifs_secflavor_tokens, args)) { - case Opt_sec_krb5: - vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_SIGN; - break; - case Opt_sec_krb5i: - vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN; - break; case Opt_sec_krb5p: - /* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */ - cifs_dbg(VFS, "Krb5 cifs privacy not supported\n"); - break; - case Opt_sec_ntlmssp: - vol->secFlg |= CIFSSEC_MAY_NTLMSSP; + cifs_dbg(VFS, "sec=krb5p is not supported!\n"); + return 1; + case Opt_sec_krb5i: + vol->sign = true; + /* Fallthrough */ + case Opt_sec_krb5: + vol->sectype = Kerberos; break; case Opt_sec_ntlmsspi: - vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN; - break; - case Opt_ntlm: - /* ntlm is default so can be turned off too */ - vol->secFlg |= CIFSSEC_MAY_NTLM; + vol->sign = true; + /* Fallthrough */ + case Opt_sec_ntlmssp: + vol->sectype = RawNTLMSSP; break; case Opt_sec_ntlmi: - vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN; - break; - case Opt_sec_ntlmv2: - vol->secFlg |= CIFSSEC_MAY_NTLMV2; + vol->sign = true; + /* Fallthrough */ + case Opt_ntlm: + vol->sectype = NTLM; break; case Opt_sec_ntlmv2i: - vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN; + vol->sign = true; + /* Fallthrough */ + case Opt_sec_ntlmv2: + vol->sectype = NTLMv2; break; #ifdef CONFIG_CIFS_WEAK_PW_HASH case Opt_sec_lanman: - vol->secFlg |= CIFSSEC_MAY_LANMAN; + vol->sectype = LANMAN; break; #endif case Opt_sec_none: vol->nullauth = 1; - vol->secFlg |= CIFSSEC_MAY_NTLM; break; default: cifs_dbg(VFS, "bad security option: %s\n", value); @@ -1119,6 +1125,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->ops = &smb30_operations; vol->vals = &smb30_values; break; + case Smb_302: + vol->ops = &smb30_operations; /* currently identical with 3.0 */ + vol->vals = &smb302_values; + break; #endif default: cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); @@ -1424,7 +1434,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->local_lease = 1; break; case Opt_sign: - vol->secFlg |= CIFSSEC_MUST_SIGN; + vol->sign = true; break; case Opt_seal: /* we do not do the following in secFlags because seal @@ -1455,6 +1465,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_sloppy: sloppy = true; break; + case Opt_nosharesock: + vol->nosharesock = true; + break; /* Numeric Values */ case Opt_backupuid: @@ -1978,47 +1991,21 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr, static bool match_security(struct TCP_Server_Info *server, struct smb_vol *vol) { - unsigned int secFlags; - - if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) - secFlags = vol->secFlg; - else - secFlags = global_secflags | vol->secFlg; - - switch (server->secType) { - case LANMAN: - if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT))) - return false; - break; - case NTLMv2: - if (!(secFlags & CIFSSEC_MAY_NTLMV2)) - return false; - break; - case NTLM: - if (!(secFlags & CIFSSEC_MAY_NTLM)) - return false; - break; - case Kerberos: - if (!(secFlags & CIFSSEC_MAY_KRB5)) - return false; - break; - case RawNTLMSSP: - if (!(secFlags & CIFSSEC_MAY_NTLMSSP)) - return false; - break; - default: - /* shouldn't happen */ + /* + * The select_sectype function should either return the vol->sectype + * that was specified, or "Unspecified" if that sectype was not + * compatible with the given NEGOTIATE request. + */ + if (select_sectype(server, vol->sectype) == Unspecified) return false; - } - /* now check if signing mode is acceptable */ - if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && - (server->sec_mode & SECMODE_SIGN_REQUIRED)) - return false; - else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && - (server->sec_mode & - (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) - return false; + /* + * Now check if signing mode is acceptable. No need to check + * global_secflags at this point since if MUST_SIGN is set then + * the server->sign had better be too. + */ + if (vol->sign && !server->sign) + return false; return true; } @@ -2027,6 +2014,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) { struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; + if (vol->nosharesock) + return 0; + if ((server->vals != vol->vals) || (server->ops != vol->ops)) return 0; @@ -2216,7 +2206,11 @@ out_err: static int match_session(struct cifs_ses *ses, struct smb_vol *vol) { - switch (ses->server->secType) { + if (vol->sectype != Unspecified && + vol->sectype != ses->sectype) + return 0; + + switch (ses->sectype) { case Kerberos: if (!uid_eq(vol->cred_uid, ses->cred_uid)) return 0; @@ -2493,7 +2487,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) ses->cred_uid = volume_info->cred_uid; ses->linux_uid = volume_info->linux_uid; - ses->overrideSecFlg = volume_info->secFlg; + ses->sectype = volume_info->sectype; + ses->sign = volume_info->sign; mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); @@ -3656,7 +3651,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, NTLMv2 password here) */ #ifdef CONFIG_CIFS_WEAK_PW_HASH if ((global_secflags & CIFSSEC_MAY_LANMAN) && - (ses->server->secType == LANMAN)) + (ses->sectype == LANMAN)) calc_lanman_hash(tcon->password, ses->server->cryptkey, ses->server->sec_mode & SECMODE_PW_ENCRYPT ? true : false, @@ -3674,8 +3669,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, } } - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->sign) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_STATUS32) { @@ -3738,7 +3732,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, } bcc_ptr += length + 1; bytes_left -= (length + 1); - strncpy(tcon->treeName, tree, MAX_TREE_SIZE); + strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); /* mostly informational -- no need to fail on error here */ kfree(tcon->nativeFileSystem); @@ -3827,7 +3821,6 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, int rc = -ENOSYS; struct TCP_Server_Info *server = ses->server; - ses->flags = 0; ses->capabilities = server->capabilities; if (linuxExtEnabled == 0) ses->capabilities &= (~server->vals->cap_unix); @@ -3848,6 +3841,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, server->sequence_number = 0x2; server->session_estab = true; ses->auth_key.response = NULL; + if (server->ops->generate_signingkey) + server->ops->generate_signingkey(server); } mutex_unlock(&server->srv_mutex); @@ -3870,23 +3865,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, static int cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) { - switch (ses->server->secType) { - case Kerberos: - vol->secFlg = CIFSSEC_MUST_KRB5; + vol->sectype = ses->sectype; + + /* krb5 is special, since we don't need username or pw */ + if (vol->sectype == Kerberos) return 0; - case NTLMv2: - vol->secFlg = CIFSSEC_MUST_NTLMV2; - break; - case NTLM: - vol->secFlg = CIFSSEC_MUST_NTLM; - break; - case RawNTLMSSP: - vol->secFlg = CIFSSEC_MUST_NTLMSSP; - break; - case LANMAN: - vol->secFlg = CIFSSEC_MUST_LANMAN; - break; - } return cifs_set_cifscreds(vol, ses); } @@ -3912,6 +3895,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) vol_info->nocase = master_tcon->nocase; vol_info->local_lease = master_tcon->local_lease; vol_info->no_linux_ext = !master_tcon->unix_ext; + vol_info->sectype = master_tcon->ses->sectype; + vol_info->sign = master_tcon->ses->sign; rc = cifs_set_vol_auth(vol_info, master_tcon->ses); if (rc) { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5699b5036ed8..5175aebf6737 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -822,8 +822,7 @@ const struct dentry_operations cifs_dentry_ops = { /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; -static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, - struct qstr *q) +static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q) { struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; @@ -838,12 +837,10 @@ static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, return 0; } -static int cifs_ci_compare(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int cifs_ci_compare(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; if ((name->len == len) && (nls_strnicmp(codepage, name->name, str, len) == 0)) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4d8ba8d491e5..91d8629e69a2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -999,7 +999,7 @@ try_again: rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); if (!rc) goto try_again; - locks_delete_block(flock); + posix_unblock_lock(flock); } return rc; } @@ -1092,6 +1092,7 @@ struct lock_to_push { static int cifs_push_posix_locks(struct cifsFileInfo *cfile) { + struct inode *inode = cfile->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct file_lock *flock, **before; unsigned int count = 0, i = 0; @@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); - lock_flocks(); - cifs_for_each_lock(cfile->dentry->d_inode, before) { + spin_lock(&inode->i_lock); + cifs_for_each_lock(inode, before) { if ((*before)->fl_flags & FL_POSIX) count++; } - unlock_flocks(); + spin_unlock(&inode->i_lock); INIT_LIST_HEAD(&locks_to_send); @@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) } el = locks_to_send.next; - lock_flocks(); - cifs_for_each_lock(cfile->dentry->d_inode, before) { + spin_lock(&inode->i_lock); + cifs_for_each_lock(inode, before) { flock = *before; if ((flock->fl_flags & FL_POSIX) == 0) continue; @@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->offset = flock->fl_start; el = el->next; } - unlock_flocks(); + spin_unlock(&inode->i_lock); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { int stored_rc; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1bec014779fd..f7d4b2285efe 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -267,8 +267,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , if (treeCon->nocase) buffer->Flags |= SMBFLG_CASELESS; if ((treeCon->ses) && (treeCon->ses->server)) - if (treeCon->ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (treeCon->ses->server->sign) buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f1213799de1a..ab8778469394 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -126,6 +126,22 @@ out: dput(dentry); } +/* + * Is it possible that this directory might turn out to be a DFS referral + * once we go to try and use it? + */ +static bool +cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) +{ +#ifdef CONFIG_CIFS_DFS_UPCALL + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + + if (tcon->Flags & SMB_SHARE_IS_IN_DFS) + return true; +#endif + return false; +} + static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { @@ -135,6 +151,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; + /* + * Windows CIFS servers generally make DFS referrals look + * like directories in FIND_* responses with the reparse + * attribute flag also set (since DFS junctions are + * reparse points). We must revalidate at least these + * directory inodes before trying to use them (if + * they are DFS we will get PATH_NOT_COVERED back + * when queried directly and can then try to connect + * to the DFS target) + */ + if (cifs_dfs_is_possible(cifs_sb) && + (fattr->cf_cifsattrs & ATTR_REPARSE)) + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f230571a7ab3..79358e341fd2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -138,8 +138,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->sign) pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_UNICODE) { @@ -310,11 +309,10 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, return; } -static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, - struct cifs_ses *ses, - const struct nls_table *nls_cp) +static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, + struct cifs_ses *ses, + const struct nls_table *nls_cp) { - int rc = 0; int len; char *bcc_ptr = *pbcc_area; @@ -322,24 +320,22 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, len = strnlen(bcc_ptr, bleft); if (len >= bleft) - return rc; + return; kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); if (ses->serverOS) strncpy(ses->serverOS, bcc_ptr, len); - if (strncmp(ses->serverOS, "OS/2", 4) == 0) { + if (strncmp(ses->serverOS, "OS/2", 4) == 0) cifs_dbg(FYI, "OS/2 server\n"); - ses->flags |= CIFS_SES_OS2; - } bcc_ptr += len + 1; bleft -= len + 1; len = strnlen(bcc_ptr, bleft); if (len >= bleft) - return rc; + return; kfree(ses->serverNOS); @@ -352,7 +348,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, len = strnlen(bcc_ptr, bleft); if (len > bleft) - return rc; + return; /* No domain field in LANMAN case. Domain is returned by old servers in the SMB negprot response */ @@ -360,8 +356,6 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, but thus do return domain here we could add parsing for it later, but it is not very important */ cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); - - return rc; } int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, @@ -432,8 +426,7 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; if (!ses->server->session_estab) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; @@ -471,8 +464,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; - if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; if (!ses->server->session_estab) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; @@ -558,6 +550,56 @@ setup_ntlmv2_ret: return rc; } +enum securityEnum +select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) +{ + switch (server->negflavor) { + case CIFS_NEGFLAVOR_EXTENDED: + switch (requested) { + case Kerberos: + case RawNTLMSSP: + return requested; + case Unspecified: + if (server->sec_ntlmssp && + (global_secflags & CIFSSEC_MAY_NTLMSSP)) + return RawNTLMSSP; + if ((server->sec_kerberos || server->sec_mskerberos) && + (global_secflags & CIFSSEC_MAY_KRB5)) + return Kerberos; + /* Fallthrough */ + default: + return Unspecified; + } + case CIFS_NEGFLAVOR_UNENCAP: + switch (requested) { + case NTLM: + case NTLMv2: + return requested; + case Unspecified: + if (global_secflags & CIFSSEC_MAY_NTLMV2) + return NTLMv2; + if (global_secflags & CIFSSEC_MAY_NTLM) + return NTLM; + /* Fallthrough */ + default: + return Unspecified; + } + case CIFS_NEGFLAVOR_LANMAN: + switch (requested) { + case LANMAN: + return requested; + case Unspecified: + if (global_secflags & CIFSSEC_MAY_LANMAN) + return LANMAN; + /* Fallthrough */ + default: + return Unspecified; + } + default: + return Unspecified; + } +} + int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -579,11 +621,18 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, u16 blob_len; char *ntlmsspblob = NULL; - if (ses == NULL) + if (ses == NULL) { + WARN(1, "%s: ses == NULL!", __func__); return -EINVAL; + } - type = ses->server->secType; + type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); + if (type == Unspecified) { + cifs_dbg(VFS, "Unable to select appropriate authentication method!"); + return -EINVAL; + } + if (type == RawNTLMSSP) { /* if memory allocation is successful, caller of this function * frees it. @@ -643,8 +692,6 @@ ssetup_ntlmssp_authenticate: } bcc_ptr = str_area; - ses->flags &= ~CIFS_SES_LANMAN; - iov[1].iov_base = NULL; iov[1].iov_len = 0; @@ -668,7 +715,6 @@ ssetup_ntlmssp_authenticate: ses->server->sec_mode & SECMODE_PW_ENCRYPT ? true : false, lnm_session_key); - ses->flags |= CIFS_SES_LANMAN; memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); bcc_ptr += CIFS_AUTH_RESP_SIZE; @@ -938,8 +984,7 @@ ssetup_ntlmssp_authenticate: } decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); } else { - rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, - ses, nls_cp); + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); } ssetup_exit: diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3efdb9d5c0b8..e813f04511d8 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -449,8 +449,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) * WRITEX header, not including the 4 byte RFC1001 length. */ if (!(server->capabilities & CAP_LARGE_WRITE_X) || - (!(server->capabilities & CAP_UNIX) && - (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) + (!(server->capabilities & CAP_UNIX) && server->sign)) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); @@ -765,20 +764,14 @@ smb_set_file_info(struct inode *inode, const char *full_path, } tcon = tlink_tcon(tlink); - /* - * NT4 apparently returns success on this call, but it doesn't really - * work. - */ - if (!(tcon->ses->flags & CIFS_SES_NT4)) { - rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, - cifs_sb->local_nls, + rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - cinode->cifsAttrs = le32_to_cpu(buf->Attributes); - goto out; - } else if (rc != -EOPNOTSUPP && rc != -EINVAL) - goto out; + if (rc == 0) { + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + goto out; + } else if (rc != -EOPNOTSUPP && rc != -EINVAL) { + goto out; } cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); @@ -964,4 +957,6 @@ struct smb_version_values smb1_values = { .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, .cap_large_files = CAP_LARGE_FILES, .oplock_read = OPLOCK_READ, + .signing_enabled = SECMODE_SIGN_ENABLED, + .signing_required = SECMODE_SIGN_REQUIRED, }; diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 7c0e2143e775..c38350851b08 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -54,5 +54,7 @@ #define SMB2_SIGNATURE_SIZE (16) #define SMB2_NTLMV2_SESSKEY_SIZE (16) #define SMB2_HMACSHA256_SIZE (32) +#define SMB2_CMACAES_SIZE (16) +#define SMB3_SIGNKEY_SIZE (16) #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 10383d8c015b..b0c43345cd98 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -266,6 +266,10 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); break; case SMB2_IOCTL: + *off = le32_to_cpu( + ((struct smb2_ioctl_rsp *)hdr)->OutputOffset); + *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount); + break; case SMB2_CHANGE_NOTIFY: default: /* BB FIXME for unimplemented cases above */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f2e76f3b0c61..6d15cab95b99 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -281,6 +281,25 @@ smb2_clear_stats(struct cifs_tcon *tcon) } static void +smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon) +{ + seq_puts(m, "\n\tShare Capabilities:"); + if (tcon->capabilities & SMB2_SHARE_CAP_DFS) + seq_puts(m, " DFS,"); + if (tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) + seq_puts(m, " CONTINUOUS AVAILABILITY,"); + if (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT) + seq_puts(m, " SCALEOUT,"); + if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) + seq_puts(m, " CLUSTER,"); + if (tcon->capabilities & SMB2_SHARE_CAP_ASYMMETRIC) + seq_puts(m, " ASYMMETRIC,"); + if (tcon->capabilities == 0) + seq_puts(m, " None"); + seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags); +} + +static void smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) { #ifdef CONFIG_CIFS_STATS @@ -292,7 +311,6 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) seq_printf(m, "\nSessionSetups: %d sent %d failed", atomic_read(&sent[SMB2_SESSION_SETUP_HE]), atomic_read(&failed[SMB2_SESSION_SETUP_HE])); -#define SMB2LOGOFF 0x0002 /* trivial request/resp */ seq_printf(m, "\nLogoffs: %d sent %d failed", atomic_read(&sent[SMB2_LOGOFF_HE]), atomic_read(&failed[SMB2_LOGOFF_HE])); @@ -645,6 +663,7 @@ struct smb_version_operations smb30_operations = { .dump_detail = smb2_dump_detail, .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, + .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -690,6 +709,7 @@ struct smb_version_operations smb30_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, + .generate_signingkey = generate_smb3signingkey, .calc_signature = smb3_calc_signature, }; @@ -709,6 +729,8 @@ struct smb_version_values smb20_values = { .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, .oplock_read = SMB2_OPLOCK_LEVEL_II, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, }; struct smb_version_values smb21_values = { @@ -727,6 +749,8 @@ struct smb_version_values smb21_values = { .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, .oplock_read = SMB2_OPLOCK_LEVEL_II, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, }; struct smb_version_values smb30_values = { @@ -745,4 +769,26 @@ struct smb_version_values smb30_values = { .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, .oplock_read = SMB2_OPLOCK_LEVEL_II, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, +}; + +struct smb_version_values smb302_values = { + .version_string = SMB302_VERSION_STRING, + .protocol_id = SMB302_PROT_ID, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .oplock_read = SMB2_OPLOCK_LEVEL_II, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, }; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b95ce2b54e8..2b312e4eeaa6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,7 +1,7 @@ /* * fs/cifs/smb2pdu.c * - * Copyright (C) International Business Machines Corp., 2009, 2012 + * Copyright (C) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 * Author(s): Steve French (sfrench@us.ibm.com) * Pavel Shilovsky (pshilovsky@samba.org) 2012 @@ -108,19 +108,33 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , if (!tcon) goto out; + /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */ + /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ + /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ + if ((tcon->ses) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) + hdr->CreditCharge = cpu_to_le16(1); + /* else CreditCharge MBZ */ + hdr->TreeId = tcon->tid; /* Uid is not converted */ if (tcon->ses) hdr->SessionId = tcon->ses->Suid; - /* BB check following DFS flags BB */ - /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */ - if (tcon->share_flags & SHI1005_FLAGS_DFS) - hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; - /* BB how does SMB2 do case sensitive? */ - /* if (tcon->nocase) - hdr->Flags |= SMBFLG_CASELESS; */ - if (tcon->ses && tcon->ses->server && - (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) + + /* + * If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have + * to pass the path on the Open SMB prefixed by \\server\share. + * Not sure when we would need to do the augmented path (if ever) and + * setting this flag breaks the SMB2 open operation since it is + * illegal to send an empty path name (without \\server\share prefix) + * when the DFS flag is set in the SMB open header. We could + * consider setting the flag on all operations other than open + * but it is safer to net set it for now. + */ +/* if (tcon->share_flags & SHI1005_FLAGS_DFS) + hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */ + + if (tcon->ses && tcon->ses->server && tcon->ses->server->sign) hdr->Flags |= SMB2_FLAGS_SIGNED; out: pdu->StructureSize2 = cpu_to_le16(parmsize); @@ -328,34 +342,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) struct kvec iov[1]; int rc = 0; int resp_buftype; - struct TCP_Server_Info *server; - unsigned int sec_flags; - u16 temp = 0; + struct TCP_Server_Info *server = ses->server; int blob_offset, blob_length; char *security_blob; int flags = CIFS_NEG_OP; cifs_dbg(FYI, "Negotiate protocol\n"); - if (ses->server) - server = ses->server; - else { - rc = -EIO; - return rc; + if (!server) { + WARN(1, "%s: server is NULL!\n", __func__); + return -EIO; } rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); if (rc) return rc; - /* if any of auth flags (ie not sign or seal) are overriden use them */ - if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) - sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ - else /* if override flags set only sign/seal OR them with global auth */ - sec_flags = global_secflags | ses->overrideSecFlg; - - cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); - req->hdr.SessionId = 0; req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); @@ -364,12 +366,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) inc_rfc1001_len(req, 2); /* only one of SMB2 signing flags may be set in SMB2 request */ - if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) - temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; - else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ - temp = SMB2_NEGOTIATE_SIGNING_ENABLED; - - req->SecurityMode = cpu_to_le16(temp); + if (ses->sign) + req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); + else if (global_secflags & CIFSSEC_MAY_SIGN) + req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); + else + req->SecurityMode = 0; req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); @@ -399,6 +401,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); + else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) + cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); else { cifs_dbg(VFS, "Illegal dialect returned by server %d\n", le16_to_cpu(rsp->DialectRevision)); @@ -407,6 +411,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } server->dialect = le16_to_cpu(rsp->DialectRevision); + /* SMB2 only has an extended negflavor */ + server->negflavor = CIFS_NEGFLAVOR_EXTENDED; server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); server->max_read = le32_to_cpu(rsp->MaxReadSize); server->max_write = le32_to_cpu(rsp->MaxWriteSize); @@ -418,44 +424,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, &rsp->hdr); - if (blob_length == 0) { - cifs_dbg(VFS, "missing security blob on negprot\n"); - rc = -EIO; - goto neg_exit; - } - - cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); - if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { - cifs_dbg(FYI, "Signing required\n"); - if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | - SMB2_NEGOTIATE_SIGNING_ENABLED))) { - cifs_dbg(VFS, "signing required but server lacks support\n"); - rc = -EOPNOTSUPP; - goto neg_exit; - } - server->sec_mode |= SECMODE_SIGN_REQUIRED; - } else if (sec_flags & CIFSSEC_MAY_SIGN) { - cifs_dbg(FYI, "Signing optional\n"); - if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { - cifs_dbg(FYI, "Server requires signing\n"); - server->sec_mode |= SECMODE_SIGN_REQUIRED; - } else { - server->sec_mode &= - ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); - } - } else { - cifs_dbg(FYI, "Signing disabled\n"); - if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { - cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); - rc = -EOPNOTSUPP; - goto neg_exit; - } - server->sec_mode &= - ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); - } + /* + * See MS-SMB2 section 2.2.4: if no blob, client picks default which + * for us will be + * ses->sectype = RawNTLMSSP; + * but for time being this is our only auth choice so doesn't matter. + * We just found a server which sets blob length to zero expecting raw. + */ + if (blob_length == 0) + cifs_dbg(FYI, "missing security blob on negprot\n"); + rc = cifs_enable_signing(server, ses->sign); #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ - rc = decode_neg_token_init(security_blob, blob_length, + if (rc) + goto neg_exit; + if (blob_length) + rc = decode_neg_token_init(security_blob, blob_length, &server->sec_type); if (rc == 1) rc = 0; @@ -480,9 +464,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, int rc = 0; int resp_buftype; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ - struct TCP_Server_Info *server; - unsigned int sec_flags; - u8 temp = 0; + struct TCP_Server_Info *server = ses->server; u16 blob_length = 0; char *security_blob; char *ntlmssp_blob = NULL; @@ -490,11 +472,9 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, cifs_dbg(FYI, "Session Setup\n"); - if (ses->server) - server = ses->server; - else { - rc = -EIO; - return rc; + if (!server) { + WARN(1, "%s: server is NULL!\n", __func__); + return -EIO; } /* @@ -505,7 +485,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, if (!ses->ntlmssp) return -ENOMEM; - ses->server->secType = RawNTLMSSP; + /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ + ses->sectype = RawNTLMSSP; ssetup_ntlmssp_authenticate: if (phase == NtLmChallenge) @@ -515,28 +496,19 @@ ssetup_ntlmssp_authenticate: if (rc) return rc; - /* if any of auth flags (ie not sign or seal) are overriden use them */ - if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) - sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ - else /* if override flags set only sign/seal OR them with global auth */ - sec_flags = global_secflags | ses->overrideSecFlg; - - cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); - req->hdr.SessionId = 0; /* First session, not a reauthenticate */ req->VcNumber = 0; /* MBZ */ /* to enable echos and oplocks */ req->hdr.CreditRequest = cpu_to_le16(3); /* only one of SMB2 signing flags may be set in SMB2 request */ - if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) - temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; - else if (ses->server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) - temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; - else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ - temp = SMB2_NEGOTIATE_SIGNING_ENABLED; - - req->SecurityMode = temp; + if (server->sign) + req->SecurityMode = SMB2_NEGOTIATE_SIGNING_REQUIRED; + else if (global_secflags & CIFSSEC_MAY_SIGN) /* one flag unlike MUST_ */ + req->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED; + else + req->SecurityMode = 0; + req->Capabilities = 0; req->Channel = 0; /* MBZ */ @@ -679,7 +651,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) /* since no tcon, smb2_init can not do this, so do here */ req->hdr.SessionId = ses->Suid; - if (server->sec_mode & SECMODE_SIGN_REQUIRED) + if (server->sign) req->hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); @@ -788,11 +760,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, } tcon->share_flags = le32_to_cpu(rsp->ShareFlags); + tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); tcon->tidStatus = CifsGood; tcon->need_reconnect = false; tcon->tid = rsp->hdr.TreeId; - strncpy(tcon->treeName, tree, MAX_TREE_SIZE); + strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) @@ -1036,6 +1009,122 @@ creat_exit: return rc; } +/* + * SMB2 IOCTL is used for both IOCTLs and FSCTLs + */ +int +SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, + u32 indatalen, char **out_data, u32 *plen /* returned data len */) +{ + struct smb2_ioctl_req *req; + struct smb2_ioctl_rsp *rsp; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[2]; + int resp_buftype; + int num_iovecs; + int rc = 0; + + cifs_dbg(FYI, "SMB2 IOCTL\n"); + + /* zero out returned data len, in case of error */ + if (plen) + *plen = 0; + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req); + if (rc) + return rc; + + req->CtlCode = cpu_to_le32(opcode); + req->PersistentFileId = persistent_fid; + req->VolatileFileId = volatile_fid; + + if (indatalen) { + req->InputCount = cpu_to_le32(indatalen); + /* do not set InputOffset if no input data */ + req->InputOffset = + cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4); + iov[1].iov_base = in_data; + iov[1].iov_len = indatalen; + num_iovecs = 2; + } else + num_iovecs = 1; + + req->OutputOffset = 0; + req->OutputCount = 0; /* MBZ */ + + /* + * Could increase MaxOutputResponse, but that would require more + * than one credit. Windows typically sets this smaller, but for some + * ioctls it may be useful to allow server to send more. No point + * limiting what the server can send as long as fits in one credit + */ + req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ + + if (is_fsctl) + req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); + else + req->Flags = 0; + + iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; + + if (indatalen) + inc_rfc1001_len(req, indatalen); + + rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); + rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; + + if (rc != 0) { + if (tcon) + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + goto ioctl_exit; + } + + /* check if caller wants to look at return data or just return rc */ + if ((plen == NULL) || (out_data == NULL)) + goto ioctl_exit; + + *plen = le32_to_cpu(rsp->OutputCount); + + /* We check for obvious errors in the output buffer length and offset */ + if (*plen == 0) + goto ioctl_exit; /* server returned no data */ + else if (*plen > 0xFF00) { + cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen); + *plen = 0; + rc = -EIO; + goto ioctl_exit; + } + + if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) { + cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen, + le32_to_cpu(rsp->OutputOffset)); + *plen = 0; + rc = -EIO; + goto ioctl_exit; + } + + *out_data = kmalloc(*plen, GFP_KERNEL); + if (*out_data == NULL) { + rc = -ENOMEM; + goto ioctl_exit; + } + + memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset), + *plen); +ioctl_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +} + int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid) @@ -1384,8 +1473,7 @@ smb2_readv_callback(struct mid_q_entry *mid) case MID_RESPONSE_RECEIVED: credits_received = le16_to_cpu(buf->CreditRequest); /* result already set, check signature */ - if (server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (server->sign) { int rc; rc = smb2_verify_signature(&rqst, server); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4cb4ced258cb..f31043b26bd3 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1,7 +1,7 @@ /* * fs/cifs/smb2pdu.h * - * Copyright (c) International Business Machines Corp., 2009, 2010 + * Copyright (c) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 * Author(s): Steve French (sfrench@us.ibm.com) * Pavel Shilovsky (pshilovsky@samba.org) 2012 @@ -170,6 +170,7 @@ struct smb2_negotiate_req { #define SMB20_PROT_ID 0x0202 #define SMB21_PROT_ID 0x0210 #define SMB30_PROT_ID 0x0300 +#define SMB302_PROT_ID 0x0302 #define BAD_PROT_ID 0xFFFF /* SecurityMode flags */ @@ -283,10 +284,17 @@ struct smb2_tree_connect_rsp { #define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 #define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 #define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 -#define SHI1005_FLAGS_ENABLE_HASH 0x00002000 +#define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 +#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 +#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 +#define SHI1005_FLAGS_ALL 0x0000FF33 /* Possible share capabilities */ -#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) +#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ +#define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */ +#define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ +#define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ +#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ struct smb2_tree_disconnect_req { struct smb2_hdr hdr; @@ -477,6 +485,75 @@ struct create_lease { struct lease_context lcontext; } __packed; +/* this goes in the ioctl buffer when doing a copychunk request */ +struct copychunk_ioctl { + char SourceKey[24]; + __le32 ChunkCount; /* we are only sending 1 */ + __le32 Reserved; + /* array will only be one chunk long for us */ + __le64 SourceOffset; + __le64 TargetOffset; + __le32 Length; /* how many bytes to copy */ + __u32 Reserved2; +} __packed; + +/* Response and Request are the same format */ +struct validate_negotiate_info { + __le32 Capabilities; + __u8 Guid[SMB2_CLIENT_GUID_SIZE]; + __le16 SecurityMode; + __le16 DialectCount; + __le16 Dialect[1]; +} __packed; + +#define RSS_CAPABLE 0x00000001 +#define RDMA_CAPABLE 0x00000002 + +struct network_interface_info_ioctl_rsp { + __le32 Next; /* next interface. zero if this is last one */ + __le32 IfIndex; + __le32 Capability; /* RSS or RDMA Capable */ + __le32 Reserved; + __le64 LinkSpeed; + char SockAddr_Storage[128]; +} __packed; + +#define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */ + +struct smb2_ioctl_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 57 */ + __u16 Reserved; + __le32 CtlCode; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 InputOffset; + __le32 InputCount; + __le32 MaxInputResponse; + __le32 OutputOffset; + __le32 OutputCount; + __le32 MaxOutputResponse; + __le32 Flags; + __u32 Reserved2; + char Buffer[0]; +} __packed; + +struct smb2_ioctl_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 57 */ + __u16 Reserved; + __le32 CtlCode; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ + __le32 InputOffset; + __le32 InputCount; + __le32 OutputOffset; + __le32 OutputCount; + __le32 Flags; + __u32 Reserved2; + /* char * buffer[] */ +} __packed; + /* Currently defined values for close flags */ #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) struct smb2_close_req { @@ -517,17 +594,25 @@ struct smb2_flush_rsp { __le16 Reserved; } __packed; +/* For read request Flags field below, following flag is defined for SMB3.02 */ +#define SMB2_READFLAG_READ_UNBUFFERED 0x01 + +/* Channel field for read and write: exactly one of following flags can be set*/ +#define SMB2_CHANNEL_NONE 0x00000000 +#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ + struct smb2_read_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 49 */ __u8 Padding; /* offset from start of SMB2 header to place read */ - __u8 Reserved; + __u8 Flags; /* MBZ unless SMB3.02 or later */ __le32 Length; __le64 Offset; __u64 PersistentFileId; /* opaque endianness */ __u64 VolatileFileId; /* opaque endianness */ __le32 MinimumCount; - __le32 Channel; /* Reserved MBZ */ + __le32 Channel; /* MBZ except for SMB3 or later */ __le32 RemainingBytes; __le16 ReadChannelInfoOffset; /* Reserved MBZ */ __le16 ReadChannelInfoLength; /* Reserved MBZ */ @@ -545,8 +630,9 @@ struct smb2_read_rsp { __u8 Buffer[1]; } __packed; -/* For write request Flags field below the following flag is defined: */ -#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 +/* For write request Flags field below the following flags are defined: */ +#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */ +#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ struct smb2_write_req { struct smb2_hdr hdr; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 2aa3535e38ce..d4e1eb807457 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -111,6 +111,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __u32 desired_access, __u32 create_disposition, __u32 file_attributes, __u32 create_options, __u8 *oplock, struct smb2_file_all_info *buf); +extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u32 opcode, + bool is_fsctl, char *in_data, u32 indatalen, + char **out_data, u32 *plen /* returned data len */); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 01f0ac800780..09b4fbaadeb6 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -116,11 +116,155 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; } +void +generate_smb3signingkey(struct TCP_Server_Info *server) +{ + unsigned char zero = 0x0; + __u8 i[4] = {0, 0, 0, 1}; + __u8 L[4] = {0, 0, 0, 128}; + int rc = 0; + unsigned char prfhash[SMB2_HMACSHA256_SIZE]; + unsigned char *hashptr = prfhash; + + memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); + memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); + + rc = crypto_shash_setkey(server->secmech.hmacsha256, + server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + if (rc) { + cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + if (rc) { + cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + i, 4); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with n\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + "SMB2AESCMAC", 12); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with label\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + &zero, 1); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + "SmbSign", 8); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with context\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + L, 4); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with L\n", __func__); + goto smb3signkey_ret; + } + + rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, + hashptr); + if (rc) { + cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); + goto smb3signkey_ret; + } + + memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); + +smb3signkey_ret: + return; +} + int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - cifs_dbg(FYI, "smb3 signatures not supported yet\n"); - return -EOPNOTSUPP; + int i, rc; + unsigned char smb3_signature[SMB2_CMACAES_SIZE]; + unsigned char *sigptr = smb3_signature; + struct kvec *iov = rqst->rq_iov; + int n_vec = rqst->rq_nvec; + struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + + memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); + memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); + + rc = crypto_shash_setkey(server->secmech.cmacaes, + server->smb3signingkey, SMB2_CMACAES_SIZE); + if (rc) { + cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); + return rc; + } + + rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); + if (rc) { + cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); + return rc; + } + + for (i = 0; i < n_vec; i++) { + if (iov[i].iov_len == 0) + continue; + if (iov[i].iov_base == NULL) { + cifs_dbg(VFS, "null iovec entry"); + return -EIO; + } + /* + * The first entry includes a length field (which does not get + * signed that occupies the first 4 bytes before the header). + */ + if (i == 0) { + if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ + break; /* nothing to sign or corrupt header */ + rc = + crypto_shash_update( + &server->secmech.sdesccmacaes->shash, + iov[i].iov_base + 4, iov[i].iov_len - 4); + } else { + rc = + crypto_shash_update( + &server->secmech.sdesccmacaes->shash, + iov[i].iov_base, iov[i].iov_len); + } + if (rc) { + cifs_dbg(VFS, "%s: Couldn't update cmac aes with payload\n", + __func__); + return rc; + } + } + + /* now hash over the rq_pages array */ + for (i = 0; i < rqst->rq_npages; i++) { + struct kvec p_iov; + + cifs_rqst_page_to_kvec(rqst, i, &p_iov); + crypto_shash_update(&server->secmech.sdesccmacaes->shash, + p_iov.iov_base, p_iov.iov_len); + kunmap(rqst->rq_pages[i]); + } + + rc = crypto_shash_final(&server->secmech.sdesccmacaes->shash, + sigptr); + if (rc) + cifs_dbg(VFS, "%s: Could not generate cmac aes\n", __func__); + + memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); + + return rc; } /* must be called with server->srv_mutex held */ @@ -275,8 +419,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, dump_smb(mid->resp_buf, min_t(u32, 80, len)); /* convert the length into a more usable form */ - if ((len > 24) && - (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { + if (len > 24 && server->sign) { int rc; rc = smb2_verify_signature(&rqst, server); diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 7056b891e087..d952ee48f4dc 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -1,7 +1,7 @@ /* * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions * - * Copyright (c) International Business Machines Corp., 2002,2009 + * Copyright (c) International Business Machines Corp., 2002,2013 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -22,7 +22,7 @@ /* IOCTL information */ /* * List of ioctl/fsctl function codes that are or could be useful in the - * future to remote clients like cifs or SMB2 client. There is probably + * future to remote clients like cifs or SMB2/SMB3 client. This is probably * a slightly larger set of fsctls that NTFS local filesystem could handle, * including the seven below that we do not have struct definitions for. * Even with protocol definitions for most of these now available, we still @@ -30,7 +30,13 @@ * remotely. Some of the following, such as the encryption/compression ones * could be invoked from tools via a specialized hook into the VFS rather * than via the standard vfs entry points + * + * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are + * below). Additional detail on less common ones can be found in MS-FSCC + * section 2.3. */ +#define FSCTL_DFS_GET_REFERRALS 0x00060194 +#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0 #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 @@ -71,14 +77,31 @@ #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ +#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ #define FSCTL_SIS_LINK_FILES 0x0009C104 #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ /* strange that the number for this op is not sequential with previous op */ #define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */ +/* Enumerate previous versions of a file */ +#define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 +/* Retrieve an opaque file reference for server-side data movement ie copy */ +#define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 +#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ +#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ +/* Perform server-side data movement */ +#define FSCTL_SRV_COPYCHUNK 0x001440F2 +#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 +#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */ +#define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */ #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 #define IO_REPARSE_TAG_HSM 0xC0000004 #define IO_REPARSE_TAG_SIS 0x80000007 + +/* fsctl flags */ +/* If Flags is set to this value, the request is an FSCTL not ioctl request */ +#define SMB2_0_IOCTL_IS_FSCTL 0x00000001 + diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bfbf4700d160..6fdcb1b4a106 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; - error = wait_event_freezekillable(server->response_q, + error = wait_event_freezekillable_unsafe(server->response_q, midQ->mid_state != MID_REQUEST_SUBMITTED); if (error < 0) return -ERESTARTSYS; @@ -463,7 +463,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct mid_q_entry *mid; /* enable signing if server requires it */ - if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (server->sign) hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; mid = AllocMidQEntry(hdr, server); @@ -612,7 +612,7 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, dump_smb(mid->resp_buf, min_t(u32, 92, len)); /* convert the length into a more usable form */ - if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { + if (server->sign) { struct kvec iov; int rc = 0; struct smb_rqst rqst = { .rq_iov = &iov, diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 87e0ee9f4465..14a14808320c 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -487,13 +487,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) /* skip null entries */ if (vdir->d_fileno && name.len) { - /* try to look up this entry in the dcache, that way - * userspace doesn't have to worry about breaking - * getcwd by having mismatched inode numbers for - * internal volume mountpoints. */ - ino = find_inode_number(de, &name); - if (!ino) ino = vdir->d_fileno; - + ino = vdir->d_fileno; type = CDT2DT(vdir->d_type); if (!dir_emit(ctx, name.name, name.len, ino, type)) break; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2b6cb23dd14e..1d1c41f1014d 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, count); + len = flush_write_buffer(file->f_path.dentry, buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); diff --git a/fs/coredump.c b/fs/coredump.c index dafafbafa731..72f816d6cad9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -45,69 +45,79 @@ #include <trace/events/sched.h> int core_uses_pid; -char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; +char core_pattern[CORENAME_MAX_SIZE] = "core"; +static int core_name_size = CORENAME_MAX_SIZE; struct core_name { char *corename; int used, size; }; -static atomic_t call_count = ATOMIC_INIT(1); /* The maximal length of core_pattern is also specified in sysctl.c */ -static int expand_corename(struct core_name *cn) +static int expand_corename(struct core_name *cn, int size) { - char *old_corename = cn->corename; - - cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); - cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); + char *corename = krealloc(cn->corename, size, GFP_KERNEL); - if (!cn->corename) { - kfree(old_corename); + if (!corename) return -ENOMEM; - } + if (size > core_name_size) /* racy but harmless */ + core_name_size = size; + + cn->size = ksize(corename); + cn->corename = corename; return 0; } +static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) +{ + int free, need; + +again: + free = cn->size - cn->used; + need = vsnprintf(cn->corename + cn->used, free, fmt, arg); + if (need < free) { + cn->used += need; + return 0; + } + + if (!expand_corename(cn, cn->size + need - free + 1)) + goto again; + + return -ENOMEM; +} + static int cn_printf(struct core_name *cn, const char *fmt, ...) { - char *cur; - int need; - int ret; va_list arg; + int ret; va_start(arg, fmt); - need = vsnprintf(NULL, 0, fmt, arg); + ret = cn_vprintf(cn, fmt, arg); va_end(arg); - if (likely(need < cn->size - cn->used - 1)) - goto out_printf; + return ret; +} - ret = expand_corename(cn); - if (ret) - goto expand_fail; +static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) +{ + int cur = cn->used; + va_list arg; + int ret; -out_printf: - cur = cn->corename + cn->used; va_start(arg, fmt); - vsnprintf(cur, need + 1, fmt, arg); + ret = cn_vprintf(cn, fmt, arg); va_end(arg); - cn->used += need; - return 0; -expand_fail: + for (; cur < cn->used; ++cur) { + if (cn->corename[cur] == '/') + cn->corename[cur] = '!'; + } return ret; } -static void cn_escape(char *str) -{ - for (; *str; str++) - if (*str == '/') - *str = '!'; -} - static int cn_print_exe_file(struct core_name *cn) { struct file *exe_file; @@ -115,12 +125,8 @@ static int cn_print_exe_file(struct core_name *cn) int ret; exe_file = get_mm_exe_file(current->mm); - if (!exe_file) { - char *commstart = cn->corename + cn->used; - ret = cn_printf(cn, "%s (path unknown)", current->comm); - cn_escape(commstart); - return ret; - } + if (!exe_file) + return cn_esc_printf(cn, "%s (path unknown)", current->comm); pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { @@ -134,9 +140,7 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } - cn_escape(path); - - ret = cn_printf(cn, "%s", path); + ret = cn_esc_printf(cn, "%s", path); free_buf: kfree(pathbuf); @@ -157,19 +161,19 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) int pid_in_pattern = 0; int err = 0; - cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); - cn->corename = kmalloc(cn->size, GFP_KERNEL); cn->used = 0; - - if (!cn->corename) + cn->corename = NULL; + if (expand_corename(cn, core_name_size)) return -ENOMEM; + cn->corename[0] = '\0'; + + if (ispipe) + ++pat_ptr; /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { if (*pat_ptr != '%') { - if (*pat_ptr == 0) - goto out; err = cn_printf(cn, "%c", *pat_ptr++); } else { switch (*++pat_ptr) { @@ -210,22 +214,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) break; } /* hostname */ - case 'h': { - char *namestart = cn->corename + cn->used; + case 'h': down_read(&uts_sem); - err = cn_printf(cn, "%s", + err = cn_esc_printf(cn, "%s", utsname()->nodename); up_read(&uts_sem); - cn_escape(namestart); break; - } /* executable */ - case 'e': { - char *commstart = cn->corename + cn->used; - err = cn_printf(cn, "%s", current->comm); - cn_escape(commstart); + case 'e': + err = cn_esc_printf(cn, "%s", current->comm); break; - } case 'E': err = cn_print_exe_file(cn); break; @@ -244,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) return err; } +out: /* Backward compatibility with core_uses_pid: * * If core_pattern does not include a %p (as is the default) @@ -254,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) if (err) return err; } -out: return ispipe; } @@ -549,7 +547,7 @@ void do_coredump(siginfo_t *siginfo) if (ispipe < 0) { printk(KERN_WARNING "format_corename failed\n"); printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; + goto fail_unlock; } if (cprm.limit == 1) { @@ -584,7 +582,7 @@ void do_coredump(siginfo_t *siginfo) goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); + helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); @@ -601,7 +599,7 @@ void do_coredump(siginfo_t *siginfo) argv_free(helper_argv); if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", + printk(KERN_INFO "Core dump to |%s pipe failed\n", cn.corename); goto close_fail; } @@ -669,7 +667,6 @@ fail_dropcount: atomic_dec(&core_dump_count); fail_unlock: kfree(cn.corename); -fail_corename: coredump_finish(mm, core_dumped); revert_creds(old_cred); fail_creds: diff --git a/fs/dcache.c b/fs/dcache.c index 5a23073138df..87bdb5329c3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1730,7 +1730,7 @@ EXPORT_SYMBOL(d_add_ci); * Do the slow-case of the dentry name compare. * * Unlike the dentry_cmp() function, we need to atomically - * load the name, length and inode information, so that the + * load the name and length information, so that the * filesystem can rely on them, and can use the 'name' and * 'len' information without worrying about walking off the * end of memory etc. @@ -1748,22 +1748,18 @@ enum slow_d_compare { static noinline enum slow_d_compare slow_dentry_cmp( const struct dentry *parent, - struct inode *inode, struct dentry *dentry, unsigned int seq, const struct qstr *name) { int tlen = dentry->d_name.len; const char *tname = dentry->d_name.name; - struct inode *i = dentry->d_inode; if (read_seqcount_retry(&dentry->d_seq, seq)) { cpu_relax(); return D_COMP_SEQRETRY; } - if (parent->d_op->d_compare(parent, inode, - dentry, i, - tlen, tname, name)) + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) return D_COMP_NOMATCH; return D_COMP_OK; } @@ -1773,7 +1769,6 @@ static noinline enum slow_d_compare slow_dentry_cmp( * @parent: parent dentry * @name: qstr of name we wish to find * @seqp: returns d_seq value at the point where the dentry was found - * @inode: returns dentry->d_inode when the inode was found valid. * Returns: dentry, or NULL * * __d_lookup_rcu is the dcache lookup function for rcu-walk name @@ -1800,7 +1795,7 @@ static noinline enum slow_d_compare slow_dentry_cmp( */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, - unsigned *seqp, struct inode *inode) + unsigned *seqp) { u64 hashlen = name->hash_len; const unsigned char *str = name->name; @@ -1834,11 +1829,10 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, seqretry: /* * The dentry sequence count protects us from concurrent - * renames, and thus protects inode, parent and name fields. + * renames, and thus protects parent and name fields. * * The caller must perform a seqcount check in order - * to do anything useful with the returned dentry, - * including using the 'd_inode' pointer. + * to do anything useful with the returned dentry. * * NOTE! We do a "raw" seqcount_begin here. That means that * we don't wait for the sequence count to stabilize if it @@ -1852,12 +1846,12 @@ seqretry: continue; if (d_unhashed(dentry)) continue; - *seqp = seq; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; - switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { + *seqp = seq; + switch (slow_dentry_cmp(parent, dentry, seq, name)) { case D_COMP_OK: return dentry; case D_COMP_NOMATCH: @@ -1869,6 +1863,7 @@ seqretry: if (dentry->d_name.hash_len != hashlen) continue; + *seqp = seq; if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) return dentry; } @@ -1966,9 +1961,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) if (parent->d_flags & DCACHE_OP_COMPARE) { int tlen = dentry->d_name.len; const char *tname = dentry->d_name.name; - if (parent->d_op->d_compare(parent, parent->d_inode, - dentry, dentry->d_inode, - tlen, tname, name)) + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) goto next; } else { if (dentry->d_name.len != len) @@ -2005,7 +1998,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) */ name->hash = full_name_hash(name->name, name->len); if (dir->d_flags & DCACHE_OP_HASH) { - int err = dir->d_op->d_hash(dir, dir->d_inode, name); + int err = dir->d_op->d_hash(dir, name); if (unlikely(err < 0)) return ERR_PTR(err); } @@ -2975,34 +2968,21 @@ rename_retry: goto again; } -/** - * find_inode_number - check for dentry with name - * @dir: directory to check - * @name: Name to find. - * - * Check whether a dentry already exists for the given name, - * and return the inode number if it has an inode. Otherwise - * 0 is returned. - * - * This routine is used to post-process directory listings for - * filesystems using synthetic inode numbers, and is necessary - * to keep getcwd() working. - */ - -ino_t find_inode_number(struct dentry *dir, struct qstr *name) +void d_tmpfile(struct dentry *dentry, struct inode *inode) { - struct dentry * dentry; - ino_t ino = 0; - - dentry = d_hash_and_lookup(dir, name); - if (!IS_ERR_OR_NULL(dentry)) { - if (dentry->d_inode) - ino = dentry->d_inode->i_ino; - dput(dentry); - } - return ino; + inode_dec_link_count(inode); + BUG_ON(dentry->d_name.name != dentry->d_iname || + !hlist_unhashed(&dentry->d_alias) || + !d_unlinked(dentry)); + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", + (unsigned long long)inode->i_ino); + spin_unlock(&dentry->d_lock); + spin_unlock(&dentry->d_parent->d_lock); + d_instantiate(dentry, inode); } -EXPORT_SYMBOL(find_inode_number); +EXPORT_SYMBOL(d_tmpfile); static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f71ec125290d..cfa109a4d5a2 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -2243,12 +2243,11 @@ out: */ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, size_t *plaintext_name_size, - struct dentry *ecryptfs_dir_dentry, + struct super_block *sb, const char *name, size_t name_size) { struct ecryptfs_mount_crypt_stat *mount_crypt_stat = - &ecryptfs_superblock_to_private( - ecryptfs_dir_dentry->d_sb)->mount_crypt_stat; + &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; char *decoded_name; size_t decoded_name_size; size_t packet_size; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index f622a733f7ad..df19d34a033b 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -575,7 +575,7 @@ int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, struct inode *ecryptfs_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, - struct dentry *ecryptfs_dentry, + struct super_block *sb, const char *name, size_t name_size); int ecryptfs_fill_zeros(struct file *file, loff_t new_length); int ecryptfs_encrypt_and_encode_filename( diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9aa05e08060b..24f1105fda3a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -70,7 +70,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, struct ecryptfs_getdents_callback { struct dir_context ctx; struct dir_context *caller; - struct dentry *dentry; + struct super_block *sb; int filldir_called; int entries_written; }; @@ -88,7 +88,7 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, buf->filldir_called++; rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, - buf->dentry, lower_name, + buf->sb, lower_name, lower_namelen); if (rc) { printk(KERN_ERR "%s: Error attempting to decode and decrypt " @@ -114,15 +114,14 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx) { int rc; struct file *lower_file; - struct inode *inode; + struct inode *inode = file_inode(file); struct ecryptfs_getdents_callback buf = { .ctx.actor = ecryptfs_filldir, .caller = ctx, - .dentry = file->f_path.dentry + .sb = inode->i_sb, }; lower_file = ecryptfs_file_to_lower(file); lower_file->f_pos = ctx->pos; - inode = file_inode(file); rc = iterate_dir(lower_file, &buf.ctx); ctx->pos = buf.ctx.pos; if (rc < 0) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5eab400e2590..a2f2bb2c256d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -679,7 +679,7 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, set_fs(old_fs); if (rc < 0) goto out; - rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, + rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry->d_sb, lower_buf, rc); out: kfree(lower_buf); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 141aee31884f..a8766b880c07 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -45,8 +45,8 @@ static struct super_block *efivarfs_sb; * So we need to perform a case-sensitive match on part 1 and a * case-insensitive match on part 2. */ -static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int efivarfs_d_compare(const struct dentry *parent, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { @@ -63,8 +63,7 @@ static int efivarfs_d_compare(const struct dentry *parent, const struct inode *p return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN); } -static int efivarfs_d_hash(const struct dentry *dentry, - const struct inode *inode, struct qstr *qstr) +static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) { unsigned long hash = init_name_hash(); const unsigned char *s = qstr->name; @@ -108,7 +107,7 @@ static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) q.name = name; q.len = strlen(name); - err = efivarfs_d_hash(NULL, NULL, &q); + err = efivarfs_d_hash(NULL, &q); if (err) return ERR_PTR(err); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc7294a67..9ad17b15b454 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -1602,7 +1603,8 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); @@ -1975,8 +1977,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return -EINVAL; if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) return -EFAULT; - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + sigsaved = current->blocked; + set_current_blocked(&ksigmask); } error = sys_epoll_wait(epfd, events, maxevents, timeout); @@ -1993,7 +1995,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, sizeof(sigsaved)); set_restore_sigmask(); } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + set_current_blocked(&sigsaved); } return error; @@ -2020,8 +2022,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) return -EFAULT; sigset_from_compat(&ksigmask, &csigmask); - sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + sigsaved = current->blocked; + set_current_blocked(&ksigmask); } err = sys_epoll_wait(epfd, events, maxevents, timeout); @@ -2038,7 +2040,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, sizeof(sigsaved)); set_restore_sigmask(); } else - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + set_current_blocked(&sigsaved); } return err; diff --git a/fs/exec.c b/fs/exec.c index ffd7a813ad3d..9c73def87642 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -110,13 +110,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) static const struct open_flags uselib_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, - .intent = LOOKUP_OPEN + .intent = LOOKUP_OPEN, + .lookup_flags = LOOKUP_FOLLOW, }; if (IS_ERR(tmp)) goto out; - file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); + file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); putname(tmp); error = PTR_ERR(file); if (IS_ERR(file)) @@ -756,10 +757,11 @@ struct file *open_exec(const char *name) static const struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC | MAY_OPEN, - .intent = LOOKUP_OPEN + .intent = LOOKUP_OPEN, + .lookup_flags = LOOKUP_FOLLOW, }; - file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW); + file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags); if (IS_ERR(file)) goto out; @@ -930,6 +932,7 @@ static int de_thread(struct task_struct *tsk) * also take its birthdate (always earlier than our own). */ tsk->start_time = leader->start_time; + tsk->real_start_time = leader->real_start_time; BUG_ON(!same_thread_group(leader, tsk)); BUG_ON(has_group_leader_pid(tsk)); @@ -945,9 +948,8 @@ static int de_thread(struct task_struct *tsk) * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ - detach_pid(tsk, PIDTYPE_PID); tsk->pid = leader->pid; - attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); + change_pid(tsk, PIDTYPE_PID, task_pid(leader)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); @@ -1463,7 +1465,6 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; bool clear_in_exec; int retval; - const struct cred *cred = current_cred(); /* * We move the actual failure in case of RLIMIT_NPROC excess from @@ -1472,7 +1473,7 @@ static int do_execve_common(const char *filename, * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && - atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { + atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 73b0d9519836..256dd5f4c1c4 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -119,6 +119,29 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode return ext2_add_nondir(dentry, inode); } +static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode = ext2_new_inode(dir, mode, NULL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &ext2_file_inode_operations; + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { + inode->i_mapping->a_ops = &ext2_nobh_aops; + inode->i_fop = &ext2_file_operations; + } else { + inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; + } + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + unlock_new_inode(inode); + return 0; +} + static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode * inode; @@ -398,6 +421,7 @@ const struct inode_operations ext2_dir_inode_operations = { #endif .setattr = ext2_setattr, .get_acl = ext2_get_acl, + .tmpfile = ext2_tmpfile, }; const struct inode_operations ext2_special_inode_operations = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f67668f724ba..2bd85486b879 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .is_dirty_writeback = buffer_check_dirty_writeback, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index cea8ecf3e76e..998ea111e537 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1759,6 +1759,45 @@ retry: return err; } +static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + handle_t *handle; + struct inode *inode; + int err, retries = 0; + + dquot_initialize(dir); + +retry: + handle = ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + 4 + EXT3_XATTR_TRANS_BLOCKS); + + if (IS_ERR(handle)) + return PTR_ERR(handle); + + inode = ext3_new_inode (handle, dir, NULL, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + ext3_set_aops(inode); + err = ext3_orphan_add(handle, inode); + if (err) + goto err_drop_inode; + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + unlock_new_inode(inode); + } + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +err_drop_inode: + ext3_journal_stop(handle); + unlock_new_inode(inode); + iput(inode); + return err; +} + static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) { handle_t *handle; @@ -2300,7 +2339,7 @@ static int ext3_link (struct dentry * old_dentry, retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2314,6 +2353,11 @@ retry: err = ext3_add_entry(handle, dentry, inode); if (!err) { ext3_mark_inode_dirty(handle, inode); + /* this can happen only for tmpfile being + * linked the first time + */ + if (inode->i_nlink == 1) + ext3_orphan_del(handle, inode); d_instantiate(dentry, inode); } else { drop_nlink(inode); @@ -2516,6 +2560,7 @@ const struct inode_operations ext3_dir_inode_operations = { .mkdir = ext3_mkdir, .rmdir = ext3_rmdir, .mknod = ext3_mknod, + .tmpfile = ext3_tmpfile, .rename = ext3_rename, .setattr = ext3_setattr, #ifdef CONFIG_EXT3_FS_XATTR diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b19f0a457f32..6f4cc567c382 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -494,17 +494,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) if (dataoff > isize) return -ENXIO; - if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - return -EINVAL; - if (dataoff > maxsize) - return -EINVAL; - - if (dataoff != file->f_pos) { - file->f_pos = dataoff; - file->f_version = 0; - } - - return dataoff; + return vfs_setpos(file, dataoff, maxsize); } /* @@ -580,17 +570,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) if (holeoff > isize) holeoff = isize; - if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - return -EINVAL; - if (holeoff > maxsize) - return -EINVAL; - - if (holeoff != file->f_pos) { - file->f_pos = holeoff; - file->f_version = 0; - } - - return holeoff; + return vfs_setpos(file, holeoff, maxsize); } /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ab2f6dc44b3a..234b834d5a97 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2296,6 +2296,45 @@ retry: return err; } +static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + handle_t *handle; + struct inode *inode; + int err, retries = 0; + + dquot_initialize(dir); + +retry: + inode = ext4_new_inode_start_handle(dir, mode, + NULL, 0, NULL, + EXT4_HT_DIR, + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + 4 + EXT4_XATTR_TRANS_BLOCKS); + handle = ext4_journal_current_handle(); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); + err = ext4_orphan_add(handle, inode); + if (err) + goto err_drop_inode; + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + unlock_new_inode(inode); + } + if (handle) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +err_drop_inode: + ext4_journal_stop(handle); + unlock_new_inode(inode); + iput(inode); + return err; +} + struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, struct ext4_dir_entry_2 *de, int blocksize, int csum_size, @@ -2903,7 +2942,7 @@ static int ext4_link(struct dentry *old_dentry, retry: handle = ext4_journal_start(dir, EXT4_HT_DIR, (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS)); + EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2917,6 +2956,11 @@ retry: err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); + /* this can happen only for tmpfile being + * linked the first time + */ + if (inode->i_nlink == 1) + ext4_orphan_del(handle, inode); d_instantiate(dentry, inode); } else { drop_nlink(inode); @@ -3169,6 +3213,7 @@ const struct inode_operations ext4_dir_inode_operations = { .mkdir = ext4_mkdir, .rmdir = ext4_rmdir, .mknod = ext4_mknod, + .tmpfile = ext4_tmpfile, .rename = ext4_rename, .setattr = ext4_setattr, .setxattr = generic_setxattr, diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 359d307b5507..628e22a5a543 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf); + fat_msg(sb, KERN_ERR, "error, %pV", &vaf); va_end(args); } @@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { sb->s_flags |= MS_RDONLY; - printk(KERN_ERR "FAT-fs (%s): Filesystem has been " - "set read-only\n", sb->s_id); + fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); } } EXPORT_SYMBOL_GPL(__fat_fs_error); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 081b759cff83..a783b0e1272a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -148,8 +148,7 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, * that the existing dentry can be used. The msdos fs routines will * return ENOENT or EINVAL as appropriate. */ -static int msdos_hash(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +static int msdos_hash(const struct dentry *dentry, struct qstr *qstr) { struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; unsigned char msdos_name[MSDOS_NAME]; @@ -165,8 +164,7 @@ static int msdos_hash(const struct dentry *dentry, const struct inode *inode, * Compare two msdos names. If either of the names are invalid, * we fall back to doing the standard name comparison. */ -static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int msdos_cmp(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 2da952036a3d..6df8d3d885e5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -107,8 +107,7 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr) * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hash(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +static int vfat_hash(const struct dentry *dentry, struct qstr *qstr) { qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); return 0; @@ -120,8 +119,7 @@ static int vfat_hash(const struct dentry *dentry, const struct inode *inode, * that the existing dentry can be used. The vfat fs routines will * return ENOENT or EINVAL as appropriate. */ -static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr) { struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; const unsigned char *name; @@ -142,8 +140,7 @@ static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, /* * Case insensitive compare of two vfat names. */ -static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int vfat_cmpi(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; @@ -162,8 +159,7 @@ static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, /* * Case sensitive compare of two vfat names. */ -static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int vfat_cmp(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { unsigned int alen, blen; diff --git a/fs/file_table.c b/fs/file_table.c index 485dc0eddd67..08e719b884ca 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -227,7 +227,7 @@ static void __fput(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_inode; might_sleep(); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f281033142..5c121fe19c5f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -548,8 +548,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) res = io->bytes < 0 ? io->size : io->bytes; if (!is_sync_kiocb(io->iocb)) { - struct path *path = &io->iocb->ki_filp->f_path; - struct inode *inode = path->dentry->d_inode; + struct inode *inode = file_inode(io->iocb->ki_filp); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..0b578598c6ac 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = { static void sanitize_global_limit(unsigned *limit) { if (*limit == 0) - *limit = ((num_physpages << PAGE_SHIFT) >> 13) / + *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / sizeof(struct fuse_req); if (*limit >= 1 << 16) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 4fddb3c22d25..f2448ab2aac5 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -109,8 +109,7 @@ fail: return 0; } -static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, - struct qstr *str) +static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) { str->hash = gfs2_disk_hash(str->name, str->len); return 0; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f99f9e8a325f..72c3866a7320 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -912,7 +912,7 @@ out_uninit: * cluster; until we do, disable leases (by just returning -EINVAL), * unless the administrator has requested purely local locking. * - * Locking: called under lock_flocks + * Locking: called under i_lock * * Returns: errno */ diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index a73b11839a41..0524cda47a6e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -229,13 +229,10 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); /* string.c */ extern const struct dentry_operations hfs_dentry_operations; -extern int hfs_hash_dentry(const struct dentry *, const struct inode *, - struct qstr *); +extern int hfs_hash_dentry(const struct dentry *, struct qstr *); extern int hfs_strcmp(const unsigned char *, unsigned int, const unsigned char *, unsigned int); -extern int hfs_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +extern int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); /* trans.c */ diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 495a976a3cc9..85b610c3909f 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c @@ -51,8 +51,7 @@ static unsigned char caseorder[256] = { /* * Hash a string to an integer in a case-independent way */ -int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *this) +int hfs_hash_dentry(const struct dentry *dentry, struct qstr *this) { const unsigned char *name = this->name; unsigned int hash, len = this->len; @@ -93,8 +92,7 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, * Test for equality of two strings in the HFS filename character ordering. * return 1 on failure and 0 on success */ -int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { const unsigned char *n1, *n2; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 60b0a3388b26..ede79317cfb8 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -495,11 +495,8 @@ int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, int, const char *, int); -int hfsplus_hash_dentry(const struct dentry *dentry, - const struct inode *inode, struct qstr *str); -int hfsplus_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str); +int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); /* wrapper.c */ diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 2c2e47dcfdd8..e8ef121a4d8b 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -334,8 +334,7 @@ int hfsplus_asc2uni(struct super_block *sb, * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *str) +int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) { struct super_block *sb = dentry->d_sb; const char *astr; @@ -386,9 +385,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, * Composed unicode characters are decomposed and case-folding is performed * if the appropriate bits are (un)set on the superblock. */ -int hfsplus_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct super_block *sb = parent->d_sb; diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 05d4816e4e77..fa27980f2229 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c @@ -12,8 +12,7 @@ * Note: the dentry argument is the parent dentry. */ -static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +static int hpfs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) { unsigned long hash; int i; @@ -35,9 +34,7 @@ static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *ino return 0; } -static int hpfs_compare_dentry(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int hpfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { unsigned al = len; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index fc90ab11c340..4338ff32959d 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra) struct dentry *parent; char *root, *name; const char *seg_name; - int len, seg_len; + int len, seg_len, root_len; len = 0; parent = dentry; @@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra) } root = "proc"; - len += strlen(root); + root_len = strlen(root); + len += root_len; name = kmalloc(len + extra + 1, GFP_KERNEL); if (name == NULL) return NULL; @@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra) while (parent->d_parent != parent) { if (is_pid(parent)) { seg_name = "pid"; - seg_len = strlen("pid"); + seg_len = strlen(seg_name); } else { seg_name = parent->d_name.name; @@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra) len -= seg_len + 1; name[len] = '/'; - strncpy(&name[len + 1], seg_name, seg_len); + memcpy(&name[len + 1], seg_name, seg_len); parent = parent->d_parent; } - strncpy(name, root, strlen(root)); + memcpy(name, root, root_len); return name; } diff --git a/fs/inode.c b/fs/inode.c index 00d5fc3b86e1..d6dfb09c8280 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -333,8 +333,10 @@ EXPORT_SYMBOL(set_nlink); */ void inc_nlink(struct inode *inode) { - if (WARN_ON(inode->i_nlink == 0)) + if (unlikely(inode->i_nlink == 0)) { + WARN_ON(!(inode->i_state & I_LINKABLE)); atomic_long_dec(&inode->i_sb->s_remove_count); + } inode->__i_nlink++; } diff --git a/fs/internal.h b/fs/internal.h index 68121584ae37..7c5f01cf619d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -96,11 +96,12 @@ struct open_flags { umode_t mode; int acc_mode; int intent; + int lookup_flags; }; extern struct file *do_filp_open(int dfd, struct filename *pathname, - const struct open_flags *op, int flags); + const struct open_flags *op); extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, - const char *, const struct open_flags *, int lookup_flags); + const char *, const struct open_flags *); extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); @@ -130,6 +131,7 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); * read_write.c */ extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); +extern int rw_verify_area(int, struct file *, const loff_t *, size_t); /* * splice.c diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d9b8aebdeb22..c348d6d88624 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -28,31 +28,23 @@ #define BEQUIET -static int isofs_hashi(const struct dentry *parent, const struct inode *inode, - struct qstr *qstr); -static int isofs_hash(const struct dentry *parent, const struct inode *inode, - struct qstr *qstr); +static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); +static int isofs_hash(const struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); static int isofs_dentry_cmp(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #ifdef CONFIG_JOLIET -static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, - struct qstr *qstr); -static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, - struct qstr *qstr); +static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); +static int isofs_hash_ms(const struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi_ms(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); static int isofs_dentry_cmp_ms(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #endif @@ -265,30 +257,26 @@ static int isofs_dentry_cmp_common( } static int -isofs_hash(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +isofs_hash(const struct dentry *dentry, struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 0); } static int -isofs_hashi(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 0); } static int -isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 0, 0); } static int -isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 0, 1); @@ -296,30 +284,26 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, #ifdef CONFIG_JOLIET static int -isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { return isofs_hash_common(dentry, qstr, 1); } static int -isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) { return isofs_hashi_common(dentry, qstr, 1); } static int -isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +isofs_dentry_cmp_ms(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 1, 0); } static int -isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +isofs_dentry_cmpi_ms(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { return isofs_dentry_cmp_common(len, str, name, 1, 1); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index c167028844ed..95295640d9c8 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -37,8 +37,7 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) qstr.name = compare; qstr.len = dlen; - return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, - dentry->d_name.len, dentry->d_name.name, &qstr); + return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); } /* diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 89186b7b9002..8b19027291d6 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1538,8 +1538,7 @@ const struct file_operations jfs_dir_operations = { .llseek = generic_file_llseek, }; -static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, - struct qstr *this) +static int jfs_ci_hash(const struct dentry *dir, struct qstr *this) { unsigned long hash; int i; @@ -1552,9 +1551,7 @@ static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, return 0; } -static int jfs_ci_compare(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int jfs_ci_compare(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a2aa97d45670..10d6c41aecad 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv) svc_sock_update_bufs(serv); serv->sv_maxconn = nlm_max_connections; - nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); + nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); if (IS_ERR(nlmsvc_task)) { error = PTR_ERR(nlmsvc_task); printk(KERN_WARNING diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..067778b0ccc9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) dprintk("lockd: unlinking block %p...\n", block); /* Remove block from list */ - status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl); + status = posix_unblock_lock(&block->b_call->a_args.lock.fl); nlmsvc_remove_block(block); return status; } @@ -744,8 +744,20 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; } +/* + * Since NLM uses two "keys" for tracking locks, we need to hash them down + * to one for the blocked_hash. Here, we're just xor'ing the host address + * with the pid in order to create a key value for picking a hash bucket. + */ +static unsigned long +nlmsvc_owner_key(struct file_lock *fl) +{ + return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; +} + const struct lock_manager_operations nlmsvc_lock_operations = { .lm_compare_owner = nlmsvc_same_owner, + .lm_owner_key = nlmsvc_owner_key, .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, }; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 97e87415b145..dc5c75930f0f 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, again: file->f_locks = 0; - lock_flocks(); /* protects i_flock list */ + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl; fl = fl->fl_next) { if (fl->fl_lmops != &nlmsvc_lock_operations) continue; @@ -181,7 +181,7 @@ again: if (match(lockhost, host)) { struct file_lock lock = *fl; - unlock_flocks(); + spin_unlock(&inode->i_lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; @@ -193,7 +193,7 @@ again: goto again; } } - unlock_flocks(); + spin_unlock(&inode->i_lock); return 0; } @@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file) if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; - lock_flocks(); + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl; fl = fl->fl_next) { if (fl->fl_lmops == &nlmsvc_lock_operations) { - unlock_flocks(); + spin_unlock(&inode->i_lock); return 1; } } - unlock_flocks(); + spin_unlock(&inode->i_lock); file->f_locks = 0; return 0; } diff --git a/fs/locks.c b/fs/locks.c index cb424a4fed71..04e2c1fdb157 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -126,6 +126,7 @@ #include <linux/time.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> +#include <linux/hashtable.h> #include <asm/uaccess.h> @@ -153,30 +154,51 @@ int lease_break_time = 45; #define for_each_lock(inode, lockp) \ for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) -static LIST_HEAD(file_lock_list); -static LIST_HEAD(blocked_list); +/* + * The global file_lock_list is only used for displaying /proc/locks. Protected + * by the file_lock_lock. + */ +static HLIST_HEAD(file_lock_list); static DEFINE_SPINLOCK(file_lock_lock); /* - * Protects the two list heads above, plus the inode->i_flock list + * The blocked_hash is used to find POSIX lock loops for deadlock detection. + * It is protected by blocked_lock_lock. + * + * We hash locks by lockowner in order to optimize searching for the lock a + * particular lockowner is waiting on. + * + * FIXME: make this value scale via some heuristic? We generally will want more + * buckets when we have more lockowners holding locks, but that's a little + * difficult to determine without knowing what the workload will look like. */ -void lock_flocks(void) -{ - spin_lock(&file_lock_lock); -} -EXPORT_SYMBOL_GPL(lock_flocks); +#define BLOCKED_HASH_BITS 7 +static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); -void unlock_flocks(void) -{ - spin_unlock(&file_lock_lock); -} -EXPORT_SYMBOL_GPL(unlock_flocks); +/* + * This lock protects the blocked_hash. Generally, if you're accessing it, you + * want to be holding this lock. + * + * In addition, it also protects the fl->fl_block list, and the fl->fl_next + * pointer for file_lock structures that are acting as lock requests (in + * contrast to those that are acting as records of acquired locks). + * + * Note that when we acquire this lock in order to change the above fields, + * we often hold the i_lock as well. In certain cases, when reading the fields + * protected by this lock, we can skip acquiring it iff we already hold the + * i_lock. + * + * In particular, adding an entry to the fl_block list requires that you hold + * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting + * an entry from the list however only requires the file_lock_lock. + */ +static DEFINE_SPINLOCK(blocked_lock_lock); static struct kmem_cache *filelock_cache __read_mostly; static void locks_init_lock_heads(struct file_lock *fl) { - INIT_LIST_HEAD(&fl->fl_link); + INIT_HLIST_NODE(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); } @@ -210,7 +232,7 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_block)); - BUG_ON(!list_empty(&fl->fl_link)); + BUG_ON(!hlist_unhashed(&fl->fl_link)); locks_release_private(fl); kmem_cache_free(filelock_cache, fl); @@ -484,47 +506,108 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner; } +static inline void +locks_insert_global_locks(struct file_lock *fl) +{ + spin_lock(&file_lock_lock); + hlist_add_head(&fl->fl_link, &file_lock_list); + spin_unlock(&file_lock_lock); +} + +static inline void +locks_delete_global_locks(struct file_lock *fl) +{ + spin_lock(&file_lock_lock); + hlist_del_init(&fl->fl_link); + spin_unlock(&file_lock_lock); +} + +static unsigned long +posix_owner_key(struct file_lock *fl) +{ + if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) + return fl->fl_lmops->lm_owner_key(fl); + return (unsigned long)fl->fl_owner; +} + +static inline void +locks_insert_global_blocked(struct file_lock *waiter) +{ + hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); +} + +static inline void +locks_delete_global_blocked(struct file_lock *waiter) +{ + hash_del(&waiter->fl_link); +} + /* Remove waiter from blocker's block list. * When blocker ends up pointing to itself then the list is empty. + * + * Must be called with blocked_lock_lock held. */ static void __locks_delete_block(struct file_lock *waiter) { + locks_delete_global_blocked(waiter); list_del_init(&waiter->fl_block); - list_del_init(&waiter->fl_link); waiter->fl_next = NULL; } -/* - */ -void locks_delete_block(struct file_lock *waiter) +static void locks_delete_block(struct file_lock *waiter) { - lock_flocks(); + spin_lock(&blocked_lock_lock); __locks_delete_block(waiter); - unlock_flocks(); + spin_unlock(&blocked_lock_lock); } -EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. + * + * Must be called with both the i_lock and blocked_lock_lock held. The fl_block + * list itself is protected by the file_lock_list, but by ensuring that the + * i_lock is also held on insertions we can avoid taking the blocked_lock_lock + * in some cases when we see that the fl_block list is empty. */ -static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +static void __locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) { BUG_ON(!list_empty(&waiter->fl_block)); - list_add_tail(&waiter->fl_block, &blocker->fl_block); waiter->fl_next = blocker; + list_add_tail(&waiter->fl_block, &blocker->fl_block); if (IS_POSIX(blocker)) - list_add(&waiter->fl_link, &blocked_list); + locks_insert_global_blocked(waiter); } -/* Wake up processes blocked waiting for blocker. - * If told to wait then schedule the processes until the block list - * is empty, otherwise empty the block list ourselves. +/* Must be called with i_lock held. */ +static void locks_insert_block(struct file_lock *blocker, + struct file_lock *waiter) +{ + spin_lock(&blocked_lock_lock); + __locks_insert_block(blocker, waiter); + spin_unlock(&blocked_lock_lock); +} + +/* + * Wake up processes blocked waiting for blocker. + * + * Must be called with the inode->i_lock held! */ static void locks_wake_up_blocks(struct file_lock *blocker) { + /* + * Avoid taking global lock if list is empty. This is safe since new + * blocked requests are only added to the list under the i_lock, and + * the i_lock is always held here. Note that removal from the fl_block + * list does not require the i_lock, so we must recheck list_empty() + * after acquiring the blocked_lock_lock. + */ + if (list_empty(&blocker->fl_block)) + return; + + spin_lock(&blocked_lock_lock); while (!list_empty(&blocker->fl_block)) { struct file_lock *waiter; @@ -536,20 +619,23 @@ static void locks_wake_up_blocks(struct file_lock *blocker) else wake_up(&waiter->fl_wait); } + spin_unlock(&blocked_lock_lock); } /* Insert file lock fl into an inode's lock list at the position indicated * by pos. At the same time add the lock to the global file lock list. + * + * Must be called with the i_lock held! */ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) { - list_add(&fl->fl_link, &file_lock_list); - fl->fl_nspid = get_pid(task_tgid(current)); /* insert into file's list */ fl->fl_next = *pos; *pos = fl; + + locks_insert_global_locks(fl); } /* @@ -557,14 +643,17 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) * Wake up processes that are blocked waiting for this lock, * notify the FS that the lock has been cleared and * finally free the lock. + * + * Must be called with the i_lock held! */ static void locks_delete_lock(struct file_lock **thisfl_p) { struct file_lock *fl = *thisfl_p; + locks_delete_global_locks(fl); + *thisfl_p = fl->fl_next; fl->fl_next = NULL; - list_del_init(&fl->fl_link); if (fl->fl_nspid) { put_pid(fl->fl_nspid); @@ -625,8 +714,9 @@ void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; + struct inode *inode = file_inode(filp); - lock_flocks(); + spin_lock(&inode->i_lock); for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { if (!IS_POSIX(cfl)) continue; @@ -639,7 +729,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) fl->fl_pid = pid_vnr(cfl->fl_nspid); } else fl->fl_type = F_UNLCK; - unlock_flocks(); + spin_unlock(&inode->i_lock); return; } EXPORT_SYMBOL(posix_test_lock); @@ -676,13 +766,14 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) { struct file_lock *fl; - list_for_each_entry(fl, &blocked_list, fl_link) { + hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { if (posix_same_owner(fl, block_fl)) return fl->fl_next; } return NULL; } +/* Must be called with the blocked_lock_lock held! */ static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { @@ -718,7 +809,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) return -ENOMEM; } - lock_flocks(); + spin_lock(&inode->i_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; @@ -748,9 +839,9 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) { - unlock_flocks(); + spin_unlock(&inode->i_lock); cond_resched(); - lock_flocks(); + spin_lock(&inode->i_lock); } find_conflict: @@ -777,7 +868,7 @@ find_conflict: error = 0; out: - unlock_flocks(); + spin_unlock(&inode->i_lock); if (new_fl) locks_free_lock(new_fl); return error; @@ -791,7 +882,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str struct file_lock *left = NULL; struct file_lock *right = NULL; struct file_lock **before; - int error, added = 0; + int error; + bool added = false; /* * We may need two file_lock structures for this operation, @@ -806,7 +898,12 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str new_fl2 = locks_alloc_lock(); } - lock_flocks(); + spin_lock(&inode->i_lock); + /* + * New lock request. Walk all POSIX locks and look for conflicts. If + * there are any, either return error or put the request on the + * blocker's list of waiters and the global blocked_hash. + */ if (request->fl_type != F_UNLCK) { for_each_lock(inode, before) { fl = *before; @@ -819,11 +916,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; + /* + * Deadlock detection and insertion into the blocked + * locks list must be done while holding the same lock! + */ error = -EDEADLK; - if (posix_locks_deadlock(request, fl)) - goto out; - error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + spin_lock(&blocked_lock_lock); + if (likely(!posix_locks_deadlock(request, fl))) { + error = FILE_LOCK_DEFERRED; + __locks_insert_block(fl, request); + } + spin_unlock(&blocked_lock_lock); goto out; } } @@ -845,7 +948,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str before = &fl->fl_next; } - /* Process locks with this owner. */ + /* Process locks with this owner. */ while ((fl = *before) && posix_same_owner(request, fl)) { /* Detect adjacent or overlapping regions (if same lock type) */ @@ -880,7 +983,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str continue; } request = fl; - added = 1; + added = true; } else { /* Processing for different lock types is a bit @@ -891,7 +994,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (fl->fl_start > request->fl_end) break; if (request->fl_type == F_UNLCK) - added = 1; + added = true; if (fl->fl_start < request->fl_start) left = fl; /* If the next lock in the list has a higher end @@ -921,7 +1024,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_release_private(fl); locks_copy_private(fl, request); request = fl; - added = 1; + added = true; } } /* Go on to next lock. @@ -931,10 +1034,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str } /* - * The above code only modifies existing locks in case of - * merging or replacing. If new lock(s) need to be inserted - * all modifications are done bellow this, so it's safe yet to - * bail out. + * The above code only modifies existing locks in case of merging or + * replacing. If new lock(s) need to be inserted all modifications are + * done below this, so it's safe yet to bail out. */ error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) @@ -974,7 +1076,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_wake_up_blocks(left); } out: - unlock_flocks(); + spin_unlock(&inode->i_lock); /* * Free any unused locks. */ @@ -1049,14 +1151,14 @@ int locks_mandatory_locked(struct inode *inode) /* * Search the lock list for this inode for any POSIX locks. */ - lock_flocks(); + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!IS_POSIX(fl)) continue; if (fl->fl_owner != owner) break; } - unlock_flocks(); + spin_unlock(&inode->i_lock); return fl ? -EAGAIN : 0; } @@ -1199,7 +1301,7 @@ int __break_lease(struct inode *inode, unsigned int mode) if (IS_ERR(new_fl)) return PTR_ERR(new_fl); - lock_flocks(); + spin_lock(&inode->i_lock); time_out_leases(inode); @@ -1249,11 +1351,11 @@ restart: break_time++; } locks_insert_block(flock, new_fl); - unlock_flocks(); + spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); - lock_flocks(); - __locks_delete_block(new_fl); + spin_lock(&inode->i_lock); + locks_delete_block(new_fl); if (error >= 0) { if (error == 0) time_out_leases(inode); @@ -1270,7 +1372,7 @@ restart: } out: - unlock_flocks(); + spin_unlock(&inode->i_lock); locks_free_lock(new_fl); return error; } @@ -1323,9 +1425,10 @@ EXPORT_SYMBOL(lease_get_mtime); int fcntl_getlease(struct file *filp) { struct file_lock *fl; + struct inode *inode = file_inode(filp); int type = F_UNLCK; - lock_flocks(); + spin_lock(&inode->i_lock); time_out_leases(file_inode(filp)); for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); fl = fl->fl_next) { @@ -1334,11 +1437,11 @@ int fcntl_getlease(struct file *filp) break; } } - unlock_flocks(); + spin_unlock(&inode->i_lock); return type; } -int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) +static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; @@ -1403,7 +1506,7 @@ out: return error; } -int generic_delete_lease(struct file *filp, struct file_lock **flp) +static int generic_delete_lease(struct file *filp, struct file_lock **flp) { struct file_lock *fl, **before; struct dentry *dentry = filp->f_path.dentry; @@ -1428,7 +1531,7 @@ int generic_delete_lease(struct file *filp, struct file_lock **flp) * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). * - * Called with file_lock_lock held. + * Called with inode->i_lock held. */ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) { @@ -1497,11 +1600,12 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { + struct inode *inode = file_inode(filp); int error; - lock_flocks(); + spin_lock(&inode->i_lock); error = __vfs_setlease(filp, arg, lease); - unlock_flocks(); + spin_unlock(&inode->i_lock); return error; } @@ -1519,6 +1623,7 @@ static int do_fcntl_delete_lease(struct file *filp) static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) { struct file_lock *fl, *ret; + struct inode *inode = file_inode(filp); struct fasync_struct *new; int error; @@ -1532,10 +1637,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) return -ENOMEM; } ret = fl; - lock_flocks(); + spin_lock(&inode->i_lock); error = __vfs_setlease(filp, arg, &ret); if (error) { - unlock_flocks(); + spin_unlock(&inode->i_lock); locks_free_lock(fl); goto out_free_fasync; } @@ -1552,7 +1657,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) new = NULL; error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - unlock_flocks(); + spin_unlock(&inode->i_lock); out_free_fasync: if (new) @@ -2076,7 +2181,7 @@ void locks_remove_flock(struct file *filp) fl.fl_ops->fl_release_private(&fl); } - lock_flocks(); + spin_lock(&inode->i_lock); before = &inode->i_flock; while ((fl = *before) != NULL) { @@ -2094,30 +2199,28 @@ void locks_remove_flock(struct file *filp) } before = &fl->fl_next; } - unlock_flocks(); + spin_unlock(&inode->i_lock); } /** * posix_unblock_lock - stop waiting for a file lock - * @filp: how the file was opened * @waiter: the lock which was waiting * * lockd needs to block waiting for locks. */ int -posix_unblock_lock(struct file *filp, struct file_lock *waiter) +posix_unblock_lock(struct file_lock *waiter) { int status = 0; - lock_flocks(); + spin_lock(&blocked_lock_lock); if (waiter->fl_next) __locks_delete_block(waiter); else status = -ENOENT; - unlock_flocks(); + spin_unlock(&blocked_lock_lock); return status; } - EXPORT_SYMBOL(posix_unblock_lock); /** @@ -2215,7 +2318,7 @@ static int locks_show(struct seq_file *f, void *v) { struct file_lock *fl, *bfl; - fl = list_entry(v, struct file_lock, fl_link); + fl = hlist_entry(v, struct file_lock, fl_link); lock_get_status(f, fl, *((loff_t *)f->private), ""); @@ -2229,21 +2332,23 @@ static void *locks_start(struct seq_file *f, loff_t *pos) { loff_t *p = f->private; - lock_flocks(); + spin_lock(&file_lock_lock); + spin_lock(&blocked_lock_lock); *p = (*pos + 1); - return seq_list_start(&file_lock_list, *pos); + return seq_hlist_start(&file_lock_list, *pos); } static void *locks_next(struct seq_file *f, void *v, loff_t *pos) { loff_t *p = f->private; ++*p; - return seq_list_next(v, &file_lock_list, pos); + return seq_hlist_next(v, &file_lock_list, pos); } static void locks_stop(struct seq_file *f, void *v) { - unlock_flocks(); + spin_unlock(&blocked_lock_lock); + spin_unlock(&file_lock_lock); } static const struct seq_operations locks_seq_operations = { @@ -2290,7 +2395,8 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) { struct file_lock *fl; int result = 1; - lock_flocks(); + + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (IS_POSIX(fl)) { if (fl->fl_type == F_RDLCK) @@ -2307,7 +2413,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) result = 0; break; } - unlock_flocks(); + spin_unlock(&inode->i_lock); return result; } @@ -2330,7 +2436,8 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) { struct file_lock *fl; int result = 1; - lock_flocks(); + + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (IS_POSIX(fl)) { if ((fl->fl_end < start) || (fl->fl_start > (start + len))) @@ -2345,7 +2452,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) result = 0; break; } - unlock_flocks(); + spin_unlock(&inode->i_lock); return result; } diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 08c442902fcd..dfaf6fa9b7b5 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -93,7 +93,7 @@ static int minix_readdir(struct file *file, struct dir_context *ctx) unsigned offset; unsigned long n; - ctx->pos = pos = (pos + chunk_size-1) & ~(chunk_size-1); + ctx->pos = pos = ALIGN(pos, chunk_size); if (pos >= inode->i_size) return 0; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 0db73d9dd668..cd950e2331b6 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -54,6 +54,18 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, return error; } +static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int error; + struct inode *inode = minix_new_inode(dir, mode, &error); + if (inode) { + minix_set_inode(inode, 0); + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + } + return error; +} + static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -254,4 +266,5 @@ const struct inode_operations minix_dir_inode_operations = { .mknod = minix_mknod, .rename = minix_rename, .getattr = minix_getattr, + .tmpfile = minix_tmpfile, }; diff --git a/fs/namei.c b/fs/namei.c index 9ed9361223c0..b2beee7a733f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1352,7 +1352,7 @@ static int lookup_fast(struct nameidata *nd, */ if (nd->flags & LOOKUP_RCU) { unsigned seq; - dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode); + dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (!dentry) goto unlazy; @@ -1787,8 +1787,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) struct dentry *parent = nd->path.dentry; nd->flags &= ~LOOKUP_JUMPED; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { - err = parent->d_op->d_hash(parent, nd->inode, - &this); + err = parent->d_op->d_hash(parent, &this); if (err < 0) break; } @@ -2121,7 +2120,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, base->d_inode, &this); + int err = base->d_op->d_hash(base, &this); if (err < 0) return ERR_PTR(err); } @@ -2690,28 +2689,10 @@ static int do_last(struct nameidata *nd, struct path *path, nd->flags &= ~LOOKUP_PARENT; nd->flags |= op->intent; - switch (nd->last_type) { - case LAST_DOTDOT: - case LAST_DOT: + if (nd->last_type != LAST_NORM) { error = handle_dots(nd, nd->last_type); if (error) return error; - /* fallthrough */ - case LAST_ROOT: - error = complete_walk(nd); - if (error) - return error; - audit_inode(name, nd->path.dentry, 0); - if (open_flag & O_CREAT) { - error = -EISDIR; - goto out; - } - goto finish_open; - case LAST_BIND: - error = complete_walk(nd); - if (error) - return error; - audit_inode(name, dir, 0); goto finish_open; } @@ -2841,19 +2822,19 @@ finish_lookup: } nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ +finish_open: error = complete_walk(nd); if (error) { path_put(&save_parent); return error; } + audit_inode(name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto out; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) goto out; - audit_inode(name, nd->path.dentry, 0); -finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; @@ -2920,6 +2901,67 @@ stale_open: goto retry_lookup; } +static int do_tmpfile(int dfd, struct filename *pathname, + struct nameidata *nd, int flags, + const struct open_flags *op, + struct file *file, int *opened) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *dentry, *child; + struct inode *dir; + int error = path_lookupat(dfd, pathname->name, + flags | LOOKUP_DIRECTORY, nd); + if (unlikely(error)) + return error; + error = mnt_want_write(nd->path.mnt); + if (unlikely(error)) + goto out; + /* we want directory to be writable */ + error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC); + if (error) + goto out2; + dentry = nd->path.dentry; + dir = dentry->d_inode; + if (!dir->i_op->tmpfile) { + error = -EOPNOTSUPP; + goto out2; + } + child = d_alloc(dentry, &name); + if (unlikely(!child)) { + error = -ENOMEM; + goto out2; + } + nd->flags &= ~LOOKUP_DIRECTORY; + nd->flags |= op->intent; + dput(nd->path.dentry); + nd->path.dentry = child; + error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode); + if (error) + goto out2; + audit_inode(pathname, nd->path.dentry, 0); + error = may_open(&nd->path, op->acc_mode, op->open_flag); + if (error) + goto out2; + file->f_path.mnt = nd->path.mnt; + error = finish_open(file, nd->path.dentry, NULL, opened); + if (error) + goto out2; + error = open_check_o_direct(file); + if (error) { + fput(file); + } else if (!(op->open_flag & O_EXCL)) { + struct inode *inode = file_inode(file); + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } +out2: + mnt_drop_write(nd->path.mnt); +out: + path_put(&nd->path); + return error; +} + static struct file *path_openat(int dfd, struct filename *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { @@ -2935,6 +2977,11 @@ static struct file *path_openat(int dfd, struct filename *pathname, file->f_flags = op->open_flag; + if (unlikely(file->f_flags & O_TMPFILE)) { + error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); + goto out; + } + error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) goto out; @@ -2987,9 +3034,10 @@ out: } struct file *do_filp_open(int dfd, struct filename *pathname, - const struct open_flags *op, int flags) + const struct open_flags *op) { struct nameidata nd; + int flags = op->lookup_flags; struct file *filp; filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); @@ -3001,17 +3049,16 @@ struct file *do_filp_open(int dfd, struct filename *pathname, } struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, - const char *name, const struct open_flags *op, int flags) + const char *name, const struct open_flags *op) { struct nameidata nd; struct file *file; struct filename filename = { .name = name }; + int flags = op->lookup_flags | LOOKUP_ROOT; nd.root.mnt = mnt; nd.root.dentry = dentry; - flags |= LOOKUP_ROOT; - if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); @@ -3586,12 +3633,18 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de mutex_lock(&inode->i_mutex); /* Make sure we don't allow creating hardlink to an unlinked file */ - if (inode->i_nlink == 0) + if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else error = dir->i_op->link(old_dentry, dir, new_dentry); + + if (!error && (inode->i_state & I_LINKABLE)) { + spin_lock(&inode->i_lock); + inode->i_state &= ~I_LINKABLE; + spin_unlock(&inode->i_lock); + } mutex_unlock(&inode->i_mutex); if (!error) fsnotify_link(dir, inode, new_dentry); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0e7f00298213..3be047474bfc 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -73,10 +73,8 @@ const struct inode_operations ncp_dir_inode_operations = * Dentry operations routines */ static int ncp_lookup_validate(struct dentry *, unsigned int); -static int ncp_hash_dentry(const struct dentry *, const struct inode *, - struct qstr *); -static int ncp_compare_dentry(const struct dentry *, const struct inode *, - const struct dentry *, const struct inode *, +static int ncp_hash_dentry(const struct dentry *, struct qstr *); +static int ncp_compare_dentry(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); static int ncp_delete_dentry(const struct dentry *); @@ -119,11 +117,19 @@ static inline int ncp_case_sensitive(const struct inode *i) /* * Note: leave the hash unchanged if the directory * is case-sensitive. + * + * Accessing the parent inode can be racy under RCU pathwalking. + * Use ACCESS_ONCE() to make sure we use _one_ particular inode, + * the callers will handle races. */ static int -ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, - struct qstr *this) +ncp_hash_dentry(const struct dentry *dentry, struct qstr *this) { + struct inode *inode = ACCESS_ONCE(dentry->d_inode); + + if (!inode) + return 0; + if (!ncp_case_sensitive(inode)) { struct super_block *sb = dentry->d_sb; struct nls_table *t; @@ -140,14 +146,24 @@ ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, return 0; } +/* + * Accessing the parent inode can be racy under RCU pathwalking. + * Use ACCESS_ONCE() to make sure we use _one_ particular inode, + * the callers will handle races. + */ static int -ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { + struct inode *pinode; + if (len != name->len) return 1; + pinode = ACCESS_ONCE(parent->d_inode); + if (!pinode) + return 1; + if (ncp_case_sensitive(pinode)) return strncmp(str, name->name, len); @@ -660,8 +676,6 @@ end_advance: ctl.valid = 0; if (!ctl.filled && (ctl.fpos == ctx->pos)) { if (!ino) - ino = find_inode_number(dentry, &qname); - if (!ino) ino = iunique(dir->i_sb, 2); ctl.filled = !dir_emit(ctx, qname.name, qname.len, ino, DT_UNKNOWN); @@ -1123,17 +1137,6 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { - /* - * fail with EBUSY if there are still references to this - * directory. - */ - dentry_unhash(new_dentry); - error = -EBUSY; - if (!d_unhashed(new_dentry)) - goto out; - } - ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 26910c8154da..0765ad12c382 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -891,6 +891,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) if (!server) /* How this could happen? */ goto out; + result = -EPERM; + if (IS_DEADDIR(dentry->d_inode)) + goto out; + /* ageing the dentry to force validation */ ncp_age_dentry(server, dentry); diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index ee24df5af1f9..3c5dd55d284c 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; /* we do not support files bigger than 4GB... We eventually supports just 4GB... */ - if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff + if (vma_pages(vma) + vma->vm_pgoff > (1U << (32 - PAGE_SHIFT))) return -EFBIG; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cff089a412c7..da6a43d19aa3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_rqst *rqstp; int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - char svc_name[12]; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); @@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_sock_update_bufs(serv); - sprintf(svc_name, "nfsv4.%u-svc", minorversion); cb_info->serv = serv; cb_info->rqst = rqstp; - cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + cb_info->task = kthread_run(callback_svc, cb_info->rqst, + "nfsv4.%u-svc", minorversion); if (IS_ERR(cb_info->task)) { ret = PTR_ERR(cb_info->task); svc_exit_thread(cb_info->rqst); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 57db3244f4d9..7ec4814e298d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ if (inode->i_flock == NULL) goto out; - /* Protect inode->i_flock using the file locks lock */ - lock_flocks(); + /* Protect inode->i_flock using the i_lock */ + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file) != ctx) continue; - unlock_flocks(); + spin_unlock(&inode->i_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; - lock_flocks(); + spin_lock(&inode->i_lock); } - unlock_flocks(); + spin_unlock(&inode->i_lock); out: return status; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5d051419527b..d7ed697133f0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,6 +33,7 @@ #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/swap.h> #include <linux/sched.h> #include <linux/kmemleak.h> #include <linux/xattr.h> @@ -1758,7 +1759,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink); */ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct pagevec lru_pvec; struct page *page; char *kaddr; struct iattr attr; @@ -1798,11 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - pagevec_init(&lru_pvec, 0); - if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, + if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0, GFP_KERNEL)) { - pagevec_add(&lru_pvec, page); - pagevec_lru_add_file(&lru_pvec); SetPageUptodate(page); unlock_page(page); } else diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b4a79f4ad1d..94e94bd11aae 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -495,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp) return nfs_fscache_release_page(page, gfp); } +static void nfs_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct nfs_inode *nfsi; + struct address_space *mapping = page_file_mapping(page); + + if (!mapping || PageSwapCache(page)) + return; + + /* + * Check if an unstable page is currently being committed and + * if so, have the VM treat it as if the page is under writeback + * so it will not block due to pages that will shortly be freeable. + */ + nfsi = NFS_I(mapping->host); + if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + *writeback = true; + return; + } + + /* + * If PagePrivate() is set, then the page is not freeable and as the + * inode is not being committed, it's not going to be cleaned in the + * near future so treat it as dirty + */ + if (PagePrivate(page)) + *dirty = true; +} + /* * Attempt to clear the private state associated with a page when an error * occurs that requires the cached contents of an inode to be written back or @@ -542,6 +571,7 @@ const struct address_space_operations nfs_file_aops = { .direct_IO = nfs_direct_IO, .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, + .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, #ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c1c7a9d78722..ce727047ee87 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 43ea96ced28c..ce90eb4775c2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7ba5616989c..28241a42f363 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -268,7 +268,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable_unsafe(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -4528,7 +4528,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable_unsafe(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fab140764c4..55418811a55a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1194,7 +1194,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) snprintf(buf, sizeof(buf), "%s-manager", rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); rcu_read_unlock(); - task = kthread_run(nfs4_run_state_manager, clp, buf); + task = kthread_run(nfs4_run_state_manager, clp, "%s", buf); if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); @@ -1373,13 +1373,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); /* Protect inode->i_flock using the BKL */ - lock_flocks(); + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file)->state != state) continue; - unlock_flocks(); + spin_unlock(&inode->i_lock); status = ops->recover_lock(state, fl); switch (status) { case 0: @@ -1406,9 +1406,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* kill_proc(fl->fl_pid, SIGLOST, 1); */ status = 0; } - lock_flocks(); + spin_lock(&inode->i_lock); } - unlock_flocks(); + spin_unlock(&inode->i_lock); out: up_write(&nfsi->rwsem); return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 316ec843dec2..f17051838b41 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2645,13 +2645,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - /* only place dl_time is set. protected by lock_flocks*/ + /* Only place dl_time is set; protected by i_lock: */ dp->dl_time = get_seconds(); nfsd4_cb_recall(dp); } -/* Called from break_lease() with lock_flocks() held. */ +/* Called from break_lease() with i_lock held. */ static void nfsd_break_deleg_cb(struct file_lock *fl) { struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; @@ -4520,7 +4520,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) struct inode *inode = filp->fi_inode; int status = 0; - lock_flocks(); + spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { status = 1; @@ -4528,7 +4528,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) } } out: - unlock_flocks(); + spin_unlock(&inode->i_lock); return status; } diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index eed4d7b26249..741fd02e0444 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, } /** + * nilfs_palloc_count_desc_blocks - count descriptor blocks number + * @inode: inode of metadata file using this allocator + * @desc_blocks: descriptor blocks number [out] + */ +static int nilfs_palloc_count_desc_blocks(struct inode *inode, + unsigned long *desc_blocks) +{ + unsigned long blknum; + int ret; + + ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); + if (likely(!ret)) + *desc_blocks = DIV_ROUND_UP( + blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); + return ret; +} + +/** + * nilfs_palloc_mdt_file_can_grow - check potential opportunity for + * MDT file growing + * @inode: inode of metadata file using this allocator + * @desc_blocks: known current descriptor blocks count + */ +static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, + unsigned long desc_blocks) +{ + return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < + nilfs_palloc_groups_count(inode); +} + +/** + * nilfs_palloc_count_max_entries - count max number of entries that can be + * described by descriptor blocks count + * @inode: inode of metadata file using this allocator + * @nused: current number of used entries + * @nmaxp: max number of entries [out] + */ +int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) +{ + unsigned long desc_blocks = 0; + u64 entries_per_desc_block, nmax; + int err; + + err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); + if (unlikely(err)) + return err; + + entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * + nilfs_palloc_groups_per_desc_block(inode); + nmax = entries_per_desc_block * desc_blocks; + + if (nused == nmax && + nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) + nmax += entries_per_desc_block; + + if (nused > nmax) + return -ERANGE; + + *nmaxp = nmax; + return 0; +} + +/** * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index fb7238100548..4bd6451b5703 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, void *nilfs_palloc_block_get_entry(const struct inode *, __u64, const struct buffer_head *, void *); +int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); + /** * nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index d8e65bde083c..6548c7851b48 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } /** + * nilfs_ifile_count_free_inodes - calculate free inodes count + * @ifile: ifile inode + * @nmaxinodes: current maximum of available inodes count [out] + * @nfreeinodes: free inodes count [out] + */ +int nilfs_ifile_count_free_inodes(struct inode *ifile, + u64 *nmaxinodes, u64 *nfreeinodes) +{ + u64 nused; + int err; + + *nmaxinodes = 0; + *nfreeinodes = 0; + + nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); + err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); + if (likely(!err)) + *nfreeinodes = *nmaxinodes - nused; + return err; +} + +/** * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 59b6f2b51df6..679674d13372 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); + int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, size_t inode_size, struct nilfs_inode *raw_inode, struct inode **inodep); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index bccfec8343c5..b1a5277cfd18 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) inode_add_bytes(inode, (1 << inode->i_blkbits) * n); if (root) - atomic_add(n, &root->blocks_count); + atomic64_add(n, &root->blocks_count); } void nilfs_inode_sub_blocks(struct inode *inode, int n) @@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); if (root) - atomic_sub(n, &root->blocks_count); + atomic64_sub(n, &root->blocks_count); } /** @@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&root->inodes_count); + atomic64_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode) ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); if (!ret) - atomic_dec(&ii->i_root->inodes_count); + atomic64_dec(&ii->i_root->inodes_count); nilfs_clear_inode(inode); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a5752a589932..bd88a7461063 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); + cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f18b09..1427de5ebf4d 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, if (err) goto failed_bh; - atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + atomic64_set(&root->inodes_count, + le64_to_cpu(raw_cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, + le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); @@ -609,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; + u64 nmaxinodes, nfreeinodes; int err; /* @@ -633,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (unlikely(err)) return err; + err = nilfs_ifile_count_free_inodes(root->ifile, + &nmaxinodes, &nfreeinodes); + if (unlikely(err)) { + printk(KERN_WARNING + "NILFS warning: fail to count free inodes: err %d.\n", + err); + if (err == -ERANGE) { + /* + * If nilfs_palloc_count_max_entries() returns + * -ERANGE error code then we simply treat + * curent inodes count as maximum possible and + * zero as free inodes value. + */ + nmaxinodes = atomic64_read(&root->inodes_count); + nfreeinodes = 0; + err = 0; + } else + return err; + } + buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&root->inodes_count); - buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ + buf->f_files = nmaxinodes; + buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 41e6a04a561f..94c451ce6d24 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) new->ifile = NULL; new->nilfs = nilfs; atomic_set(&new->count, 1); - atomic_set(&new->inodes_count, 0); - atomic_set(&new->blocks_count, 0); + atomic64_set(&new->inodes_count, 0); + atomic64_set(&new->blocks_count, 0); rb_link_node(&new->rb_node, parent, p); rb_insert_color(&new->rb_node, &nilfs->ns_cptree); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index be1267a34cea..de8cc53b4a5c 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -241,8 +241,8 @@ struct nilfs_root { struct the_nilfs *nilfs; struct inode *ifile; - atomic_t inodes_count; - atomic_t blocks_count; + atomic64_t inodes_count; + atomic64_t blocks_count; }; /* Special checkpoint number */ diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6c80083a984f..1ea52f7c031f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -399,9 +399,6 @@ static int fanotify_release(struct inode *ignored, struct file *file) wake_up(&group->fanotify_data.access_waitq); #endif - if (file->f_flags & FASYNC) - fsnotify_fasync(-1, file, 0); - /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_destroy_group(group); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8a9d87231b1..17e6bdde96c5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode, &ref_tree, NULL); if (ret) { mlog_errno(ret); - goto out; + goto bail; } ret = ocfs2_prepare_refcount_change_for_del(inode, @@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode, &extra_blocks); if (ret < 0) { mlog_errno(ret); - goto out; + goto bail; } } @@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode, extra_blocks); if (ret) { mlog_errno(ret); - return ret; + goto bail; } mutex_lock(&tl_inode->i_mutex); @@ -5734,7 +5734,7 @@ out_commit: ocfs2_commit_trans(osb, handle); out: mutex_unlock(&tl_inode->i_mutex); - +bail: if (meta_ac) ocfs2_free_alloc_context(meta_ac); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 42252bf64b51..5c1c864e81cc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold) } } -static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) +static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode) { int ret = -1; @@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, } atomic_inc(&write_wc->wc_num_reqs); - submit_bio(WRITE, bio); + submit_bio(WRITE_SYNC, bio); status = 0; bail: @@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) continue; - ret = o2hb_global_hearbeat_mode_set(i); + ret = o2hb_global_heartbeat_mode_set(i); if (!ret) printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", o2hb_heartbeat_mode_desc[i]); @@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { NULL, }; -static struct configfs_item_operations o2hb_hearbeat_group_item_ops = { +static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { .show_attribute = o2hb_heartbeat_group_show, .store_attribute = o2hb_heartbeat_group_store, }; @@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { static struct config_item_type o2hb_heartbeat_group_type = { .ct_group_ops = &o2hb_heartbeat_group_group_ops, - .ct_item_ops = &o2hb_hearbeat_group_item_ops, + .ct_item_ops = &o2hb_heartbeat_group_item_ops, .ct_attrs = o2hb_heartbeat_group_attrs, .ct_owner = THIS_MODULE, }; @@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid) assert_spin_locked(&o2hb_live_lock); list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + uuid = config_item_name(®->hr_item); /* local heartbeat */ @@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid) assert_spin_locked(&o2hb_live_lock); list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + uuid = config_item_name(®->hr_item); if (region_uuid) { if (strcmp(region_uuid, uuid)) @@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions) p = region_uuids; list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + if (reg->hr_item_dropped) + continue; + mlog(0, "Region: %s\n", config_item_name(®->hr_item)); if (numregs < max_regions) { memcpy(p, config_item_name(®->hr_item), diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index c19897d0fe14..1ec141e758d7 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node) /* This is analogous to hb_up. as a node's connection comes up we delay the * quorum decision until we see it heartbeating. the hold will be droped in * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if - * it's already heartbeating we we might be dropping a hold that conn_up got. + * it's already heartbeating we might be dropping a hold that conn_up got. * */ void o2quo_conn_up(u8 node) { diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa88bd8bcedc..d644dc611425 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref) sc->sc_node = NULL; o2net_debug_del_sc(sc); + + if (sc->sc_page) + __free_page(sc->sc_page); kfree(sc); } @@ -630,19 +633,19 @@ static void o2net_state_change(struct sock *sk) state_change = sc->sc_state_change; switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - o2net_sc_queue_work(sc, &sc->sc_connect_work); - break; - default: - printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT - " shutdown, state %d\n", - SC_NODEF_ARGS(sc), sk->sk_state); - o2net_sc_queue_work(sc, &sc->sc_shutdown_work); - break; + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + o2net_sc_queue_work(sc, &sc->sc_connect_work); + break; + default: + printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT + " shutdown, state %d\n", + SC_NODEF_ARGS(sc), sk->sk_state); + o2net_sc_queue_work(sc, &sc->sc_shutdown_work); + break; } out: read_unlock(&sk->sk_callback_lock); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 975810b98492..47e67c2d228f 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, lock->ml.node); } } else { + status = DLM_NORMAL; dlm_lock_get(lock); list_add_tail(&lock->list, &res->blocked); kick_thread = 1; diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e6b1e8..773bd32bfd8c 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -55,9 +55,6 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); static int dlm_recovery_thread(void *data); -void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); -int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); -void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); static int dlm_do_recovery(struct dlm_ctxt *dlm); static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); @@ -789,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, u8 dead_node) { struct dlm_lock_request lr; - enum dlm_status ret; + int ret; mlog(0, "\n"); @@ -802,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, lr.dead_node = dead_node; // send message - ret = DLM_NOLOCKMGR; ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, &lr, sizeof(lr), request_from, NULL); @@ -2696,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, dlm->name, br->node_idx, br->dead_node, dlm->reco.dead_node, dlm->reco.new_master); spin_unlock(&dlm->spinlock); + dlm_put(dlm); return -EAGAIN; } spin_unlock(&dlm->spinlock); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8a38714f1d92..41000f223ca4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2646,17 +2646,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) goto out; } - if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - ret = -EINVAL; - if (!ret && offset > inode->i_sb->s_maxbytes) - ret = -EINVAL; - if (ret) - goto out; - - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out: mutex_unlock(&inode->i_mutex); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index a3385b63ff5e..96f9ac237e86 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) { - atomic_set(&osb->needs_checkpoint, 1); wake_up(&osb->checkpoint_event); } @@ -538,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + - ocfs2_quota_trans_credits(sb); + ocfs2_quota_trans_credits(sb) + bits_wanted; } static inline int ocfs2_calc_symlink_credits(struct super_block *sb) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b4a5cdf9dbc5..be3f8676a438 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); - le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); + fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); fe->i_atime = fe->i_ctime = fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = @@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry) return ret; } -static inline int inode_is_unlinkable(struct inode *inode) +static inline int ocfs2_inode_is_unlinkable(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { if (inode->i_nlink == 2) @@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir, { int status; int child_locked = 0; + bool is_unlinkable = false; struct inode *inode = dentry->d_inode; struct inode *orphan_dir = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); @@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir, goto leave; } - if (inode_is_unlinkable(inode)) { + if (ocfs2_inode_is_unlinkable(inode)) { status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, OCFS2_I(inode)->ip_blkno, orphan_name, &orphan_insert); @@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir, mlog_errno(status); goto leave; } + is_unlinkable = true; } handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); @@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir, fe = (struct ocfs2_dinode *) fe_bh->b_data; - if (inode_is_unlinkable(inode)) { - status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, - &orphan_insert, orphan_dir); - if (status < 0) { - mlog_errno(status); - goto leave; - } - } - /* delete the name from the parent dir */ status = ocfs2_delete_entry(handle, dir, &lookup); if (status < 0) { @@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir, mlog_errno(status); if (S_ISDIR(inode->i_mode)) inc_nlink(dir); + goto leave; + } + + if (is_unlinkable) { + status = ocfs2_orphan_add(osb, handle, inode, fe_bh, + orphan_name, &orphan_insert, orphan_dir); + if (status < 0) + mlog_errno(status); } leave: @@ -2012,6 +2013,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } + /* + * We're going to journal the change of i_flags and i_orphaned_slot. + * It's safe anyway, though some callers may duplicate the journaling. + * Journaling within the func just make the logic look more + * straightforward. + */ + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + /* we're a cluster, and nlink can change on disk from * underneath us... */ orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; @@ -2026,25 +2042,10 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, orphan_dir_bh, lookup); if (status < 0) { mlog_errno(status); - goto leave; - } - - /* - * We're going to journal the change of i_flags and i_orphaned_slot. - * It's safe anyway, though some callers may duplicate the journaling. - * Journaling within the func just make the logic look more - * straightforward. - */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(inode), - fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto leave; + goto rollback; } - le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); + fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; /* Record which orphan dir our inode now resides @@ -2057,11 +2058,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); +rollback: + if (status < 0) { + if (S_ISDIR(inode->i_mode)) + ocfs2_add_links_count(orphan_fe, -1); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); + } + leave: brelse(orphan_dir_bh); - if (status) - mlog_errno(status); return status; } @@ -2434,7 +2440,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, } di = (struct ocfs2_dinode *)di_bh->b_data; - le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); + di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL); di->i_orphaned_slot = 0; set_nlink(inode, 1); ocfs2_set_links_count(di, inode->i_nlink); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d355e6e36b36..3a903470c794 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -347,7 +347,6 @@ struct ocfs2_super struct task_struct *recovery_thread_task; int disable_recovery; wait_queue_head_t checkpoint_event; - atomic_t needs_checkpoint; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b580c0f..5397c07ce608 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle, int status; /* there is a really tiny chance the journal calls could fail, * but we wouldn't want inconsistent blocks in *any* case. */ - u64 fe_ptr, bg_ptr, prev_bg_ptr; + u64 bg_ptr, prev_bg_ptr; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; @@ -1437,51 +1437,44 @@ static int ocfs2_relink_block_group(handle_t *handle, (unsigned long long)le64_to_cpu(bg->bg_blkno), (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); - fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), prev_bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out; prev_bg->bg_next_group = bg->bg_next_group; ocfs2_journal_dirty(handle, prev_bg_bh); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out_rollback_prev_bg; bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; ocfs2_journal_dirty(handle, bg_bh); status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto out_rollback; - } + if (status < 0) + goto out_rollback_bg; fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; ocfs2_journal_dirty(handle, fe_bh); -out_rollback: - if (status < 0) { - fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); - bg->bg_next_group = cpu_to_le64(bg_ptr); - prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); - } - - if (status) +out: + if (status < 0) mlog_errno(status); return status; + +out_rollback_bg: + bg->bg_next_group = cpu_to_le64(bg_ptr); +out_rollback_prev_bg: + prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); + goto out; } static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..854d80955bf8 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) spin_unlock(&osb->osb_lock); out += snprintf(buf + out, len - out, - "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", + "%10s => Pid: %d Interval: %lu\n", "Commit", (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), - osb->osb_commit_interval, - atomic_read(&osb->needs_checkpoint)); + osb->osb_commit_interval); out += snprintf(buf + out, len - out, "%10s => State: %d TxnId: %lu NumTxns: %d\n", @@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb, } init_waitqueue_head(&osb->checkpoint_event); - atomic_set(&osb->needs_checkpoint, 0); osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..317ef0abccbb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, { int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; struct ocfs2_xa_loc loc; if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) @@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, down_write(&oi->ip_alloc_sem); if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { - if (!ocfs2_xattr_has_space_inline(inode, di)) { - ret = -ENOSPC; - goto out; - } - } - - if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); if (ret) { if (ret != -ENOSPC) @@ -6499,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); diff --git a/fs/open.c b/fs/open.c index 8c741002f947..fca72c4d3f17 100644 --- a/fs/open.c +++ b/fs/open.c @@ -840,11 +840,15 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o if (flags & __O_SYNC) flags |= O_DSYNC; - /* - * If we have O_PATH in the open flag. Then we - * cannot have anything other than the below set of flags - */ - if (flags & O_PATH) { + if (flags & O_TMPFILE) { + if (!(flags & O_CREAT)) + return -EINVAL; + acc_mode = MAY_OPEN | ACC_MODE(flags); + } else if (flags & O_PATH) { + /* + * If we have O_PATH in the open flag. Then we + * cannot have anything other than the below set of flags + */ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; acc_mode = 0; } else { @@ -876,7 +880,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - return lookup_flags; + op->lookup_flags = lookup_flags; + return 0; } /** @@ -893,8 +898,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o struct file *file_open_name(struct filename *name, int flags, umode_t mode) { struct open_flags op; - int lookup = build_open_flags(flags, mode, &op); - return do_filp_open(AT_FDCWD, name, &op, lookup); + int err = build_open_flags(flags, mode, &op); + return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); } /** @@ -919,37 +924,43 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, const char *filename, int flags) { struct open_flags op; - int lookup = build_open_flags(flags, 0, &op); + int err = build_open_flags(flags, 0, &op); + if (err) + return ERR_PTR(err); if (flags & O_CREAT) return ERR_PTR(-EINVAL); if (!filename && (flags & O_DIRECTORY)) if (!dentry->d_inode->i_op->lookup) return ERR_PTR(-ENOTDIR); - return do_file_open_root(dentry, mnt, filename, &op, lookup); + return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; - int lookup = build_open_flags(flags, mode, &op); - struct filename *tmp = getname(filename); - int fd = PTR_ERR(tmp); - - if (!IS_ERR(tmp)) { - fd = get_unused_fd_flags(flags); - if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, &op, lookup); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = PTR_ERR(f); - } else { - fsnotify_open(f); - fd_install(fd, f); - } + int fd = build_open_flags(flags, mode, &op); + struct filename *tmp; + + if (fd) + return fd; + + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + fd = get_unused_fd_flags(flags); + if (fd >= 0) { + struct file *f = do_filp_open(dfd, tmp, &op); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + } else { + fsnotify_open(f); + fd_install(fd, f); } - putname(tmp); } + putname(tmp); return fd; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 0016350ad95e..1485e38daaa3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1686,41 +1686,29 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = file->f_path.dentry; + struct qstr qname = QSTR_INIT(name, len); struct inode *inode; - struct qstr qname; - ino_t ino = 0; - unsigned type = DT_UNKNOWN; + unsigned type; + ino_t ino; - qname.name = name; - qname.len = len; - qname.hash = full_name_hash(name, len); - - child = d_lookup(dir, &qname); + child = d_hash_and_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - child = instantiate(dir->d_inode, new, task, ptr); - if (child) - dput(new); - else - child = new; + child = d_alloc(dir, &qname); + if (!child) + goto end_instantiate; + if (instantiate(dir->d_inode, child, task, ptr) < 0) { + dput(child); + goto end_instantiate; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); -end_instantiate: - if (!ino) - ino = find_inode_number(dir, &qname); - if (!ino) - ino = 1; return dir_emit(ctx, name, len, ino, type); + +end_instantiate: + return dir_emit(ctx, name, len, 1, DT_UNKNOWN); } #ifdef CONFIG_CHECKPOINT_RESTORE @@ -1846,7 +1834,7 @@ struct map_files_info { unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; -static struct dentry * +static int proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) - return ERR_PTR(-ENOENT); + return -ENOENT; ei = PROC_I(inode); ei->op.proc_get_link = proc_map_files_get_link; @@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, d_set_d_op(dentry, &tid_map_files_dentry_operations); d_add(dentry, inode); - return NULL; + return 0; } static struct dentry *proc_map_files_lookup(struct inode *dir, @@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; - struct dentry *result; + int result; struct mm_struct *mm; - result = ERR_PTR(-EPERM); + result = -EPERM; if (!capable(CAP_SYS_ADMIN)) goto out; - result = ERR_PTR(-ENOENT); + result = -ENOENT; task = get_proc_task(dir); if (!task) goto out; - result = ERR_PTR(-EACCES); + result = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - result = ERR_PTR(-ENOENT); + result = -ENOENT; if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; @@ -1921,7 +1909,7 @@ out_no_vma: out_put_task: put_task_struct(task); out: - return result; + return ERR_PTR(result); } static const struct inode_operations proc_map_files_inode_operations = { @@ -2135,13 +2123,12 @@ static const struct file_operations proc_timers_operations = { }; #endif /* CONFIG_CHECKPOINT_RESTORE */ -static struct dentry *proc_pident_instantiate(struct inode *dir, +static int proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) @@ -2160,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -2170,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, const struct pid_entry *ents, unsigned int nents) { - struct dentry *error; + int error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; - error = ERR_PTR(-ENOENT); + error = -ENOENT; if (!task) goto out_no_task; @@ -2197,7 +2184,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, out: put_task_struct(task); out_no_task: - return error; + return ERR_PTR(error); } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, @@ -2780,11 +2767,10 @@ void proc_flush_task(struct task_struct *task) } } -static struct dentry *proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, - struct task_struct *task, const void *ptr) +static int proc_pid_instantiate(struct inode *dir, + struct dentry * dentry, + struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -2804,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = NULL; + int result = 0; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; @@ -2832,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign result = proc_pid_instantiate(dir, dentry, task, NULL); put_task_struct(task); out: - return result; + return ERR_PTR(result); } /* @@ -2884,21 +2870,21 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns; + struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; if (pos == TGID_OFFSET - 1) { - if (!proc_fill_cache(file, ctx, "self", 4, NULL, NULL, NULL)) + struct inode *inode = ns->proc_self->d_inode; + if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; iter.tgid = 0; } else { iter.tgid = pos - TGID_OFFSET; } iter.task = NULL; - ns = file->f_dentry->d_sb->s_fs_info; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { @@ -3027,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = { .setattr = proc_setattr, }; -static struct dentry *proc_task_instantiate(struct inode *dir, +static int proc_task_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -3049,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = ERR_PTR(-ENOENT); + int result = -ENOENT; struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; @@ -3086,7 +3071,7 @@ out_drop_task: out: put_task_struct(leader); out_no_task: - return result; + return ERR_PTR(result); } /* diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1441f143c43b..75f2890abbd8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) return ret; } -static struct dentry * +static int proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); unsigned fd = (unsigned long)ptr; struct proc_inode *ei; struct inode *inode; @@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry, /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_lookupfd_common(struct inode *dir, @@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, instantiate_t instantiate) { struct task_struct *task = get_proc_task(dir); - struct dentry *result = ERR_PTR(-ENOENT); + int result = -ENOENT; unsigned fd = name_to_int(dentry); if (!task) @@ -216,7 +215,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, out: put_task_struct(task); out_no_task: - return result; + return ERR_PTR(result); } static int proc_readfd_common(struct file *file, struct dir_context *ctx, @@ -300,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = { .setattr = proc_setattr, }; -static struct dentry * +static int proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); unsigned fd = (unsigned long)ptr; struct proc_inode *ei; struct inode *inode; @@ -324,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, /* Close the race of the process dying before we return the dentry */ if (tid_fd_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry * diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4eae2e149f31..651d09a11dde 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -170,7 +170,7 @@ extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, +typedef int instantiate_t(struct inode *, struct dentry *, struct task_struct *, const void *); extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, instantiate_t, struct task_struct *, const void *); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0a22194e5d58..06ea155e1a59 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) prpsinfo.pr_zomb = 0; strcpy(prpsinfo.pr_fname, "vmlinux"); - strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); + strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); nhdr->p_filesz += notesize(¬es[1]); bufp = storenote(¬es[1], bufp); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6abbbbfad8a..49a7fff2e83a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = { .setattr = proc_setattr, }; -static struct dentry *proc_ns_instantiate(struct inode *dir, +static int proc_ns_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct proc_ns_operations *ns_ops = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) @@ -208,9 +207,9 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) @@ -248,12 +247,12 @@ const struct file_operations proc_ns_dir_operations = { static struct dentry *proc_ns_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct dentry *error; + int error; struct task_struct *task = get_proc_task(dir); const struct proc_ns_operations **entry, **last; unsigned int len = dentry->d_name.len; - error = ERR_PTR(-ENOENT); + error = -ENOENT; if (!task) goto out_no_task; @@ -272,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, out: put_task_struct(task); out_no_task: - return error; + return ERR_PTR(error); } const struct inode_operations proc_ns_dir_inode_operations = { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f3a570e7c257..71290463a1d3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -796,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p) return res; } -static int proc_sys_compare(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, +static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct ctl_table_header *head; + struct inode *inode; + /* Although proc doesn't have negative dentries, rcu-walk means * that inode here can be NULL */ /* AV: can it, indeed? */ + inode = ACCESS_ONCE(dentry->d_inode); if (!inode) return 1; if (name->len != len) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..dbf61f6174f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -11,6 +11,7 @@ #include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/mmu_notifier.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -688,10 +689,58 @@ const struct file_operations proc_tid_smaps_operations = { .release = seq_release_private, }; +/* + * We do not want to have constant page-shift bits sitting in + * pagemap entries and are about to reuse them some time soon. + * + * Here's the "migration strategy": + * 1. when the system boots these bits remain what they are, + * but a warning about future change is printed in log; + * 2. once anyone clears soft-dirty bits via clear_refs file, + * these flag is set to denote, that user is aware of the + * new API and those page-shift bits change their meaning. + * The respective warning is printed in dmesg; + * 3. In a couple of releases we will remove all the mentions + * of page-shift in pagemap entries. + */ + +static bool soft_dirty_cleared __read_mostly; + +enum clear_refs_types { + CLEAR_REFS_ALL = 1, + CLEAR_REFS_ANON, + CLEAR_REFS_MAPPED, + CLEAR_REFS_SOFT_DIRTY, + CLEAR_REFS_LAST, +}; + +struct clear_refs_private { + struct vm_area_struct *vma; + enum clear_refs_types type; +}; + +static inline void clear_soft_dirty(struct vm_area_struct *vma, + unsigned long addr, pte_t *pte) +{ +#ifdef CONFIG_MEM_SOFT_DIRTY + /* + * The soft-dirty tracker uses #PF-s to catch writes + * to pages, so write-protect the pte as well. See the + * Documentation/vm/soft-dirty.txt for full description + * of how soft-dirty works. + */ + pte_t ptent = *pte; + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + set_pte_at(vma->vm_mm, addr, pte, ptent); +#endif +} + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->private; + struct clear_refs_private *cp = walk->private; + struct vm_area_struct *vma = cp->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -706,6 +755,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!pte_present(ptent)) continue; + if (cp->type == CLEAR_REFS_SOFT_DIRTY) { + clear_soft_dirty(vma, addr, pte); + continue; + } + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -719,10 +773,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -#define CLEAR_REFS_ALL 1 -#define CLEAR_REFS_ANON 2 -#define CLEAR_REFS_MAPPED 3 - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -730,7 +780,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; - int type; + enum clear_refs_types type; + int itype; int rv; memset(buffer, 0, sizeof(buffer)); @@ -738,23 +789,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - rv = kstrtoint(strstrip(buffer), 10, &type); + rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + type = (enum clear_refs_types)itype; + if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; + + if (type == CLEAR_REFS_SOFT_DIRTY) { + soft_dirty_cleared = true; + pr_warn_once("The pagemap bits 55-60 has changed their meaning! " + "See the linux/Documentation/vm/pagemap.txt for details.\n"); + } + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); if (mm) { + struct clear_refs_private cp = { + .type = type, + }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, .mm = mm, + .private = &cp, }; down_read(&mm->mmap_sem); + if (type == CLEAR_REFS_SOFT_DIRTY) + mmu_notifier_invalidate_range_start(mm, 0, -1); for (vma = mm->mmap; vma; vma = vma->vm_next) { - clear_refs_walk.private = vma; + cp.vma = vma; if (is_vm_hugetlb_page(vma)) continue; /* @@ -773,6 +838,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } + if (type == CLEAR_REFS_SOFT_DIRTY) + mmu_notifier_invalidate_range_end(mm, 0, -1); flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); @@ -794,6 +861,7 @@ typedef struct { struct pagemapread { int pos, len; pagemap_entry_t *buffer; + bool v2; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) @@ -807,14 +875,17 @@ struct pagemapread { #define PM_PSHIFT_BITS 6 #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) -#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +/* in "new" pagemap pshift bits are occupied with more status bits */ +#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) +#define __PM_SOFT_DIRTY (1LL) #define PM_PRESENT PM_STATUS(4LL) #define PM_SWAP PM_STATUS(2LL) #define PM_FILE PM_STATUS(1LL) -#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) +#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) #define PM_END_OF_BUFFER 1 static inline pagemap_entry_t make_pme(u64 val) @@ -837,7 +908,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); for (addr = start; addr < end; addr += PAGE_SIZE) { err = add_to_pagemap(addr, &pme, pm); @@ -847,11 +918,12 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, return err; } -static void pte_to_pagemap_entry(pagemap_entry_t *pme, +static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { u64 frame, flags; struct page *page = NULL; + int flags2 = 0; if (pte_present(pte)) { frame = pte_pfn(pte); @@ -866,19 +938,21 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; - *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); + *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, - pmd_t pmd, int offset) +static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, + pmd_t pmd, int offset, int pmd_flags2) { /* * Currently pmd for thp is always present because thp can not be @@ -887,13 +961,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, */ if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } #else -static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, - pmd_t pmd, int offset) +static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, + pmd_t pmd, int offset, int pmd_flags2) { } #endif @@ -905,17 +979,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + int pmd_flags2; + + pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; - thp_pmd_to_pagemap_entry(&pme, *pmd, offset); + thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); err = add_to_pagemap(addr, &pme, pm); if (err) break; @@ -932,7 +1009,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT); + pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* check that 'vma' actually covers this address, @@ -940,7 +1017,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pte_to_pagemap_entry(&pme, vma, addr, *pte); + pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); /* unmap before userspace copy */ pte_unmap(pte); } @@ -955,14 +1032,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } #ifdef CONFIG_HUGETLB_PAGE -static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, +static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, pte_t pte, int offset) { if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + | PM_STATUS2(pm->v2, 0) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT); + *pme = make_pme(PM_NOT_PRESENT(pm->v2)); } /* This function walks within one hugetlb entry in the single call */ @@ -976,7 +1053,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1038,6 +1115,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; + pm.v2 = soft_dirty_cleared; pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; @@ -1110,9 +1188,18 @@ out: return ret; } +static int pagemap_open(struct inode *inode, struct file *file) +{ + pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " + "to stop being page-shift some time soon. See the " + "linux/Documentation/vm/pagemap.txt for details.\n"); + return 0; +} + const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, + .open = pagemap_open, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7e..061894625903 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) for_each_possible_cpu(i) idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; - do_posix_clock_monotonic_gettime(&uptime); - monotonic_to_bootbased(&uptime); + get_monotonic_boottime(&uptime); nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e080d7ff..28503172f2e4 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/crash_dump.h> #include <linux/list.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> #include <asm/io.h> #include "internal.h" @@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list); /* Stores the pointer to the buffer containing kernel elf core headers. */ static char *elfcorebuf; static size_t elfcorebuf_sz; +static size_t elfcorebuf_sz_orig; + +static char *elfnotes_buf; +static size_t elfnotes_sz; /* Total size of vmcore file. */ static u64 vmcore_size; @@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, return read; } -/* Maps vmcore file offset to respective physical address in memroy. */ -static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, - struct vmcore **m_ptr) -{ - struct vmcore *m; - u64 paddr; - - list_for_each_entry(m, vc_list, list) { - u64 start, end; - start = m->offset; - end = m->offset + m->size - 1; - if (offset >= start && offset <= end) { - paddr = m->paddr + offset - start; - *m_ptr = m; - return paddr; - } - } - *m_ptr = NULL; - return 0; -} - /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ @@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, { ssize_t acc = 0, tmp; size_t tsz; - u64 start, nr_bytes; - struct vmcore *curr_m = NULL; + u64 start; + struct vmcore *m = NULL; if (buflen == 0 || *fpos >= vmcore_size) return 0; @@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { - tsz = elfcorebuf_sz - *fpos; - if (buflen < tsz) - tsz = buflen; + tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) return -EFAULT; buflen -= tsz; @@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } - start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); - if (!curr_m) - return -EINVAL; - - while (buflen) { - tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); + /* Read Elf note segment */ + if (*fpos < elfcorebuf_sz + elfnotes_sz) { + void *kaddr; - /* Calculate left bytes in current memory segment. */ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; - - tmp = read_from_oldmem(buffer, tsz, &start, 1); - if (tmp < 0) - return tmp; + tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); + kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; + if (copy_to_user(buffer, kaddr, tsz)) + return -EFAULT; buflen -= tsz; *fpos += tsz; buffer += tsz; acc += tsz; - if (start >= (curr_m->paddr + curr_m->size)) { - if (curr_m->list.next == &vmcore_list) - return acc; /*EOF*/ - curr_m = list_entry(curr_m->list.next, - struct vmcore, list); - start = curr_m->paddr; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; + } + + list_for_each_entry(m, &vmcore_list, list) { + if (*fpos < m->offset + m->size) { + tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); + start = m->paddr + *fpos - m->offset; + tmp = read_from_oldmem(buffer, tsz, &start, 1); + if (tmp < 0) + return tmp; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; } } + return acc; } +/** + * alloc_elfnotes_buf - allocate buffer for ELF note segment in + * vmalloc memory + * + * @notes_sz: size of buffer + * + * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap + * the buffer to user-space by means of remap_vmalloc_range(). + * + * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is + * disabled and there's no need to allow users to mmap the buffer. + */ +static inline char *alloc_elfnotes_buf(size_t notes_sz) +{ +#ifdef CONFIG_MMU + return vmalloc_user(notes_sz); +#else + return vzalloc(notes_sz); +#endif +} + +/* + * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is + * essential for mmap_vmcore() in order to map physically + * non-contiguous objects (ELF header, ELF note segment and memory + * regions in the 1st kernel pointed to by PT_LOAD entries) into + * virtually contiguous user-space in ELF layout. + */ +#ifdef CONFIG_MMU +static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + u64 start, end, len, tsz; + struct vmcore *m; + + start = (u64)vma->vm_pgoff << PAGE_SHIFT; + end = start + size; + + if (size > vmcore_size || end > vmcore_size) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + return -EPERM; + + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + vma->vm_flags |= VM_MIXEDMAP; + + len = 0; + + if (start < elfcorebuf_sz) { + u64 pfn; + + tsz = min(elfcorebuf_sz - (size_t)start, size); + pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; + if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, + vma->vm_page_prot)) + return -EAGAIN; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + + if (start < elfcorebuf_sz + elfnotes_sz) { + void *kaddr; + + tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); + kaddr = elfnotes_buf + start - elfcorebuf_sz; + if (remap_vmalloc_range_partial(vma, vma->vm_start + len, + kaddr, tsz)) + goto fail; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + + list_for_each_entry(m, &vmcore_list, list) { + if (start < m->offset + m->size) { + u64 paddr = 0; + + tsz = min_t(size_t, m->offset + m->size - start, size); + paddr = m->paddr + start - m->offset; + if (remap_pfn_range(vma, vma->vm_start + len, + paddr >> PAGE_SHIFT, tsz, + vma->vm_page_prot)) + goto fail; + size -= tsz; + start += tsz; + len += tsz; + + if (size == 0) + return 0; + } + } + + return 0; +fail: + do_munmap(vma->vm_mm, vma->vm_start, len); + return -EAGAIN; +} +#else +static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) +{ + return -ENOSYS; +} +#endif + static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, .llseek = default_llseek, + .mmap = mmap_vmcore, }; static struct vmcore* __init get_new_element(void) @@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void) return kzalloc(sizeof(struct vmcore), GFP_KERNEL); } -static u64 __init get_vmcore_size_elf64(char *elfptr) +static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, + struct list_head *vc_list) { - int i; - u64 size; - Elf64_Ehdr *ehdr_ptr; - Elf64_Phdr *phdr_ptr; - - ehdr_ptr = (Elf64_Ehdr *)elfptr; - phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); - size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); - for (i = 0; i < ehdr_ptr->e_phnum; i++) { - size += phdr_ptr->p_memsz; - phdr_ptr++; - } - return size; -} - -static u64 __init get_vmcore_size_elf32(char *elfptr) -{ - int i; u64 size; - Elf32_Ehdr *ehdr_ptr; - Elf32_Phdr *phdr_ptr; + struct vmcore *m; - ehdr_ptr = (Elf32_Ehdr *)elfptr; - phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); - size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); - for (i = 0; i < ehdr_ptr->e_phnum; i++) { - size += phdr_ptr->p_memsz; - phdr_ptr++; + size = elfsz + elfnotesegsz; + list_for_each_entry(m, vc_list, list) { + size += m->size; } return size; } -/* Merges all the PT_NOTE headers into one. */ -static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, - struct list_head *vc_list) +/** + * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry + * + * @ehdr_ptr: ELF header + * + * This function updates p_memsz member of each PT_NOTE entry in the + * program header table pointed to by @ehdr_ptr to real size of ELF + * note segment. + */ +static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) { - int i, nr_ptnote=0, rc=0; - char *tmp; - Elf64_Ehdr *ehdr_ptr; - Elf64_Phdr phdr, *phdr_ptr; + int i, rc=0; + Elf64_Phdr *phdr_ptr; Elf64_Nhdr *nhdr_ptr; - u64 phdr_sz = 0, note_off; - ehdr_ptr = (Elf64_Ehdr *)elfptr; - phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { - int j; void *notes_section; - struct vmcore *new; u64 offset, max_sz, sz, real_sz = 0; if (phdr_ptr->p_type != PT_NOTE) continue; - nr_ptnote++; max_sz = phdr_ptr->p_memsz; offset = phdr_ptr->p_offset; notes_section = kmalloc(max_sz, GFP_KERNEL); @@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return rc; } nhdr_ptr = notes_section; - for (j = 0; j < max_sz; j += sz) { + while (real_sz < max_sz) { if (nhdr_ptr->n_namesz == 0) break; sz = sizeof(Elf64_Nhdr) + @@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, real_sz += sz; nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); } - - /* Add this contiguous chunk of notes section to vmcore list.*/ - new = get_new_element(); - if (!new) { - kfree(notes_section); - return -ENOMEM; - } - new->paddr = phdr_ptr->p_offset; - new->size = real_sz; - list_add_tail(&new->list, vc_list); - phdr_sz += real_sz; kfree(notes_section); + phdr_ptr->p_memsz = real_sz; + } + + return 0; +} + +/** + * get_note_number_and_size_elf64 - get the number of PT_NOTE program + * headers and sum of real size of their ELF note segment headers and + * data. + * + * @ehdr_ptr: ELF header + * @nr_ptnote: buffer for the number of PT_NOTE program headers + * @sz_ptnote: buffer for size of unique PT_NOTE program header + * + * This function is used to merge multiple PT_NOTE program headers + * into a unique single one. The resulting unique entry will have + * @sz_ptnote in its phdr->p_mem. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf64 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, + int *nr_ptnote, u64 *sz_ptnote) +{ + int i; + Elf64_Phdr *phdr_ptr; + + *nr_ptnote = *sz_ptnote = 0; + + phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_NOTE) + continue; + *nr_ptnote += 1; + *sz_ptnote += phdr_ptr->p_memsz; + } + + return 0; +} + +/** + * copy_notes_elf64 - copy ELF note segments in a given buffer + * + * @ehdr_ptr: ELF header + * @notes_buf: buffer into which ELF note segments are copied + * + * This function is used to copy ELF note segment in the 1st kernel + * into the buffer @notes_buf in the 2nd kernel. It is assumed that + * size of the buffer @notes_buf is equal to or larger than sum of the + * real ELF note segment headers and data. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf64 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) +{ + int i, rc=0; + Elf64_Phdr *phdr_ptr; + + phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 offset; + if (phdr_ptr->p_type != PT_NOTE) + continue; + offset = phdr_ptr->p_offset; + rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + if (rc < 0) + return rc; + notes_buf += phdr_ptr->p_memsz; } + return 0; +} + +/* Merges all the PT_NOTE headers into one. */ +static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, + char **notes_buf, size_t *notes_sz) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr phdr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + + rc = update_note_header_size_elf64(ehdr_ptr); + if (rc < 0) + return rc; + + rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); + if (rc < 0) + return rc; + + *notes_sz = roundup(phdr_sz, PAGE_SIZE); + *notes_buf = alloc_elfnotes_buf(*notes_sz); + if (!*notes_buf) + return -ENOMEM; + + rc = copy_notes_elf64(ehdr_ptr, *notes_buf); + if (rc < 0) + return rc; + /* Prepare merged PT_NOTE program header. */ phdr.p_type = PT_NOTE; phdr.p_flags = 0; note_off = sizeof(Elf64_Ehdr) + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); - phdr.p_offset = note_off; + phdr.p_offset = roundup(note_off, PAGE_SIZE); phdr.p_vaddr = phdr.p_paddr = 0; phdr.p_filesz = phdr.p_memsz = phdr_sz; phdr.p_align = 0; @@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); *elfsz = *elfsz - i; memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); + memset(elfptr + *elfsz, 0, i); + *elfsz = roundup(*elfsz, PAGE_SIZE); /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; @@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return 0; } -/* Merges all the PT_NOTE headers into one. */ -static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, - struct list_head *vc_list) +/** + * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry + * + * @ehdr_ptr: ELF header + * + * This function updates p_memsz member of each PT_NOTE entry in the + * program header table pointed to by @ehdr_ptr to real size of ELF + * note segment. + */ +static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) { - int i, nr_ptnote=0, rc=0; - char *tmp; - Elf32_Ehdr *ehdr_ptr; - Elf32_Phdr phdr, *phdr_ptr; + int i, rc=0; + Elf32_Phdr *phdr_ptr; Elf32_Nhdr *nhdr_ptr; - u64 phdr_sz = 0, note_off; - ehdr_ptr = (Elf32_Ehdr *)elfptr; - phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { - int j; void *notes_section; - struct vmcore *new; u64 offset, max_sz, sz, real_sz = 0; if (phdr_ptr->p_type != PT_NOTE) continue; - nr_ptnote++; max_sz = phdr_ptr->p_memsz; offset = phdr_ptr->p_offset; notes_section = kmalloc(max_sz, GFP_KERNEL); @@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, return rc; } nhdr_ptr = notes_section; - for (j = 0; j < max_sz; j += sz) { + while (real_sz < max_sz) { if (nhdr_ptr->n_namesz == 0) break; sz = sizeof(Elf32_Nhdr) + @@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, real_sz += sz; nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); } - - /* Add this contiguous chunk of notes section to vmcore list.*/ - new = get_new_element(); - if (!new) { - kfree(notes_section); - return -ENOMEM; - } - new->paddr = phdr_ptr->p_offset; - new->size = real_sz; - list_add_tail(&new->list, vc_list); - phdr_sz += real_sz; kfree(notes_section); + phdr_ptr->p_memsz = real_sz; + } + + return 0; +} + +/** + * get_note_number_and_size_elf32 - get the number of PT_NOTE program + * headers and sum of real size of their ELF note segment headers and + * data. + * + * @ehdr_ptr: ELF header + * @nr_ptnote: buffer for the number of PT_NOTE program headers + * @sz_ptnote: buffer for size of unique PT_NOTE program header + * + * This function is used to merge multiple PT_NOTE program headers + * into a unique single one. The resulting unique entry will have + * @sz_ptnote in its phdr->p_mem. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf32 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, + int *nr_ptnote, u64 *sz_ptnote) +{ + int i; + Elf32_Phdr *phdr_ptr; + + *nr_ptnote = *sz_ptnote = 0; + + phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_NOTE) + continue; + *nr_ptnote += 1; + *sz_ptnote += phdr_ptr->p_memsz; + } + + return 0; +} + +/** + * copy_notes_elf32 - copy ELF note segments in a given buffer + * + * @ehdr_ptr: ELF header + * @notes_buf: buffer into which ELF note segments are copied + * + * This function is used to copy ELF note segment in the 1st kernel + * into the buffer @notes_buf in the 2nd kernel. It is assumed that + * size of the buffer @notes_buf is equal to or larger than sum of the + * real ELF note segment headers and data. + * + * It is assumed that program headers with PT_NOTE type pointed to by + * @ehdr_ptr has already been updated by update_note_header_size_elf32 + * and each of PT_NOTE program headers has actual ELF note segment + * size in its p_memsz member. + */ +static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) +{ + int i, rc=0; + Elf32_Phdr *phdr_ptr; + + phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 offset; + if (phdr_ptr->p_type != PT_NOTE) + continue; + offset = phdr_ptr->p_offset; + rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + if (rc < 0) + return rc; + notes_buf += phdr_ptr->p_memsz; } + return 0; +} + +/* Merges all the PT_NOTE headers into one. */ +static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, + char **notes_buf, size_t *notes_sz) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr phdr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + + rc = update_note_header_size_elf32(ehdr_ptr); + if (rc < 0) + return rc; + + rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); + if (rc < 0) + return rc; + + *notes_sz = roundup(phdr_sz, PAGE_SIZE); + *notes_buf = alloc_elfnotes_buf(*notes_sz); + if (!*notes_buf) + return -ENOMEM; + + rc = copy_notes_elf32(ehdr_ptr, *notes_buf); + if (rc < 0) + return rc; + /* Prepare merged PT_NOTE program header. */ phdr.p_type = PT_NOTE; phdr.p_flags = 0; note_off = sizeof(Elf32_Ehdr) + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); - phdr.p_offset = note_off; + phdr.p_offset = roundup(note_off, PAGE_SIZE); phdr.p_vaddr = phdr.p_paddr = 0; phdr.p_filesz = phdr.p_memsz = phdr_sz; phdr.p_align = 0; @@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); *elfsz = *elfsz - i; memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); + memset(elfptr + *elfsz, 0, i); + *elfsz = roundup(*elfsz, PAGE_SIZE); /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; @@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, * the new offset fields of exported program headers. */ static int __init process_ptload_program_headers_elf64(char *elfptr, size_t elfsz, + size_t elfnotes_sz, struct list_head *vc_list) { int i; @@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, ehdr_ptr = (Elf64_Ehdr *)elfptr; phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ - /* First program header is PT_NOTE header. */ - vmcore_off = sizeof(Elf64_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + - phdr_ptr->p_memsz; /* Note sections */ + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 paddr, start, end, size; + if (phdr_ptr->p_type != PT_LOAD) continue; + paddr = phdr_ptr->p_offset; + start = rounddown(paddr, PAGE_SIZE); + end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); + size = end - start; + /* Add this contiguous chunk of memory to vmcore list.*/ new = get_new_element(); if (!new) return -ENOMEM; - new->paddr = phdr_ptr->p_offset; - new->size = phdr_ptr->p_memsz; + new->paddr = start; + new->size = size; list_add_tail(&new->list, vc_list); /* Update the program header offset. */ - phdr_ptr->p_offset = vmcore_off; - vmcore_off = vmcore_off + phdr_ptr->p_memsz; + phdr_ptr->p_offset = vmcore_off + (paddr - start); + vmcore_off = vmcore_off + size; } return 0; } static int __init process_ptload_program_headers_elf32(char *elfptr, size_t elfsz, + size_t elfnotes_sz, struct list_head *vc_list) { int i; @@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, ehdr_ptr = (Elf32_Ehdr *)elfptr; phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ - /* First program header is PT_NOTE header. */ - vmcore_off = sizeof(Elf32_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + - phdr_ptr->p_memsz; /* Note sections */ + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + u64 paddr, start, end, size; + if (phdr_ptr->p_type != PT_LOAD) continue; + paddr = phdr_ptr->p_offset; + start = rounddown(paddr, PAGE_SIZE); + end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); + size = end - start; + /* Add this contiguous chunk of memory to vmcore list.*/ new = get_new_element(); if (!new) return -ENOMEM; - new->paddr = phdr_ptr->p_offset; - new->size = phdr_ptr->p_memsz; + new->paddr = start; + new->size = size; list_add_tail(&new->list, vc_list); /* Update the program header offset */ - phdr_ptr->p_offset = vmcore_off; - vmcore_off = vmcore_off + phdr_ptr->p_memsz; + phdr_ptr->p_offset = vmcore_off + (paddr - start); + vmcore_off = vmcore_off + size; } return 0; } /* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets_elf64(char *elfptr, - struct list_head *vc_list) +static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, + struct list_head *vc_list) { loff_t vmcore_off; - Elf64_Ehdr *ehdr_ptr; struct vmcore *m; - ehdr_ptr = (Elf64_Ehdr *)elfptr; - - /* Skip Elf header and program headers. */ - vmcore_off = sizeof(Elf64_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); + /* Skip Elf header, program headers and Elf note segment. */ + vmcore_off = elfsz + elfnotes_sz; list_for_each_entry(m, vc_list, list) { m->offset = vmcore_off; @@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, } } -/* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets_elf32(char *elfptr, - struct list_head *vc_list) +static void free_elfcorebuf(void) { - loff_t vmcore_off; - Elf32_Ehdr *ehdr_ptr; - struct vmcore *m; - - ehdr_ptr = (Elf32_Ehdr *)elfptr; - - /* Skip Elf header and program headers. */ - vmcore_off = sizeof(Elf32_Ehdr) + - (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); - - list_for_each_entry(m, vc_list, list) { - m->offset = vmcore_off; - vmcore_off += m->size; - } + free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); + elfcorebuf = NULL; + vfree(elfnotes_buf); + elfnotes_buf = NULL; } static int __init parse_crash_elf64_headers(void) @@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void) } /* Read in all elf headers. */ - elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); - elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + + ehdr.e_phnum * sizeof(Elf64_Phdr); + elfcorebuf_sz = elfcorebuf_sz_orig; + elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(elfcorebuf_sz_orig)); if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); - if (rc < 0) { - kfree(elfcorebuf); - return rc; - } + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + if (rc < 0) + goto fail; /* Merge all PT_NOTE headers into one. */ - rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } + rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, + &elfnotes_buf, &elfnotes_sz); + if (rc) + goto fail; rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, - &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } - set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); + elfnotes_sz, &vmcore_list); + if (rc) + goto fail; + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); return 0; +fail: + free_elfcorebuf(); + return rc; } static int __init parse_crash_elf32_headers(void) @@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void) } /* Read in all elf headers. */ - elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); - elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); + elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); + elfcorebuf_sz = elfcorebuf_sz_orig; + elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(elfcorebuf_sz_orig)); if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); - if (rc < 0) { - kfree(elfcorebuf); - return rc; - } + rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + if (rc < 0) + goto fail; /* Merge all PT_NOTE headers into one. */ - rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } + rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, + &elfnotes_buf, &elfnotes_sz); + if (rc) + goto fail; rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, - &vmcore_list); - if (rc) { - kfree(elfcorebuf); - return rc; - } - set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); + elfnotes_sz, &vmcore_list); + if (rc) + goto fail; + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); return 0; +fail: + free_elfcorebuf(); + return rc; } static int __init parse_crash_elf_headers(void) @@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void) rc = parse_crash_elf64_headers(); if (rc) return rc; - - /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf64(elfcorebuf); } else if (e_ident[EI_CLASS] == ELFCLASS32) { rc = parse_crash_elf32_headers(); if (rc) return rc; - - /* Determine vmcore size. */ - vmcore_size = get_vmcore_size_elf32(elfcorebuf); } else { pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } + + /* Determine vmcore size. */ + vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, + &vmcore_list); + return 0; } @@ -711,7 +986,6 @@ void vmcore_cleanup(void) list_del(&m->list); kfree(m); } - kfree(elfcorebuf); - elfcorebuf = NULL; + free_elfcorebuf(); } EXPORT_SYMBOL_GPL(vmcore_cleanup); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index e4bcb2cf055a..bfd95bf38005 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -178,6 +178,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) if (p->psi->erase) p->psi->erase(p->type, p->id, p->count, dentry->d_inode->i_ctime, p->psi); + else + return -EPERM; return simple_unlink(dir, dentry); } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 86d1038b5a12..b7ffe2bcd9c4 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -239,17 +239,15 @@ int pstore_register(struct pstore_info *psi) { struct module *owner = psi->owner; + if (backend && strcmp(backend, psi->name)) + return -EPERM; + spin_lock(&pstore_lock); if (psinfo) { spin_unlock(&pstore_lock); return -EBUSY; } - if (backend && strcmp(backend, psi->name)) { - spin_unlock(&pstore_lock); - return -EINVAL; - } - if (!psi->write) psi->write = pstore_write_compat; psinfo = psi; @@ -274,6 +272,9 @@ int pstore_register(struct pstore_info *psi) add_timer(&pstore_timer); } + pr_info("pstore: Registered %s as persistent store backend\n", + psi->name); + return 0; } EXPORT_SYMBOL_GPL(pstore_register); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..43abee2c6cb9 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -399,8 +399,6 @@ static int ramoops_probe(struct platform_device *pdev) goto fail_out; } - if (!is_power_of_2(pdata->mem_size)) - pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); if (!is_power_of_2(pdata->record_size)) pdata->record_size = rounddown_pow_of_two(pdata->record_size); if (!is_power_of_2(pdata->console_size)) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 59337326e288..de272d426763 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -46,7 +46,7 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) } /* increase and wrap the start pointer, returning the old value */ -static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) +static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a) { int old; int new; @@ -62,7 +62,7 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) } /* increase the size counter until it hits the max size */ -static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) +static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a) { size_t old; size_t new; @@ -78,6 +78,53 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); } +static DEFINE_RAW_SPINLOCK(buffer_lock); + +/* increase and wrap the start pointer, returning the old value */ +static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a) +{ + int old; + int new; + unsigned long flags; + + raw_spin_lock_irqsave(&buffer_lock, flags); + + old = atomic_read(&prz->buffer->start); + new = old + a; + while (unlikely(new > prz->buffer_size)) + new -= prz->buffer_size; + atomic_set(&prz->buffer->start, new); + + raw_spin_unlock_irqrestore(&buffer_lock, flags); + + return old; +} + +/* increase the size counter until it hits the max size */ +static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a) +{ + size_t old; + size_t new; + unsigned long flags; + + raw_spin_lock_irqsave(&buffer_lock, flags); + + old = atomic_read(&prz->buffer->size); + if (old == prz->buffer_size) + goto exit; + + new = old + a; + if (new > prz->buffer_size) + new = prz->buffer_size; + atomic_set(&prz->buffer->size, new); + +exit: + raw_spin_unlock_irqrestore(&buffer_lock, flags); +} + +static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic; +static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic; + static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, uint8_t *data, size_t len, uint8_t *ecc) { @@ -372,6 +419,9 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) return NULL; } + buffer_start_add = buffer_start_add_locked; + buffer_size_add = buffer_size_add_locked; + return ioremap(start, size); } diff --git a/fs/read_write.c b/fs/read_write.c index 2cefa417be34..122a3846d9e1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -41,8 +41,19 @@ static inline int unsigned_offsets(struct file *file) return file->f_mode & FMODE_UNSIGNED_OFFSET; } -static loff_t lseek_execute(struct file *file, struct inode *inode, - loff_t offset, loff_t maxsize) +/** + * vfs_setpos - update the file offset for lseek + * @file: file structure in question + * @offset: file offset to seek to + * @maxsize: maximum file size + * + * This is a low-level filesystem helper for updating the file offset to + * the value specified by @offset if the given offset is valid and it is + * not equal to the current file offset. + * + * Return the specified offset on success and -EINVAL on invalid offset. + */ +loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { if (offset < 0 && !unsigned_offsets(file)) return -EINVAL; @@ -55,6 +66,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, } return offset; } +EXPORT_SYMBOL(vfs_setpos); /** * generic_file_llseek_size - generic llseek implementation for regular files @@ -76,8 +88,6 @@ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof) { - struct inode *inode = file->f_mapping->host; - switch (whence) { case SEEK_END: offset += eof; @@ -97,8 +107,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * like SEEK_SET. */ spin_lock(&file->f_lock); - offset = lseek_execute(file, inode, file->f_pos + offset, - maxsize); + offset = vfs_setpos(file, file->f_pos + offset, maxsize); spin_unlock(&file->f_lock); return offset; case SEEK_DATA: @@ -120,7 +129,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, break; } - return lseek_execute(file, inode, offset, maxsize); + return vfs_setpos(file, offset, maxsize); } EXPORT_SYMBOL(generic_file_llseek_size); @@ -145,6 +154,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) EXPORT_SYMBOL(generic_file_llseek); /** + * fixed_size_llseek - llseek implementation for fixed-sized devices + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @size: size of the file + * + */ +loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) +{ + switch (whence) { + case SEEK_SET: case SEEK_CUR: case SEEK_END: + return generic_file_llseek_size(file, offset, whence, + size, size); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(fixed_size_llseek); + +/** * noop_llseek - No Operation Performed llseek implementation * @file: file structure to seek on * @offset: file offset to seek to @@ -296,7 +325,7 @@ out_putf: * them to something that fits in "int" so that others * won't have to do range checks all the time. */ -int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) +int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) { struct inode *inode; loff_t pos; @@ -477,7 +506,8 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) if (f.file) { loff_t pos = file_pos_read(f.file); ret = vfs_read(f.file, buf, count, &pos); - file_pos_write(f.file, pos); + if (ret >= 0) + file_pos_write(f.file, pos); fdput(f); } return ret; @@ -492,7 +522,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, if (f.file) { loff_t pos = file_pos_read(f.file); ret = vfs_write(f.file, buf, count, &pos); - file_pos_write(f.file, pos); + if (ret >= 0) + file_pos_write(f.file, pos); fdput(f); } @@ -780,7 +811,8 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { loff_t pos = file_pos_read(f.file); ret = vfs_readv(f.file, vec, vlen, &pos); - file_pos_write(f.file, pos); + if (ret >= 0) + file_pos_write(f.file, pos); fdput(f); } @@ -799,7 +831,8 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { loff_t pos = file_pos_read(f.file); ret = vfs_writev(f.file, vec, vlen, &pos); - file_pos_write(f.file, pos); + if (ret >= 0) + file_pos_write(f.file, pos); fdput(f); } @@ -959,7 +992,8 @@ COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, return -EBADF; pos = f.file->f_pos; ret = compat_readv(f.file, vec, vlen, &pos); - f.file->f_pos = pos; + if (ret >= 0) + f.file->f_pos = pos; fdput(f); return ret; } @@ -1025,7 +1059,8 @@ COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, return -EBADF; pos = f.file->f_pos; ret = compat_writev(f.file, vec, vlen, &pos); - f.file->f_pos = pos; + if (ret >= 0) + f.file->f_pos = pos; fdput(f); return ret; } @@ -1129,7 +1164,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (in.file->f_flags & O_NONBLOCK) fl = SPLICE_F_NONBLOCK; #endif + file_start_write(out.file); retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); + file_end_write(out.file); if (retval > 0) { add_rchar(current, retval); diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..6b14dc7df3a4 100644 --- a/fs/select.c +++ b/fs/select.c @@ -27,6 +27,7 @@ #include <linux/rcupdate.h> #include <linux/hrtimer.h> #include <linux/sched/rt.h> +#include <linux/freezer.h> #include <asm/uaccess.h> @@ -236,7 +237,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + rc = freezable_schedule_hrtimeout_range(expires, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/splice.c b/fs/splice.c index d37431dd60a1..3b7ee656f3aa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1098,27 +1098,13 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); - int ret; - - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; - - if (unlikely(out->f_flags & O_APPEND)) - return -EINVAL; - - ret = rw_verify_area(WRITE, out, ppos, len); - if (unlikely(ret < 0)) - return ret; if (out->f_op && out->f_op->splice_write) splice_write = out->f_op->splice_write; else splice_write = default_file_splice_write; - file_start_write(out); - ret = splice_write(pipe, out, ppos, len, flags); - file_end_write(out); - return ret; + return splice_write(pipe, out, ppos, len, flags); } /* @@ -1307,6 +1293,16 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, }; long ret; + if (unlikely(!(out->f_mode & FMODE_WRITE))) + return -EBADF; + + if (unlikely(out->f_flags & O_APPEND)) + return -EINVAL; + + ret = rw_verify_area(WRITE, out, opos, len); + if (unlikely(ret < 0)) + return ret; + ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) *ppos = sd.pos; @@ -1362,7 +1358,19 @@ static long do_splice(struct file *in, loff_t __user *off_in, offset = out->f_pos; } + if (unlikely(!(out->f_mode & FMODE_WRITE))) + return -EBADF; + + if (unlikely(out->f_flags & O_APPEND)) + return -EINVAL; + + ret = rw_verify_area(WRITE, out, &offset, len); + if (unlikely(ret < 0)) + return ret; + + file_start_write(out); ret = do_splice_from(ipipe, out, &offset, len, flags); + file_end_write(out); if (!off_out) out->f_pos = offset; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 1c0d5f264767..731b2bbcaab3 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -27,8 +27,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static int sysv_hash(const struct dentry *dentry, const struct inode *inode, - struct qstr *qstr) +static int sysv_hash(const struct dentry *dentry, struct qstr *qstr) { /* Truncate the name in place, avoids having to define a compare function. */ diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 102c072c6bbf..5f6fc17d6bc5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -594,6 +594,29 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, return 0; } +static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + struct udf_inode_info *iinfo; + int err; + + inode = udf_new_inode(dir, mode, &err); + if (!inode) + return err; + + iinfo = UDF_I(inode); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + inode->i_data.a_ops = &udf_adinicb_aops; + else + inode->i_data.a_ops = &udf_aops; + inode->i_op = &udf_file_inode_operations; + inode->i_fop = &udf_file_operations; + mark_inode_dirty(inode); + + d_tmpfile(dentry, inode); + return 0; +} + static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { @@ -1311,6 +1334,7 @@ const struct inode_operations udf_dir_inode_operations = { .rmdir = udf_rmdir, .mknod = udf_mknod, .rename = udf_rename, + .tmpfile = udf_tmpfile, }; const struct inode_operations udf_symlink_inode_operations = { .readlink = generic_readlink, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0ad2b95fca12..de3dc98f4e8f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1268,8 +1268,7 @@ xfs_seek_data( } out: - if (offset != file->f_pos) - file->f_pos = offset; + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: xfs_iunlock_map_shared(ip, lock); @@ -1377,8 +1376,7 @@ out: * situation in particular. */ offset = min_t(loff_t, offset, isize); - if (offset != file->f_pos) - file->f_pos = offset; + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: xfs_iunlock_map_shared(ip, lock); |