summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorOlof Johansson <olof@lixom.net>2013-07-12 21:59:39 +0400
committerOlof Johansson <olof@lixom.net>2013-07-12 21:59:39 +0400
commitf4b96f5e4ff8d86699c851c10245e102809b0331 (patch)
treef766102263bed71738431cabb4d4f6f086005cd8 /fs
parent9d8812df35be58a5da0c44182c1e4ba2507cc6a7 (diff)
parentc24a6ae18abde53b048372b066b93b71b1b91154 (diff)
downloadlinux-f4b96f5e4ff8d86699c851c10245e102809b0331.tar.xz
Merge tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
Omap fixes and minor defconfig updates that would be good to get in before -rc1. * tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap: ARM: OMAP2+: omap2plus_defconfig: Enable appended DTB support ARM: OMAP2+: Enable TI_EDMA in omap2plus_defconfig ARM: OMAP2+: omap2plus_defconfig: enable DRA752 thermal support by default ARM: OMAP2+: omap2plus_defconfig: enable TI bandgap driver ARM: OMAP2+: devices: remove duplicated include from devices.c ARM: OMAP3: igep0020: Set DSS pins in correct mux mode. ARM: OMAP2+: N900: enable N900-specific drivers even if device tree is enabled ARM: OMAP2+: Cocci spatch "ptr_ret.spatch" ARM: OMAP2+: Remove obsolete Makefile line ARM: OMAP5: Enable Cortex A15 errata 798181 ARM: scu: provide inline dummy functions when SCU is not present ARM: OMAP4: sleep: build OMAP4 specific functions only for OMAP4 ARM: OMAP2+: timer: initialize before using oh_name Signed-off-by: Olof Johansson <olof@lixom.net> Add/move/change conflicts in arch/arm/mach-omap2/Kconfig resolved.
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/dir.c6
-rw-r--r--fs/affs/namei.c26
-rw-r--r--fs/afs/flock.c7
-rw-r--r--fs/aio.c4
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/btrfs/file.c15
-rw-r--r--fs/btrfs/ioctl.c8
-rw-r--r--fs/buffer.c34
-rw-r--r--fs/cachefiles/rdwr.c30
-rw-r--r--fs/ceph/file.c11
-rw-r--r--fs/ceph/locks.c2
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/cifs/Kconfig1
-rw-r--r--fs/cifs/cifs_debug.c52
-rw-r--r--fs/cifs/cifs_unicode.h8
-rw-r--r--fs/cifs/cifsencrypt.c40
-rw-r--r--fs/cifs/cifsfs.c13
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h48
-rw-r--r--fs/cifs/cifspdu.h17
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c425
-rw-r--r--fs/cifs/connect.c159
-rw-r--r--fs/cifs/dir.c9
-rw-r--r--fs/cifs/file.c15
-rw-r--r--fs/cifs/misc.c3
-rw-r--r--fs/cifs/readdir.c29
-rw-r--r--fs/cifs/sess.c95
-rw-r--r--fs/cifs/smb1ops.c23
-rw-r--r--fs/cifs/smb2glob.h2
-rw-r--r--fs/cifs/smb2misc.c4
-rw-r--r--fs/cifs/smb2ops.c48
-rw-r--r--fs/cifs/smb2pdu.c282
-rw-r--r--fs/cifs/smb2pdu.h100
-rw-r--r--fs/cifs/smb2proto.h4
-rw-r--r--fs/cifs/smb2transport.c151
-rw-r--r--fs/cifs/smbfsctl.h27
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/dir.c8
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/coredump.c121
-rw-r--r--fs/dcache.c66
-rw-r--r--fs/ecryptfs/crypto.c5
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/efivarfs/super.c9
-rw-r--r--fs/eventpoll.c16
-rw-r--r--fs/exec.c17
-rw-r--r--fs/ext2/namei.c24
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--fs/ext3/namei.c47
-rw-r--r--fs/ext4/file.c24
-rw-r--r--fs/ext4/namei.c47
-rw-r--r--fs/fat/misc.c5
-rw-r--r--fs/fat/namei_msdos.c6
-rw-r--r--fs/fat/namei_vfat.c12
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/dentry.c3
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/hfs/hfs_fs.h7
-rw-r--r--fs/hfs/string.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h7
-rw-r--r--fs/hfsplus/unicode.c7
-rw-r--r--fs/hpfs/dentry.c7
-rw-r--r--fs/hppfs/hppfs.c11
-rw-r--r--fs/inode.c4
-rw-r--r--fs/internal.h6
-rw-r--r--fs/isofs/inode.c48
-rw-r--r--fs/isofs/namei.c3
-rw-r--r--fs/jfs/namei.c7
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svclock.c14
-rw-r--r--fs/lockd/svcsubs.c12
-rw-r--r--fs/locks.c281
-rw-r--r--fs/minix/dir.c2
-rw-r--r--fs/minix/namei.c13
-rw-r--r--fs/namei.c113
-rw-r--r--fs/ncpfs/dir.c45
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/ncpfs/mmap.c2
-rw-r--r--fs/nfs/callback.c5
-rw-r--r--fs/nfs/delegation.c10
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/file.c30
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nilfs2/alloc.c63
-rw-r--r--fs/nilfs2/alloc.h2
-rw-r--r--fs/nilfs2/ifile.c22
-rw-r--r--fs/nilfs2/ifile.h2
-rw-r--r--fs/nilfs2/inode.c8
-rw-r--r--fs/nilfs2/segment.c4
-rw-r--r--fs/nilfs2/super.c31
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/nilfs2/the_nilfs.h4
-rw-r--r--fs/notify/fanotify/fanotify_user.c3
-rw-r--r--fs/ocfs2/alloc.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c19
-rw-r--r--fs/ocfs2/cluster/quorum.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c29
-rw-r--r--fs/ocfs2/dlm/dlmlock.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c7
-rw-r--r--fs/ocfs2/file.c12
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/namei.c70
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/suballoc.c37
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/xattr.c18
-rw-r--r--fs/open.c63
-rw-r--r--fs/proc/base.c105
-rw-r--r--fs/proc/fd.c18
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/namespaces.c13
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/proc/task_mmu.c145
-rw-r--r--fs/proc/uptime.c3
-rw-r--r--fs/proc/vmcore.c694
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/pstore/platform.c11
-rw-r--r--fs/pstore/ram.c2
-rw-r--r--fs/pstore/ram_core.c54
-rw-r--r--fs/read_write.c65
-rw-r--r--fs/select.c4
-rw-r--r--fs/splice.c38
-rw-r--r--fs/sysv/namei.c3
-rw-r--r--fs/udf/namei.c24
-rw-r--r--fs/xfs/xfs_file.c6
135 files changed, 2896 insertions, 1577 deletions
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index ade28bb058e3..0d138c0de293 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -191,8 +191,7 @@ const struct file_operations adfs_dir_operations = {
};
static int
-adfs_hash(const struct dentry *parent, const struct inode *inode,
- struct qstr *qstr)
+adfs_hash(const struct dentry *parent, struct qstr *qstr)
{
const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
const unsigned char *name;
@@ -228,8 +227,7 @@ adfs_hash(const struct dentry *parent, const struct inode *inode,
* requirements of the underlying filesystem.
*/
static int
-adfs_compare(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+adfs_compare(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
int i;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ff65884a7839..c36cbb4537a2 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,18 +13,12 @@
typedef int (*toupper_t)(int);
static int affs_toupper(int ch);
-static int affs_hash_dentry(const struct dentry *,
- const struct inode *, struct qstr *);
-static int affs_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int affs_hash_dentry(const struct dentry *, struct qstr *);
+static int affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(const struct dentry *,
- const struct inode *, struct qstr *);
-static int affs_intl_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int affs_intl_hash_dentry(const struct dentry *, struct qstr *);
+static int affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
const struct dentry_operations affs_dentry_operations = {
@@ -86,14 +80,12 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
}
static int
-affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
{
return __affs_hash_dentry(qstr, affs_toupper);
}
static int
-affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
{
return __affs_hash_dentry(qstr, affs_intl_toupper);
}
@@ -131,15 +123,13 @@ static inline int __affs_compare_dentry(unsigned int len,
}
static int
-affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return __affs_compare_dentry(len, str, name, affs_toupper);
}
static int
-affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return __affs_compare_dentry(len, str, name, affs_intl_toupper);
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 2497bf306c70..a8cf2cff836c 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
*/
static int afs_do_setlk(struct file *file, struct file_lock *fl)
{
- struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+ struct inode *inode = file_inode(file);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
afs_lock_type_t type;
struct key *key = file->private_data;
int ret;
@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
- lock_flocks();
+ spin_lock(&inode->i_lock);
/* make sure we've got a callback on this file and that our view of the
* data version is up to date */
@@ -420,7 +421,7 @@ given_lock:
afs_vnode_fetch_status(vnode, NULL, key);
error:
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
_leave(" = %d", ret);
return ret;
diff --git a/fs/aio.c b/fs/aio.c
index 2bbcacf74d0c..9b5ca1137419 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -39,6 +39,8 @@
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
+#include "internal.h"
+
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_COMPAT_FEATURES 1
#define AIO_RING_INCOMPAT_FEATURES 0
@@ -623,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
/*
* Add a completion event to the ring buffer. Must be done holding
- * ctx->ctx_lock to prevent other code from messing with the tail
+ * ctx->completion_lock to prevent other code from messing with the tail
* pointer since we might be called from irq context.
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2091db8cdd78..bb43ce081d6e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -325,31 +325,10 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *bd_inode = file->f_mapping->host;
- loff_t size;
loff_t retval;
mutex_lock(&bd_inode->i_mutex);
- size = i_size_read(bd_inode);
-
- retval = -EINVAL;
- switch (whence) {
- case SEEK_END:
- offset += size;
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- case SEEK_SET:
- break;
- default:
- goto out;
- }
- if (offset >= 0 && offset <= size) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- }
- retval = offset;
- }
-out:
+ retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
mutex_unlock(&bd_inode->i_mutex);
return retval;
}
@@ -1583,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = {
.writepages = generic_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
+ .is_dirty_writeback = buffer_check_dirty_writeback,
};
const struct file_operations def_blk_fops = {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4205ba752d40..89da56a58b63 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2425,20 +2425,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
}
}
- if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
- offset = -EINVAL;
- goto out;
- }
- if (offset > inode->i_sb->s_maxbytes) {
- offset = -EINVAL;
- goto out;
- }
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
mutex_unlock(&inode->i_mutex);
return offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0f81d67cdc8d..cd7e96c73cb7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3881,7 +3881,7 @@ drop_write:
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret;
@@ -3914,7 +3914,7 @@ drop_write:
static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret = 0;
@@ -4020,7 +4020,7 @@ out:
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
const char *label = root->fs_info->super_copy->label;
size_t len = strnlen(label, BTRFS_LABEL_SIZE);
int ret;
@@ -4039,7 +4039,7 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_super_block *super_block = root->fs_info->super_copy;
struct btrfs_trans_handle *trans;
char label[BTRFS_LABEL_SIZE];
diff --git a/fs/buffer.c b/fs/buffer.c
index f93392e2df12..4d7433534f5c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh)
EXPORT_SYMBOL(unlock_buffer);
/*
+ * Returns if the page has dirty or writeback buffers. If all the buffers
+ * are unlocked and clean then the PageDirty information is stale. If
+ * any of the pages are locked, it is assumed they are locked for IO.
+ */
+void buffer_check_dirty_writeback(struct page *page,
+ bool *dirty, bool *writeback)
+{
+ struct buffer_head *head, *bh;
+ *dirty = false;
+ *writeback = false;
+
+ BUG_ON(!PageLocked(page));
+
+ if (!page_has_buffers(page))
+ return;
+
+ if (PageWriteback(page))
+ *writeback = true;
+
+ head = page_buffers(page);
+ bh = head;
+ do {
+ if (buffer_locked(bh))
+ *writeback = true;
+
+ if (buffer_dirty(bh))
+ *dirty = true;
+
+ bh = bh->b_this_page;
+ } while (bh != head);
+}
+EXPORT_SYMBOL(buffer_check_dirty_writeback);
+
+/*
* Block until a buffer comes unlocked. This doesn't stop it
* from becoming locked again - you have to lock it yourself
* if you want to preserve its state.
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 317f9ee9c991..ebaff368120d 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -12,6 +12,7 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/swap.h>
#include "internal.h"
/*
@@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
*/
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
struct fscache_retrieval *op,
- struct page *netpage,
- struct pagevec *pagevec)
+ struct page *netpage)
{
struct cachefiles_one_read *monitor;
struct address_space *bmapping;
@@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
_enter("");
- pagevec_reinit(pagevec);
-
_debug("read back %p{%lu,%d}",
netpage, netpage->index, page_count(netpage));
@@ -283,9 +281,7 @@ installed_new_backing_page:
backpage = newpage;
newpage = NULL;
- page_cache_get(backpage);
- pagevec_add(pagevec, backpage);
- __pagevec_lru_add_file(pagevec);
+ lru_cache_add_file(backpage);
read_backing_page:
ret = bmapping->a_ops->readpage(NULL, backpage);
@@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
if (block) {
/* submit the apparently valid page to the backing fs to be
* read from disk */
- ret = cachefiles_read_backing_file_one(object, op, page,
- &pagevec);
+ ret = cachefiles_read_backing_file_one(object, op, page);
} else if (cachefiles_has_space(cache, 0, 1) == 0) {
/* there's space in the cache we can use */
fscache_mark_page_cached(op, page);
@@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
{
struct cachefiles_one_read *monitor = NULL;
struct address_space *bmapping = object->backer->d_inode->i_mapping;
- struct pagevec lru_pvec;
struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
int ret = 0;
_enter("");
- pagevec_init(&lru_pvec, 0);
-
list_for_each_entry_safe(netpage, _n, list, lru) {
list_del(&netpage->lru);
@@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
backpage = newpage;
newpage = NULL;
- page_cache_get(backpage);
- if (!pagevec_add(&lru_pvec, backpage))
- __pagevec_lru_add_file(&lru_pvec);
+ lru_cache_add_file(backpage);
reread_backing_page:
ret = bmapping->a_ops->readpage(NULL, backpage);
@@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
goto nomem;
}
- page_cache_get(netpage);
- if (!pagevec_add(&lru_pvec, netpage))
- __pagevec_lru_add_file(&lru_pvec);
+ lru_cache_add_file(netpage);
/* install a monitor */
page_cache_get(netpage);
@@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
fscache_mark_page_cached(op, netpage);
- page_cache_get(netpage);
- if (!pagevec_add(&lru_pvec, netpage))
- __pagevec_lru_add_file(&lru_pvec);
+ lru_cache_add_file(netpage);
/* the netpage is unlocked and marked up to date here */
fscache_end_io(op, netpage, 0);
@@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
out:
/* tidy up */
- pagevec_lru_add_file(&lru_pvec);
-
if (newpage)
page_cache_release(newpage);
if (netpage)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 656e16907430..16c989d3e23c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -866,16 +866,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
break;
}
- if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
- offset = -EINVAL;
- goto out;
- }
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
mutex_unlock(&inode->i_mutex);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ebbf680378e2..690f73f42425 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
/**
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
- * array. Must be called with lock_flocks() already held.
+ * array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
*/
int ceph_encode_locks_to_buffer(struct inode *inode,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4d2920304be8..74fd2898b2ab 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2481,20 +2481,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_filelock *flocks;
encode_again:
- lock_flocks();
+ spin_lock(&inode->i_lock);
ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
sizeof(struct ceph_filelock), GFP_NOFS);
if (!flocks) {
err = -ENOMEM;
goto out_free;
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
err = ceph_encode_locks_to_buffer(inode, flocks,
num_fcntl_locks,
num_flock_locks);
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
if (err) {
kfree(flocks);
if (err == -ENOSPC)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 2906ee276408..603f18a65c12 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -10,6 +10,7 @@ config CIFS
select CRYPTO_ECB
select CRYPTO_DES
select CRYPTO_SHA256
+ select CRYPTO_CMAC
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index d59748346020..f3ac4154cbb6 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
tcon->nativeFileSystem);
}
seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
- "\nPathComponentMax: %d Status: 0x%d",
+ "\n\tPathComponentMax: %d Status: 0x%d",
le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
le32_to_cpu(tcon->fsAttrInfo.Attributes),
le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
@@ -224,6 +224,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_puts(m, " type: CDROM ");
else
seq_printf(m, " type: %d ", dev_type);
+ if (server->ops->dump_share_caps)
+ server->ops->dump_share_caps(m, tcon);
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
@@ -595,9 +597,36 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
return single_open(file, cifs_security_flags_proc_show, NULL);
}
+/*
+ * Ensure that if someone sets a MUST flag, that we disable all other MAY
+ * flags except for the ones corresponding to the given MUST flag. If there are
+ * multiple MUST flags, then try to prefer more secure ones.
+ */
+static void
+cifs_security_flags_handle_must_flags(unsigned int *flags)
+{
+ unsigned int signflags = *flags & CIFSSEC_MUST_SIGN;
+
+ if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
+ *flags = CIFSSEC_MUST_KRB5;
+ else if ((*flags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
+ *flags = CIFSSEC_MUST_NTLMSSP;
+ else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
+ *flags = CIFSSEC_MUST_NTLMV2;
+ else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM)
+ *flags = CIFSSEC_MUST_NTLM;
+ else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN)
+ *flags = CIFSSEC_MUST_LANMAN;
+ else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT)
+ *flags = CIFSSEC_MUST_PLNTXT;
+
+ *flags |= signflags;
+}
+
static ssize_t cifs_security_flags_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
+ int rc;
unsigned int flags;
char flags_string[12];
char c;
@@ -620,26 +649,35 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
global_secflags = CIFSSEC_MAX;
return count;
} else if (!isdigit(c)) {
- cifs_dbg(VFS, "invalid flag %c\n", c);
+ cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
+ flags_string);
return -EINVAL;
}
}
- /* else we have a number */
- flags = simple_strtoul(flags_string, NULL, 0);
+ /* else we have a number */
+ rc = kstrtouint(flags_string, 0, &flags);
+ if (rc) {
+ cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
+ flags_string);
+ return rc;
+ }
cifs_dbg(FYI, "sec flags 0x%x\n", flags);
- if (flags <= 0) {
- cifs_dbg(VFS, "invalid security flags %s\n", flags_string);
+ if (flags == 0) {
+ cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", flags_string);
return -EINVAL;
}
if (flags & ~CIFSSEC_MASK) {
- cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n",
+ cifs_dbg(VFS, "Unsupported security flags: 0x%x\n",
flags & ~CIFSSEC_MASK);
return -EINVAL;
}
+
+ cifs_security_flags_handle_must_flags(&flags);
+
/* flags look ok - update the global security flags for cifs module */
global_secflags = flags;
if (global_secflags & CIFSSEC_MUST_SIGN) {
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 4fb097468e21..fe8d6276410a 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -327,14 +327,14 @@ UniToupper(register wchar_t uc)
/*
* UniStrupr: Upper case a unicode string
*/
-static inline wchar_t *
-UniStrupr(register wchar_t *upin)
+static inline __le16 *
+UniStrupr(register __le16 *upin)
{
- register wchar_t *up;
+ register __le16 *up;
up = upin;
while (*up) { /* For all characters */
- *up = UniToupper(*up);
+ *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
up++;
}
return upin; /* Return input pointer */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 71436d1fca13..3d8bf941d126 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -276,7 +276,6 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
- memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
memcpy(lnm_session_key, password_with_pad,
CIFS_ENCPWD_SIZE);
return 0;
@@ -414,7 +413,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
int rc = 0;
int len;
char nt_hash[CIFS_NTHASH_SIZE];
- wchar_t *user;
+ __le16 *user;
wchar_t *domain;
wchar_t *server;
@@ -439,7 +438,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return rc;
}
- /* convert ses->user_name to unicode and uppercase */
+ /* convert ses->user_name to unicode */
len = ses->user_name ? strlen(ses->user_name) : 0;
user = kmalloc(2 + (len * 2), GFP_KERNEL);
if (user == NULL) {
@@ -448,7 +447,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
}
if (len) {
- len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp);
+ len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp);
UniStrupr(user);
} else {
memset(user, '\0', 2);
@@ -536,7 +535,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
return rc;
}
- if (ses->server->secType == RawNTLMSSP)
+ if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
memcpy(ses->auth_key.response + offset,
ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
@@ -568,7 +567,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
char ntlmv2_hash[16];
unsigned char *tiblob = NULL; /* target info blob */
- if (ses->server->secType == RawNTLMSSP) {
+ if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
if (!ses->domainName) {
rc = find_domain_name(ses, nls_cp);
if (rc) {
@@ -706,6 +705,9 @@ calc_seckey(struct cifs_ses *ses)
void
cifs_crypto_shash_release(struct TCP_Server_Info *server)
{
+ if (server->secmech.cmacaes)
+ crypto_free_shash(server->secmech.cmacaes);
+
if (server->secmech.hmacsha256)
crypto_free_shash(server->secmech.hmacsha256);
@@ -715,6 +717,8 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server)
if (server->secmech.hmacmd5)
crypto_free_shash(server->secmech.hmacmd5);
+ kfree(server->secmech.sdesccmacaes);
+
kfree(server->secmech.sdeschmacsha256);
kfree(server->secmech.sdeschmacmd5);
@@ -748,6 +752,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
goto crypto_allocate_hmacsha256_fail;
}
+ server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0);
+ if (IS_ERR(server->secmech.cmacaes)) {
+ cifs_dbg(VFS, "could not allocate crypto cmac-aes");
+ rc = PTR_ERR(server->secmech.cmacaes);
+ goto crypto_allocate_cmacaes_fail;
+ }
+
size = sizeof(struct shash_desc) +
crypto_shash_descsize(server->secmech.hmacmd5);
server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
@@ -778,8 +789,22 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256;
server->secmech.sdeschmacsha256->shash.flags = 0x0;
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->secmech.cmacaes);
+ server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL);
+ if (!server->secmech.sdesccmacaes) {
+ cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__);
+ rc = -ENOMEM;
+ goto crypto_allocate_cmacaes_sdesc_fail;
+ }
+ server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes;
+ server->secmech.sdesccmacaes->shash.flags = 0x0;
+
return 0;
+crypto_allocate_cmacaes_sdesc_fail:
+ kfree(server->secmech.sdeschmacsha256);
+
crypto_allocate_hmacsha256_sdesc_fail:
kfree(server->secmech.sdescmd5);
@@ -787,6 +812,9 @@ crypto_allocate_md5_sdesc_fail:
kfree(server->secmech.sdeschmacmd5);
crypto_allocate_hmacmd5_sdesc_fail:
+ crypto_free_shash(server->secmech.cmacaes);
+
+crypto_allocate_cmacaes_fail:
crypto_free_shash(server->secmech.hmacsha256);
crypto_allocate_hmacsha256_fail:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 540c1ccfcdb2..4bdd547dbf6f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -312,11 +312,14 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
}
static void
-cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
+cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
{
+ if (ses->sectype == Unspecified)
+ return;
+
seq_printf(s, ",sec=");
- switch (server->secType) {
+ switch (ses->sectype) {
case LANMAN:
seq_printf(s, "lanman");
break;
@@ -338,7 +341,7 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
break;
}
- if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ if (ses->sign)
seq_printf(s, "i");
}
@@ -369,7 +372,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string);
- cifs_show_security(s, tcon->ses->server);
+ cifs_show_security(s, tcon->ses);
cifs_show_cache_flavor(s, cifs_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
@@ -765,7 +768,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
{
- /* note that this is called by vfs setlease with lock_flocks held
+ /* note that this is called by vfs setlease with i_lock held
to protect *lease from going away */
struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d05b3028e3b9..ea723a5e8226 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.0"
+#define CIFS_VERSION "2.01"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4f07f6fbe494..e66b08882548 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -101,20 +101,14 @@ enum statusEnum {
};
enum securityEnum {
- LANMAN = 0, /* Legacy LANMAN auth */
+ Unspecified = 0, /* not specified */
+ LANMAN, /* Legacy LANMAN auth */
NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
-/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
Kerberos, /* Kerberos via SPNEGO */
};
-enum protocolEnum {
- TCP = 0,
- SCTP
- /* Netbios frames protocol not supported at this time */
-};
-
struct session_key {
unsigned int len;
char *response;
@@ -131,9 +125,11 @@ struct cifs_secmech {
struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
struct crypto_shash *md5; /* md5 hash function */
struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */
+ struct crypto_shash *cmacaes; /* block-cipher based MAC function */
struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */
+ struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */
};
/* per smb session structure/fields */
@@ -181,6 +177,7 @@ enum smb_version {
Smb_20,
Smb_21,
Smb_30,
+ Smb_302,
};
struct mid_q_entry;
@@ -228,6 +225,7 @@ struct smb_version_operations {
void (*dump_detail)(void *);
void (*clear_stats)(struct cifs_tcon *);
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
+ void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
/* verify the message */
int (*check_message)(char *, unsigned int);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
@@ -367,6 +365,8 @@ struct smb_version_operations {
void (*set_lease_key)(struct inode *, struct cifs_fid *fid);
/* generate new lease key */
void (*new_lease_key)(struct cifs_fid *fid);
+ /* The next two functions will need to be changed to per smb session */
+ void (*generate_signingkey)(struct TCP_Server_Info *server);
int (*calc_signature)(struct smb_rqst *rqst,
struct TCP_Server_Info *server);
};
@@ -387,6 +387,8 @@ struct smb_version_values {
unsigned int cap_nt_find;
unsigned int cap_large_files;
unsigned int oplock_read;
+ __u16 signing_enabled;
+ __u16 signing_required;
};
#define HEADER_SIZE(server) (server->vals->header_size)
@@ -407,7 +409,8 @@ struct smb_vol {
kgid_t backupgid;
umode_t file_mode;
umode_t dir_mode;
- unsigned secFlg;
+ enum securityEnum sectype; /* sectype requested via mnt opts */
+ bool sign; /* was signing requested via mnt opts? */
bool retry:1;
bool intr:1;
bool setuids:1;
@@ -441,6 +444,7 @@ struct smb_vol {
bool mfsymlinks:1; /* use Minshall+French Symlinks */
bool multiuser:1;
bool rwpidforward:1; /* pid forward for read/write operations */
+ bool nosharesock;
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -514,6 +518,7 @@ struct TCP_Server_Info {
struct task_struct *tsk;
char server_GUID[16];
__u16 sec_mode;
+ bool sign; /* is signing enabled on this connection? */
bool session_estab; /* mark when very first sess is established */
#ifdef CONFIG_CIFS_SMB2
int echo_credits; /* echo reserved slots */
@@ -521,7 +526,6 @@ struct TCP_Server_Info {
bool echoes:1; /* enable echoes */
#endif
u16 dialect; /* dialect index that server chose */
- enum securityEnum secType;
bool oplocks:1; /* enable oplocks */
unsigned int maxReq; /* Clients should submit no more */
/* than maxReq distinct unanswered SMBs to the server when using */
@@ -540,12 +544,17 @@ struct TCP_Server_Info {
int timeAdj; /* Adjust for difference in server time zone in sec */
__u64 CurrentMid; /* multiplex id - rotating counter */
char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
+ char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* for signing, protected by srv_mutex */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
+#define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */
+#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
+#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
+ char negflavor; /* NEGOTIATE response flavor */
/* extended security flavors that server supports */
bool sec_ntlmssp; /* supports NTLMSSP */
bool sec_kerberosu2u; /* supports U2U Kerberos */
@@ -697,7 +706,6 @@ struct cifs_ses {
enum statusEnum status;
unsigned overrideSecFlg; /* if non-zero override global sec flags */
__u16 ipc_tid; /* special tid for connection to IPC share */
- __u16 flags;
__u16 vcnum;
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
@@ -714,21 +722,14 @@ struct cifs_ses {
char *password;
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
+ enum securityEnum sectype; /* what security flavor was specified? */
+ bool sign; /* is signing required? */
bool need_reconnect:1; /* connection reset, uid now invalid */
#ifdef CONFIG_CIFS_SMB2
__u16 session_flags;
#endif /* CONFIG_CIFS_SMB2 */
};
-/* no more than one of the following three session flags may be set */
-#define CIFS_SES_NT4 1
-#define CIFS_SES_OS2 2
-#define CIFS_SES_W9X 4
-/* following flag is set for old servers such as OS2 (and Win95?)
- which do not negotiate NTLM or POSIX dialects, but instead
- negotiate one of the older LANMAN dialects */
-#define CIFS_SES_LANMAN 8
-
static inline bool
cap_unix(struct cifs_ses *ses)
{
@@ -816,7 +817,7 @@ struct cifs_tcon {
#ifdef CONFIG_CIFS_SMB2
bool print:1; /* set if connection to printer share */
bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
- __u32 capabilities;
+ __le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
__u32 vol_serial_number;
@@ -1348,7 +1349,7 @@ require use of the stronger protocol */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
-#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMSSP)
+#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
/*
@@ -1494,4 +1495,7 @@ extern struct smb_version_values smb21_values;
#define SMB30_VERSION_STRING "3.0"
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
+#define SMB302_VERSION_STRING "3.02"
+/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
+extern struct smb_version_values smb302_values;
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index e996ff6b26d1..11ca24a8e054 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -142,6 +142,11 @@
*/
#define CIFS_SESS_KEY_SIZE (16)
+/*
+ * Size of the smb3 signing key
+ */
+#define SMB3_SIGN_KEY_SIZE (16)
+
#define CIFS_CLIENT_CHALLENGE_SIZE (8)
#define CIFS_SERVER_CHALLENGE_SIZE (8)
#define CIFS_HMAC_MD5_HASH_SIZE (16)
@@ -531,7 +536,7 @@ typedef struct lanman_neg_rsp {
#define READ_RAW_ENABLE 1
#define WRITE_RAW_ENABLE 2
#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
-
+#define SMB1_CLIENT_GUID_SIZE (16)
typedef struct negotiate_rsp {
struct smb_hdr hdr; /* wct = 17 */
__le16 DialectIndex; /* 0xFFFF = no dialect acceptable */
@@ -553,7 +558,7 @@ typedef struct negotiate_rsp {
/* followed by 16 bytes of server GUID */
/* then security blob if cap_extended_security negotiated */
struct {
- unsigned char GUID[16];
+ unsigned char GUID[SMB1_CLIENT_GUID_SIZE];
unsigned char SecurityBlob[1];
} __attribute__((packed)) extended_response;
} __attribute__((packed)) u;
@@ -1315,6 +1320,14 @@ typedef struct smb_com_ntransact_rsp {
/* parms and data follow */
} __attribute__((packed)) NTRANSACT_RSP;
+/* See MS-SMB 2.2.7.2.1.1 */
+struct srv_copychunk {
+ __le64 SourceOffset;
+ __le64 DestinationOffset;
+ __le32 CopyLength;
+ __u32 Reserved;
+} __packed;
+
typedef struct smb_com_transaction_ioctl_req {
struct smb_hdr hdr; /* wct = 23 */
__u8 MaxSetupCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index dda188a94332..c8ff018fae68 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -118,6 +118,8 @@ extern void header_assemble(struct smb_hdr *, char /* command */ ,
extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
struct cifs_ses *ses,
void **request_buf);
+extern enum securityEnum select_sectype(struct TCP_Server_Info *server,
+ enum securityEnum requested);
extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp);
extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
@@ -212,6 +214,7 @@ extern int cifs_negotiate_protocol(const unsigned int xid,
struct cifs_ses *ses);
extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
struct nls_table *nls_info);
+extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required);
extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses);
extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
@@ -433,6 +436,7 @@ extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
extern int calc_seckey(struct cifs_ses *);
+extern void generate_smb3signingkey(struct TCP_Server_Info *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern int calc_lanman_hash(const char *password, const char *cryptkey,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a58dc77cc443..a89c4cb4e6cf 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -367,6 +367,185 @@ vt2_err:
return -EINVAL;
}
+static int
+decode_ext_sec_blob(struct cifs_ses *ses, NEGOTIATE_RSP *pSMBr)
+{
+ int rc = 0;
+ u16 count;
+ char *guid = pSMBr->u.extended_response.GUID;
+ struct TCP_Server_Info *server = ses->server;
+
+ count = get_bcc(&pSMBr->hdr);
+ if (count < SMB1_CLIENT_GUID_SIZE)
+ return -EIO;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->srv_count > 1) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (memcmp(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE) != 0) {
+ cifs_dbg(FYI, "server UID changed\n");
+ memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE);
+ }
+ } else {
+ spin_unlock(&cifs_tcp_ses_lock);
+ memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE);
+ }
+
+ if (count == SMB1_CLIENT_GUID_SIZE) {
+ server->sec_ntlmssp = true;
+ } else {
+ count -= SMB1_CLIENT_GUID_SIZE;
+ rc = decode_negTokenInit(
+ pSMBr->u.extended_response.SecurityBlob, count, server);
+ if (rc != 1)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required)
+{
+ bool srv_sign_required = server->sec_mode & server->vals->signing_required;
+ bool srv_sign_enabled = server->sec_mode & server->vals->signing_enabled;
+ bool mnt_sign_enabled = global_secflags & CIFSSEC_MAY_SIGN;
+
+ /*
+ * Is signing required by mnt options? If not then check
+ * global_secflags to see if it is there.
+ */
+ if (!mnt_sign_required)
+ mnt_sign_required = ((global_secflags & CIFSSEC_MUST_SIGN) ==
+ CIFSSEC_MUST_SIGN);
+
+ /*
+ * If signing is required then it's automatically enabled too,
+ * otherwise, check to see if the secflags allow it.
+ */
+ mnt_sign_enabled = mnt_sign_required ? mnt_sign_required :
+ (global_secflags & CIFSSEC_MAY_SIGN);
+
+ /* If server requires signing, does client allow it? */
+ if (srv_sign_required) {
+ if (!mnt_sign_enabled) {
+ cifs_dbg(VFS, "Server requires signing, but it's disabled in SecurityFlags!");
+ return -ENOTSUPP;
+ }
+ server->sign = true;
+ }
+
+ /* If client requires signing, does server allow it? */
+ if (mnt_sign_required) {
+ if (!srv_sign_enabled) {
+ cifs_dbg(VFS, "Server does not support signing!");
+ return -ENOTSUPP;
+ }
+ server->sign = true;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_CIFS_WEAK_PW_HASH
+static int
+decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
+{
+ __s16 tmp;
+ struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
+
+ if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT)
+ return -EOPNOTSUPP;
+
+ server->sec_mode = le16_to_cpu(rsp->SecurityMode);
+ server->maxReq = min_t(unsigned int,
+ le16_to_cpu(rsp->MaxMpxCount),
+ cifs_max_pending);
+ set_credits(server, server->maxReq);
+ server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
+ server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
+ /* even though we do not use raw we might as well set this
+ accurately, in case we ever find a need for it */
+ if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
+ server->max_rw = 0xFF00;
+ server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
+ } else {
+ server->max_rw = 0;/* do not need to use raw anyway */
+ server->capabilities = CAP_MPX_MODE;
+ }
+ tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
+ if (tmp == -1) {
+ /* OS/2 often does not set timezone therefore
+ * we must use server time to calc time zone.
+ * Could deviate slightly from the right zone.
+ * Smallest defined timezone difference is 15 minutes
+ * (i.e. Nepal). Rounding up/down is done to match
+ * this requirement.
+ */
+ int val, seconds, remain, result;
+ struct timespec ts, utc;
+ utc = CURRENT_TIME;
+ ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
+ rsp->SrvTime.Time, 0);
+ cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n",
+ (int)ts.tv_sec, (int)utc.tv_sec,
+ (int)(utc.tv_sec - ts.tv_sec));
+ val = (int)(utc.tv_sec - ts.tv_sec);
+ seconds = abs(val);
+ result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
+ remain = seconds % MIN_TZ_ADJ;
+ if (remain >= (MIN_TZ_ADJ / 2))
+ result += MIN_TZ_ADJ;
+ if (val < 0)
+ result = -result;
+ server->timeAdj = result;
+ } else {
+ server->timeAdj = (int)tmp;
+ server->timeAdj *= 60; /* also in seconds */
+ }
+ cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj);
+
+
+ /* BB get server time for time conversions and add
+ code to use it and timezone since this is not UTC */
+
+ if (rsp->EncryptionKeyLength ==
+ cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
+ memcpy(server->cryptkey, rsp->EncryptionKey,
+ CIFS_CRYPTO_KEY_SIZE);
+ } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
+ return -EIO; /* need cryptkey unless plain text */
+ }
+
+ cifs_dbg(FYI, "LANMAN negotiated\n");
+ return 0;
+}
+#else
+static inline int
+decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
+{
+ cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n");
+ return -EOPNOTSUPP;
+}
+#endif
+
+static bool
+should_set_ext_sec_flag(enum securityEnum sectype)
+{
+ switch (sectype) {
+ case RawNTLMSSP:
+ case Kerberos:
+ return true;
+ case Unspecified:
+ if (global_secflags &
+ (CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP))
+ return true;
+ /* Fallthrough */
+ default:
+ return false;
+ }
+}
+
int
CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
{
@@ -375,41 +554,24 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
int rc = 0;
int bytes_returned;
int i;
- struct TCP_Server_Info *server;
+ struct TCP_Server_Info *server = ses->server;
u16 count;
- unsigned int secFlags;
- if (ses->server)
- server = ses->server;
- else {
- rc = -EIO;
- return rc;
+ if (!server) {
+ WARN(1, "%s: server is NULL!\n", __func__);
+ return -EIO;
}
+
rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ ,
(void **) &pSMB, (void **) &pSMBr);
if (rc)
return rc;
- /* if any of auth flags (ie not sign or seal) are overriden use them */
- if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
- secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */
- else /* if override flags set only sign/seal OR them with global auth */
- secFlags = global_secflags | ses->overrideSecFlg;
-
- cifs_dbg(FYI, "secFlags 0x%x\n", secFlags);
-
pSMB->hdr.Mid = get_next_mid(server);
pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
- if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
- pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
- else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
- cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n");
- pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
- } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
- pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
- else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
- cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n");
+ if (should_set_ext_sec_flag(ses->sectype)) {
+ cifs_dbg(FYI, "Requesting extended security.");
pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
}
@@ -436,127 +598,21 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
could not negotiate a common dialect */
rc = -EOPNOTSUPP;
goto neg_err_exit;
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- } else if ((pSMBr->hdr.WordCount == 13)
- && ((server->dialect == LANMAN_PROT)
- || (server->dialect == LANMAN2_PROT))) {
- __s16 tmp;
- struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
-
- if ((secFlags & CIFSSEC_MAY_LANMAN) ||
- (secFlags & CIFSSEC_MAY_PLNTXT))
- server->secType = LANMAN;
- else {
- cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n");
- rc = -EOPNOTSUPP;
- goto neg_err_exit;
- }
- server->sec_mode = le16_to_cpu(rsp->SecurityMode);
- server->maxReq = min_t(unsigned int,
- le16_to_cpu(rsp->MaxMpxCount),
- cifs_max_pending);
- set_credits(server, server->maxReq);
- server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
- server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
- /* even though we do not use raw we might as well set this
- accurately, in case we ever find a need for it */
- if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
- server->max_rw = 0xFF00;
- server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
- } else {
- server->max_rw = 0;/* do not need to use raw anyway */
- server->capabilities = CAP_MPX_MODE;
- }
- tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
- if (tmp == -1) {
- /* OS/2 often does not set timezone therefore
- * we must use server time to calc time zone.
- * Could deviate slightly from the right zone.
- * Smallest defined timezone difference is 15 minutes
- * (i.e. Nepal). Rounding up/down is done to match
- * this requirement.
- */
- int val, seconds, remain, result;
- struct timespec ts, utc;
- utc = CURRENT_TIME;
- ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
- rsp->SrvTime.Time, 0);
- cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n",
- (int)ts.tv_sec, (int)utc.tv_sec,
- (int)(utc.tv_sec - ts.tv_sec));
- val = (int)(utc.tv_sec - ts.tv_sec);
- seconds = abs(val);
- result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
- remain = seconds % MIN_TZ_ADJ;
- if (remain >= (MIN_TZ_ADJ / 2))
- result += MIN_TZ_ADJ;
- if (val < 0)
- result = -result;
- server->timeAdj = result;
- } else {
- server->timeAdj = (int)tmp;
- server->timeAdj *= 60; /* also in seconds */
- }
- cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj);
-
-
- /* BB get server time for time conversions and add
- code to use it and timezone since this is not UTC */
-
- if (rsp->EncryptionKeyLength ==
- cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
- memcpy(ses->server->cryptkey, rsp->EncryptionKey,
- CIFS_CRYPTO_KEY_SIZE);
- } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
- rc = -EIO; /* need cryptkey unless plain text */
- goto neg_err_exit;
- }
-
- cifs_dbg(FYI, "LANMAN negotiated\n");
- /* we will not end up setting signing flags - as no signing
- was in LANMAN and server did not return the flags on */
- goto signing_check;
-#else /* weak security disabled */
} else if (pSMBr->hdr.WordCount == 13) {
- cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n");
- rc = -EOPNOTSUPP;
-#endif /* WEAK_PW_HASH */
- goto neg_err_exit;
+ server->negflavor = CIFS_NEGFLAVOR_LANMAN;
+ rc = decode_lanman_negprot_rsp(server, pSMBr);
+ goto signing_check;
} else if (pSMBr->hdr.WordCount != 17) {
/* unknown wct */
rc = -EOPNOTSUPP;
goto neg_err_exit;
}
- /* else wct == 17 NTLM */
+ /* else wct == 17, NTLM or better */
+
server->sec_mode = pSMBr->SecurityMode;
if ((server->sec_mode & SECMODE_USER) == 0)
cifs_dbg(FYI, "share mode security\n");
- if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0)
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
-#endif /* CIFS_WEAK_PW_HASH */
- cifs_dbg(VFS, "Server requests plain text password but client support disabled\n");
-
- if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
- server->secType = NTLMv2;
- else if (secFlags & CIFSSEC_MAY_NTLM)
- server->secType = NTLM;
- else if (secFlags & CIFSSEC_MAY_NTLMV2)
- server->secType = NTLMv2;
- else if (secFlags & CIFSSEC_MAY_KRB5)
- server->secType = Kerberos;
- else if (secFlags & CIFSSEC_MAY_NTLMSSP)
- server->secType = RawNTLMSSP;
- else if (secFlags & CIFSSEC_MAY_LANMAN)
- server->secType = LANMAN;
- else {
- rc = -EOPNOTSUPP;
- cifs_dbg(VFS, "Invalid security type\n");
- goto neg_err_exit;
- }
- /* else ... any others ...? */
-
/* one byte, so no need to convert this or EncryptionKeyLen from
little endian */
server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount),
@@ -569,90 +625,26 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->capabilities = le32_to_cpu(pSMBr->Capabilities);
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
+
if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
+ server->negflavor = CIFS_NEGFLAVOR_UNENCAP;
memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC ||
server->capabilities & CAP_EXTENDED_SECURITY) &&
(pSMBr->EncryptionKeyLength == 0)) {
- /* decode security blob */
- count = get_bcc(&pSMBr->hdr);
- if (count < 16) {
- rc = -EIO;
- goto neg_err_exit;
- }
- spin_lock(&cifs_tcp_ses_lock);
- if (server->srv_count > 1) {
- spin_unlock(&cifs_tcp_ses_lock);
- if (memcmp(server->server_GUID,
- pSMBr->u.extended_response.
- GUID, 16) != 0) {
- cifs_dbg(FYI, "server UID changed\n");
- memcpy(server->server_GUID,
- pSMBr->u.extended_response.GUID,
- 16);
- }
- } else {
- spin_unlock(&cifs_tcp_ses_lock);
- memcpy(server->server_GUID,
- pSMBr->u.extended_response.GUID, 16);
- }
-
- if (count == 16) {
- server->secType = RawNTLMSSP;
- } else {
- rc = decode_negTokenInit(pSMBr->u.extended_response.
- SecurityBlob, count - 16,
- server);
- if (rc == 1)
- rc = 0;
- else
- rc = -EINVAL;
- if (server->secType == Kerberos) {
- if (!server->sec_kerberos &&
- !server->sec_mskerberos)
- rc = -EOPNOTSUPP;
- } else if (server->secType == RawNTLMSSP) {
- if (!server->sec_ntlmssp)
- rc = -EOPNOTSUPP;
- } else
- rc = -EOPNOTSUPP;
- }
+ server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
+ rc = decode_ext_sec_blob(ses, pSMBr);
} else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
rc = -EIO; /* no crypt key only if plain text pwd */
- goto neg_err_exit;
- } else
- server->capabilities &= ~CAP_EXTENDED_SECURITY;
-
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-signing_check:
-#endif
- if ((secFlags & CIFSSEC_MAY_SIGN) == 0) {
- /* MUST_SIGN already includes the MAY_SIGN FLAG
- so if this is zero it means that signing is disabled */
- cifs_dbg(FYI, "Signing disabled\n");
- if (server->sec_mode & SECMODE_SIGN_REQUIRED) {
- cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n");
- rc = -EOPNOTSUPP;
- }
- server->sec_mode &=
- ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
- } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
- /* signing required */
- cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags);
- if ((server->sec_mode &
- (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
- cifs_dbg(VFS, "signing required but server lacks support\n");
- rc = -EOPNOTSUPP;
- } else
- server->sec_mode |= SECMODE_SIGN_REQUIRED;
} else {
- /* signing optional ie CIFSSEC_MAY_SIGN */
- if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0)
- server->sec_mode &=
- ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
+ server->negflavor = CIFS_NEGFLAVOR_UNENCAP;
+ server->capabilities &= ~CAP_EXTENDED_SECURITY;
}
+signing_check:
+ if (!rc)
+ rc = cifs_enable_signing(server, ses->sign);
neg_err_exit:
cifs_buf_release(pSMB);
@@ -777,9 +769,8 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses)
pSMB->hdr.Mid = get_next_mid(ses->server);
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
- pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+ if (ses->server->sign)
+ pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
pSMB->hdr.Uid = ses->Suid;
@@ -1540,8 +1531,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
/* result already set, check signature */
- if (server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ if (server->sign) {
int rc = 0;
rc = cifs_verify_signature(&rqst, server,
@@ -3940,6 +3930,7 @@ QFileInfoRetry:
pSMB->Pad = 0;
pSMB->Fid = netfid;
inc_rfc1001_len(pSMB, byte_count);
+ pSMB->t2.ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -4108,6 +4099,7 @@ UnixQFileInfoRetry:
pSMB->Pad = 0;
pSMB->Fid = netfid;
inc_rfc1001_len(pSMB, byte_count);
+ pSMB->t2.ByteCount = cpu_to_le16(byte_count);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -4794,11 +4786,8 @@ getDFSRetry:
strncpy(pSMB->RequestFileName, search_name, name_len);
}
- if (ses->server) {
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
- pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
- }
+ if (ses->server && ses->server->sign)
+ pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
pSMB->hdr.Uid = ses->Suid;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e3bc39bb9d12..afcb8a1a33b7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -85,7 +85,7 @@ enum {
Opt_acl, Opt_noacl, Opt_locallease,
Opt_sign, Opt_seal, Opt_noac,
Opt_fsc, Opt_mfsymlinks,
- Opt_multiuser, Opt_sloppy,
+ Opt_multiuser, Opt_sloppy, Opt_nosharesock,
/* Mount options which take numeric value */
Opt_backupuid, Opt_backupgid, Opt_uid,
@@ -165,6 +165,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_mfsymlinks, "mfsymlinks" },
{ Opt_multiuser, "multiuser" },
{ Opt_sloppy, "sloppy" },
+ { Opt_nosharesock, "nosharesock" },
{ Opt_backupuid, "backupuid=%s" },
{ Opt_backupgid, "backupgid=%s" },
@@ -275,6 +276,7 @@ static const match_table_t cifs_smb_version_tokens = {
{ Smb_20, SMB20_VERSION_STRING},
{ Smb_21, SMB21_VERSION_STRING },
{ Smb_30, SMB30_VERSION_STRING },
+ { Smb_302, SMB302_VERSION_STRING },
};
static int ip_connect(struct TCP_Server_Info *server);
@@ -1024,44 +1026,48 @@ static int cifs_parse_security_flavors(char *value,
substring_t args[MAX_OPT_ARGS];
+ /*
+ * With mount options, the last one should win. Reset any existing
+ * settings back to default.
+ */
+ vol->sectype = Unspecified;
+ vol->sign = false;
+
switch (match_token(value, cifs_secflavor_tokens, args)) {
- case Opt_sec_krb5:
- vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_SIGN;
- break;
- case Opt_sec_krb5i:
- vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN;
- break;
case Opt_sec_krb5p:
- /* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */
- cifs_dbg(VFS, "Krb5 cifs privacy not supported\n");
- break;
- case Opt_sec_ntlmssp:
- vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
+ cifs_dbg(VFS, "sec=krb5p is not supported!\n");
+ return 1;
+ case Opt_sec_krb5i:
+ vol->sign = true;
+ /* Fallthrough */
+ case Opt_sec_krb5:
+ vol->sectype = Kerberos;
break;
case Opt_sec_ntlmsspi:
- vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN;
- break;
- case Opt_ntlm:
- /* ntlm is default so can be turned off too */
- vol->secFlg |= CIFSSEC_MAY_NTLM;
+ vol->sign = true;
+ /* Fallthrough */
+ case Opt_sec_ntlmssp:
+ vol->sectype = RawNTLMSSP;
break;
case Opt_sec_ntlmi:
- vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN;
- break;
- case Opt_sec_ntlmv2:
- vol->secFlg |= CIFSSEC_MAY_NTLMV2;
+ vol->sign = true;
+ /* Fallthrough */
+ case Opt_ntlm:
+ vol->sectype = NTLM;
break;
case Opt_sec_ntlmv2i:
- vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN;
+ vol->sign = true;
+ /* Fallthrough */
+ case Opt_sec_ntlmv2:
+ vol->sectype = NTLMv2;
break;
#ifdef CONFIG_CIFS_WEAK_PW_HASH
case Opt_sec_lanman:
- vol->secFlg |= CIFSSEC_MAY_LANMAN;
+ vol->sectype = LANMAN;
break;
#endif
case Opt_sec_none:
vol->nullauth = 1;
- vol->secFlg |= CIFSSEC_MAY_NTLM;
break;
default:
cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -1119,6 +1125,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
vol->ops = &smb30_operations;
vol->vals = &smb30_values;
break;
+ case Smb_302:
+ vol->ops = &smb30_operations; /* currently identical with 3.0 */
+ vol->vals = &smb302_values;
+ break;
#endif
default:
cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
@@ -1424,7 +1434,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->local_lease = 1;
break;
case Opt_sign:
- vol->secFlg |= CIFSSEC_MUST_SIGN;
+ vol->sign = true;
break;
case Opt_seal:
/* we do not do the following in secFlags because seal
@@ -1455,6 +1465,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
case Opt_sloppy:
sloppy = true;
break;
+ case Opt_nosharesock:
+ vol->nosharesock = true;
+ break;
/* Numeric Values */
case Opt_backupuid:
@@ -1978,47 +1991,21 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
static bool
match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
{
- unsigned int secFlags;
-
- if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
- secFlags = vol->secFlg;
- else
- secFlags = global_secflags | vol->secFlg;
-
- switch (server->secType) {
- case LANMAN:
- if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT)))
- return false;
- break;
- case NTLMv2:
- if (!(secFlags & CIFSSEC_MAY_NTLMV2))
- return false;
- break;
- case NTLM:
- if (!(secFlags & CIFSSEC_MAY_NTLM))
- return false;
- break;
- case Kerberos:
- if (!(secFlags & CIFSSEC_MAY_KRB5))
- return false;
- break;
- case RawNTLMSSP:
- if (!(secFlags & CIFSSEC_MAY_NTLMSSP))
- return false;
- break;
- default:
- /* shouldn't happen */
+ /*
+ * The select_sectype function should either return the vol->sectype
+ * that was specified, or "Unspecified" if that sectype was not
+ * compatible with the given NEGOTIATE request.
+ */
+ if (select_sectype(server, vol->sectype) == Unspecified)
return false;
- }
- /* now check if signing mode is acceptable */
- if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
- (server->sec_mode & SECMODE_SIGN_REQUIRED))
- return false;
- else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
- (server->sec_mode &
- (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
- return false;
+ /*
+ * Now check if signing mode is acceptable. No need to check
+ * global_secflags at this point since if MUST_SIGN is set then
+ * the server->sign had better be too.
+ */
+ if (vol->sign && !server->sign)
+ return false;
return true;
}
@@ -2027,6 +2014,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
{
struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr;
+ if (vol->nosharesock)
+ return 0;
+
if ((server->vals != vol->vals) || (server->ops != vol->ops))
return 0;
@@ -2216,7 +2206,11 @@ out_err:
static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
{
- switch (ses->server->secType) {
+ if (vol->sectype != Unspecified &&
+ vol->sectype != ses->sectype)
+ return 0;
+
+ switch (ses->sectype) {
case Kerberos:
if (!uid_eq(vol->cred_uid, ses->cred_uid))
return 0;
@@ -2493,7 +2487,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
- ses->overrideSecFlg = volume_info->secFlg;
+ ses->sectype = volume_info->sectype;
+ ses->sign = volume_info->sign;
mutex_lock(&ses->session_mutex);
rc = cifs_negotiate_protocol(xid, ses);
@@ -3656,7 +3651,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
NTLMv2 password here) */
#ifdef CONFIG_CIFS_WEAK_PW_HASH
if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
- (ses->server->secType == LANMAN))
+ (ses->sectype == LANMAN))
calc_lanman_hash(tcon->password, ses->server->cryptkey,
ses->server->sec_mode &
SECMODE_PW_ENCRYPT ? true : false,
@@ -3674,8 +3669,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
}
}
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ if (ses->server->sign)
smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
if (ses->capabilities & CAP_STATUS32) {
@@ -3738,7 +3732,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
}
bcc_ptr += length + 1;
bytes_left -= (length + 1);
- strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
+ strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
/* mostly informational -- no need to fail on error here */
kfree(tcon->nativeFileSystem);
@@ -3827,7 +3821,6 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
int rc = -ENOSYS;
struct TCP_Server_Info *server = ses->server;
- ses->flags = 0;
ses->capabilities = server->capabilities;
if (linuxExtEnabled == 0)
ses->capabilities &= (~server->vals->cap_unix);
@@ -3848,6 +3841,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
server->sequence_number = 0x2;
server->session_estab = true;
ses->auth_key.response = NULL;
+ if (server->ops->generate_signingkey)
+ server->ops->generate_signingkey(server);
}
mutex_unlock(&server->srv_mutex);
@@ -3870,23 +3865,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
static int
cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
{
- switch (ses->server->secType) {
- case Kerberos:
- vol->secFlg = CIFSSEC_MUST_KRB5;
+ vol->sectype = ses->sectype;
+
+ /* krb5 is special, since we don't need username or pw */
+ if (vol->sectype == Kerberos)
return 0;
- case NTLMv2:
- vol->secFlg = CIFSSEC_MUST_NTLMV2;
- break;
- case NTLM:
- vol->secFlg = CIFSSEC_MUST_NTLM;
- break;
- case RawNTLMSSP:
- vol->secFlg = CIFSSEC_MUST_NTLMSSP;
- break;
- case LANMAN:
- vol->secFlg = CIFSSEC_MUST_LANMAN;
- break;
- }
return cifs_set_cifscreds(vol, ses);
}
@@ -3912,6 +3895,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->nocase = master_tcon->nocase;
vol_info->local_lease = master_tcon->local_lease;
vol_info->no_linux_ext = !master_tcon->unix_ext;
+ vol_info->sectype = master_tcon->ses->sectype;
+ vol_info->sign = master_tcon->ses->sign;
rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
if (rc) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 5699b5036ed8..5175aebf6737 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -822,8 +822,7 @@ const struct dentry_operations cifs_dentry_ops = {
/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
};
-static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *q)
+static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q)
{
struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
unsigned long hash;
@@ -838,12 +837,10 @@ static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
return 0;
}
-static int cifs_ci_compare(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int cifs_ci_compare(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
- struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
+ struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls;
if ((name->len == len) &&
(nls_strnicmp(codepage, name->name, str, len) == 0))
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4d8ba8d491e5..91d8629e69a2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -999,7 +999,7 @@ try_again:
rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
if (!rc)
goto try_again;
- locks_delete_block(flock);
+ posix_unblock_lock(flock);
}
return rc;
}
@@ -1092,6 +1092,7 @@ struct lock_to_push {
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
+ struct inode *inode = cfile->dentry->d_inode;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock, **before;
unsigned int count = 0, i = 0;
@@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
xid = get_xid();
- lock_flocks();
- cifs_for_each_lock(cfile->dentry->d_inode, before) {
+ spin_lock(&inode->i_lock);
+ cifs_for_each_lock(inode, before) {
if ((*before)->fl_flags & FL_POSIX)
count++;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
INIT_LIST_HEAD(&locks_to_send);
@@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
}
el = locks_to_send.next;
- lock_flocks();
- cifs_for_each_lock(cfile->dentry->d_inode, before) {
+ spin_lock(&inode->i_lock);
+ cifs_for_each_lock(inode, before) {
flock = *before;
if ((flock->fl_flags & FL_POSIX) == 0)
continue;
@@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
lck->offset = flock->fl_start;
el = el->next;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
int stored_rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1bec014779fd..f7d4b2285efe 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -267,8 +267,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
if (treeCon->nocase)
buffer->Flags |= SMBFLG_CASELESS;
if ((treeCon->ses) && (treeCon->ses->server))
- if (treeCon->ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ if (treeCon->ses->server->sign)
buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f1213799de1a..ab8778469394 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -126,6 +126,22 @@ out:
dput(dentry);
}
+/*
+ * Is it possible that this directory might turn out to be a DFS referral
+ * once we go to try and use it?
+ */
+static bool
+cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb)
+{
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+
+ if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
+ return true;
+#endif
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -135,6 +151,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
+ /*
+ * Windows CIFS servers generally make DFS referrals look
+ * like directories in FIND_* responses with the reparse
+ * attribute flag also set (since DFS junctions are
+ * reparse points). We must revalidate at least these
+ * directory inodes before trying to use them (if
+ * they are DFS we will get PATH_NOT_COVERED back
+ * when queried directly and can then try to connect
+ * to the DFS target)
+ */
+ if (cifs_dfs_is_possible(cifs_sb) &&
+ (fattr->cf_cifsattrs & ATTR_REPARSE))
+ fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f230571a7ab3..79358e341fd2 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -138,8 +138,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ if (ses->server->sign)
pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
if (ses->capabilities & CAP_UNICODE) {
@@ -310,11 +309,10 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses,
return;
}
-static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
- struct cifs_ses *ses,
- const struct nls_table *nls_cp)
+static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
+ struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
{
- int rc = 0;
int len;
char *bcc_ptr = *pbcc_area;
@@ -322,24 +320,22 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
len = strnlen(bcc_ptr, bleft);
if (len >= bleft)
- return rc;
+ return;
kfree(ses->serverOS);
ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
if (ses->serverOS)
strncpy(ses->serverOS, bcc_ptr, len);
- if (strncmp(ses->serverOS, "OS/2", 4) == 0) {
+ if (strncmp(ses->serverOS, "OS/2", 4) == 0)
cifs_dbg(FYI, "OS/2 server\n");
- ses->flags |= CIFS_SES_OS2;
- }
bcc_ptr += len + 1;
bleft -= len + 1;
len = strnlen(bcc_ptr, bleft);
if (len >= bleft)
- return rc;
+ return;
kfree(ses->serverNOS);
@@ -352,7 +348,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
len = strnlen(bcc_ptr, bleft);
if (len > bleft)
- return rc;
+ return;
/* No domain field in LANMAN case. Domain is
returned by old servers in the SMB negprot response */
@@ -360,8 +356,6 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
but thus do return domain here we could add parsing
for it later, but it is not very important */
cifs_dbg(FYI, "ascii: bytes left %d\n", bleft);
-
- return rc;
}
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
@@ -432,8 +426,7 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ if (ses->server->sign) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
if (!ses->server->session_estab)
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
@@ -471,8 +464,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
- if (ses->server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ if (ses->server->sign) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
if (!ses->server->session_estab)
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
@@ -558,6 +550,56 @@ setup_ntlmv2_ret:
return rc;
}
+enum securityEnum
+select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
+{
+ switch (server->negflavor) {
+ case CIFS_NEGFLAVOR_EXTENDED:
+ switch (requested) {
+ case Kerberos:
+ case RawNTLMSSP:
+ return requested;
+ case Unspecified:
+ if (server->sec_ntlmssp &&
+ (global_secflags & CIFSSEC_MAY_NTLMSSP))
+ return RawNTLMSSP;
+ if ((server->sec_kerberos || server->sec_mskerberos) &&
+ (global_secflags & CIFSSEC_MAY_KRB5))
+ return Kerberos;
+ /* Fallthrough */
+ default:
+ return Unspecified;
+ }
+ case CIFS_NEGFLAVOR_UNENCAP:
+ switch (requested) {
+ case NTLM:
+ case NTLMv2:
+ return requested;
+ case Unspecified:
+ if (global_secflags & CIFSSEC_MAY_NTLMV2)
+ return NTLMv2;
+ if (global_secflags & CIFSSEC_MAY_NTLM)
+ return NTLM;
+ /* Fallthrough */
+ default:
+ return Unspecified;
+ }
+ case CIFS_NEGFLAVOR_LANMAN:
+ switch (requested) {
+ case LANMAN:
+ return requested;
+ case Unspecified:
+ if (global_secflags & CIFSSEC_MAY_LANMAN)
+ return LANMAN;
+ /* Fallthrough */
+ default:
+ return Unspecified;
+ }
+ default:
+ return Unspecified;
+ }
+}
+
int
CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp)
@@ -579,11 +621,18 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
u16 blob_len;
char *ntlmsspblob = NULL;
- if (ses == NULL)
+ if (ses == NULL) {
+ WARN(1, "%s: ses == NULL!", __func__);
return -EINVAL;
+ }
- type = ses->server->secType;
+ type = select_sectype(ses->server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
+ if (type == Unspecified) {
+ cifs_dbg(VFS, "Unable to select appropriate authentication method!");
+ return -EINVAL;
+ }
+
if (type == RawNTLMSSP) {
/* if memory allocation is successful, caller of this function
* frees it.
@@ -643,8 +692,6 @@ ssetup_ntlmssp_authenticate:
}
bcc_ptr = str_area;
- ses->flags &= ~CIFS_SES_LANMAN;
-
iov[1].iov_base = NULL;
iov[1].iov_len = 0;
@@ -668,7 +715,6 @@ ssetup_ntlmssp_authenticate:
ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
- ses->flags |= CIFS_SES_LANMAN;
memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
bcc_ptr += CIFS_AUTH_RESP_SIZE;
@@ -938,8 +984,7 @@ ssetup_ntlmssp_authenticate:
}
decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
} else {
- rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
- ses, nls_cp);
+ decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
}
ssetup_exit:
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3efdb9d5c0b8..e813f04511d8 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -449,8 +449,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
* WRITEX header, not including the 4 byte RFC1001 length.
*/
if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
- (!(server->capabilities & CAP_UNIX) &&
- (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
+ (!(server->capabilities & CAP_UNIX) && server->sign))
wsize = min_t(unsigned int, wsize,
server->maxBuf - sizeof(WRITE_REQ) + 4);
@@ -765,20 +764,14 @@ smb_set_file_info(struct inode *inode, const char *full_path,
}
tcon = tlink_tcon(tlink);
- /*
- * NT4 apparently returns success on this call, but it doesn't really
- * work.
- */
- if (!(tcon->ses->flags & CIFS_SES_NT4)) {
- rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf,
- cifs_sb->local_nls,
+ rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
- goto out;
- } else if (rc != -EOPNOTSUPP && rc != -EINVAL)
- goto out;
+ if (rc == 0) {
+ cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
+ goto out;
+ } else if (rc != -EOPNOTSUPP && rc != -EINVAL) {
+ goto out;
}
cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n");
@@ -964,4 +957,6 @@ struct smb_version_values smb1_values = {
.cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
.cap_large_files = CAP_LARGE_FILES,
.oplock_read = OPLOCK_READ,
+ .signing_enabled = SECMODE_SIGN_ENABLED,
+ .signing_required = SECMODE_SIGN_REQUIRED,
};
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 7c0e2143e775..c38350851b08 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -54,5 +54,7 @@
#define SMB2_SIGNATURE_SIZE (16)
#define SMB2_NTLMV2_SESSKEY_SIZE (16)
#define SMB2_HMACSHA256_SIZE (32)
+#define SMB2_CMACAES_SIZE (16)
+#define SMB3_SIGNKEY_SIZE (16)
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 10383d8c015b..b0c43345cd98 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -266,6 +266,10 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
break;
case SMB2_IOCTL:
+ *off = le32_to_cpu(
+ ((struct smb2_ioctl_rsp *)hdr)->OutputOffset);
+ *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount);
+ break;
case SMB2_CHANGE_NOTIFY:
default:
/* BB FIXME for unimplemented cases above */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f2e76f3b0c61..6d15cab95b99 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -281,6 +281,25 @@ smb2_clear_stats(struct cifs_tcon *tcon)
}
static void
+smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
+{
+ seq_puts(m, "\n\tShare Capabilities:");
+ if (tcon->capabilities & SMB2_SHARE_CAP_DFS)
+ seq_puts(m, " DFS,");
+ if (tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY)
+ seq_puts(m, " CONTINUOUS AVAILABILITY,");
+ if (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT)
+ seq_puts(m, " SCALEOUT,");
+ if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER)
+ seq_puts(m, " CLUSTER,");
+ if (tcon->capabilities & SMB2_SHARE_CAP_ASYMMETRIC)
+ seq_puts(m, " ASYMMETRIC,");
+ if (tcon->capabilities == 0)
+ seq_puts(m, " None");
+ seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
+}
+
+static void
smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
{
#ifdef CONFIG_CIFS_STATS
@@ -292,7 +311,6 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
seq_printf(m, "\nSessionSetups: %d sent %d failed",
atomic_read(&sent[SMB2_SESSION_SETUP_HE]),
atomic_read(&failed[SMB2_SESSION_SETUP_HE]));
-#define SMB2LOGOFF 0x0002 /* trivial request/resp */
seq_printf(m, "\nLogoffs: %d sent %d failed",
atomic_read(&sent[SMB2_LOGOFF_HE]),
atomic_read(&failed[SMB2_LOGOFF_HE]));
@@ -645,6 +663,7 @@ struct smb_version_operations smb30_operations = {
.dump_detail = smb2_dump_detail,
.clear_stats = smb2_clear_stats,
.print_stats = smb2_print_stats,
+ .dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
@@ -690,6 +709,7 @@ struct smb_version_operations smb30_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
+ .generate_signingkey = generate_smb3signingkey,
.calc_signature = smb3_calc_signature,
};
@@ -709,6 +729,8 @@ struct smb_version_values smb20_values = {
.cap_nt_find = SMB2_NT_FIND,
.cap_large_files = SMB2_LARGE_FILES,
.oplock_read = SMB2_OPLOCK_LEVEL_II,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
};
struct smb_version_values smb21_values = {
@@ -727,6 +749,8 @@ struct smb_version_values smb21_values = {
.cap_nt_find = SMB2_NT_FIND,
.cap_large_files = SMB2_LARGE_FILES,
.oplock_read = SMB2_OPLOCK_LEVEL_II,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
};
struct smb_version_values smb30_values = {
@@ -745,4 +769,26 @@ struct smb_version_values smb30_values = {
.cap_nt_find = SMB2_NT_FIND,
.cap_large_files = SMB2_LARGE_FILES,
.oplock_read = SMB2_OPLOCK_LEVEL_II,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+};
+
+struct smb_version_values smb302_values = {
+ .version_string = SMB302_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .oplock_read = SMB2_OPLOCK_LEVEL_II,
+ .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+ .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
};
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2b95ce2b54e8..2b312e4eeaa6 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1,7 +1,7 @@
/*
* fs/cifs/smb2pdu.c
*
- * Copyright (C) International Business Machines Corp., 2009, 2012
+ * Copyright (C) International Business Machines Corp., 2009, 2013
* Etersoft, 2012
* Author(s): Steve French (sfrench@us.ibm.com)
* Pavel Shilovsky (pshilovsky@samba.org) 2012
@@ -108,19 +108,33 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
if (!tcon)
goto out;
+ /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */
+ /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
+ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
+ if ((tcon->ses) &&
+ (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ hdr->CreditCharge = cpu_to_le16(1);
+ /* else CreditCharge MBZ */
+
hdr->TreeId = tcon->tid;
/* Uid is not converted */
if (tcon->ses)
hdr->SessionId = tcon->ses->Suid;
- /* BB check following DFS flags BB */
- /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */
- if (tcon->share_flags & SHI1005_FLAGS_DFS)
- hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS;
- /* BB how does SMB2 do case sensitive? */
- /* if (tcon->nocase)
- hdr->Flags |= SMBFLG_CASELESS; */
- if (tcon->ses && tcon->ses->server &&
- (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED))
+
+ /*
+ * If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have
+ * to pass the path on the Open SMB prefixed by \\server\share.
+ * Not sure when we would need to do the augmented path (if ever) and
+ * setting this flag breaks the SMB2 open operation since it is
+ * illegal to send an empty path name (without \\server\share prefix)
+ * when the DFS flag is set in the SMB open header. We could
+ * consider setting the flag on all operations other than open
+ * but it is safer to net set it for now.
+ */
+/* if (tcon->share_flags & SHI1005_FLAGS_DFS)
+ hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
+
+ if (tcon->ses && tcon->ses->server && tcon->ses->server->sign)
hdr->Flags |= SMB2_FLAGS_SIGNED;
out:
pdu->StructureSize2 = cpu_to_le16(parmsize);
@@ -328,34 +342,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
struct kvec iov[1];
int rc = 0;
int resp_buftype;
- struct TCP_Server_Info *server;
- unsigned int sec_flags;
- u16 temp = 0;
+ struct TCP_Server_Info *server = ses->server;
int blob_offset, blob_length;
char *security_blob;
int flags = CIFS_NEG_OP;
cifs_dbg(FYI, "Negotiate protocol\n");
- if (ses->server)
- server = ses->server;
- else {
- rc = -EIO;
- return rc;
+ if (!server) {
+ WARN(1, "%s: server is NULL!\n", __func__);
+ return -EIO;
}
rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req);
if (rc)
return rc;
- /* if any of auth flags (ie not sign or seal) are overriden use them */
- if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
- sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/
- else /* if override flags set only sign/seal OR them with global auth */
- sec_flags = global_secflags | ses->overrideSecFlg;
-
- cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
-
req->hdr.SessionId = 0;
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
@@ -364,12 +366,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
inc_rfc1001_len(req, 2);
/* only one of SMB2 signing flags may be set in SMB2 request */
- if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN)
- temp = SMB2_NEGOTIATE_SIGNING_REQUIRED;
- else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */
- temp = SMB2_NEGOTIATE_SIGNING_ENABLED;
-
- req->SecurityMode = cpu_to_le16(temp);
+ if (ses->sign)
+ req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
+ else if (global_secflags & CIFSSEC_MAY_SIGN)
+ req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
+ else
+ req->SecurityMode = 0;
req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities);
@@ -399,6 +401,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_dbg(FYI, "negotiated smb2.1 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID))
cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
+ else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
+ cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
else {
cifs_dbg(VFS, "Illegal dialect returned by server %d\n",
le16_to_cpu(rsp->DialectRevision));
@@ -407,6 +411,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
}
server->dialect = le16_to_cpu(rsp->DialectRevision);
+ /* SMB2 only has an extended negflavor */
+ server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
server->maxBuf = le32_to_cpu(rsp->MaxTransactSize);
server->max_read = le32_to_cpu(rsp->MaxReadSize);
server->max_write = le32_to_cpu(rsp->MaxWriteSize);
@@ -418,44 +424,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
&rsp->hdr);
- if (blob_length == 0) {
- cifs_dbg(VFS, "missing security blob on negprot\n");
- rc = -EIO;
- goto neg_exit;
- }
-
- cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
- if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
- cifs_dbg(FYI, "Signing required\n");
- if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED |
- SMB2_NEGOTIATE_SIGNING_ENABLED))) {
- cifs_dbg(VFS, "signing required but server lacks support\n");
- rc = -EOPNOTSUPP;
- goto neg_exit;
- }
- server->sec_mode |= SECMODE_SIGN_REQUIRED;
- } else if (sec_flags & CIFSSEC_MAY_SIGN) {
- cifs_dbg(FYI, "Signing optional\n");
- if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
- cifs_dbg(FYI, "Server requires signing\n");
- server->sec_mode |= SECMODE_SIGN_REQUIRED;
- } else {
- server->sec_mode &=
- ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
- }
- } else {
- cifs_dbg(FYI, "Signing disabled\n");
- if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
- cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n");
- rc = -EOPNOTSUPP;
- goto neg_exit;
- }
- server->sec_mode &=
- ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
- }
+ /*
+ * See MS-SMB2 section 2.2.4: if no blob, client picks default which
+ * for us will be
+ * ses->sectype = RawNTLMSSP;
+ * but for time being this is our only auth choice so doesn't matter.
+ * We just found a server which sets blob length to zero expecting raw.
+ */
+ if (blob_length == 0)
+ cifs_dbg(FYI, "missing security blob on negprot\n");
+ rc = cifs_enable_signing(server, ses->sign);
#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */
- rc = decode_neg_token_init(security_blob, blob_length,
+ if (rc)
+ goto neg_exit;
+ if (blob_length)
+ rc = decode_neg_token_init(security_blob, blob_length,
&server->sec_type);
if (rc == 1)
rc = 0;
@@ -480,9 +464,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
int rc = 0;
int resp_buftype;
__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
- struct TCP_Server_Info *server;
- unsigned int sec_flags;
- u8 temp = 0;
+ struct TCP_Server_Info *server = ses->server;
u16 blob_length = 0;
char *security_blob;
char *ntlmssp_blob = NULL;
@@ -490,11 +472,9 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
cifs_dbg(FYI, "Session Setup\n");
- if (ses->server)
- server = ses->server;
- else {
- rc = -EIO;
- return rc;
+ if (!server) {
+ WARN(1, "%s: server is NULL!\n", __func__);
+ return -EIO;
}
/*
@@ -505,7 +485,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
if (!ses->ntlmssp)
return -ENOMEM;
- ses->server->secType = RawNTLMSSP;
+ /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
+ ses->sectype = RawNTLMSSP;
ssetup_ntlmssp_authenticate:
if (phase == NtLmChallenge)
@@ -515,28 +496,19 @@ ssetup_ntlmssp_authenticate:
if (rc)
return rc;
- /* if any of auth flags (ie not sign or seal) are overriden use them */
- if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
- sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/
- else /* if override flags set only sign/seal OR them with global auth */
- sec_flags = global_secflags | ses->overrideSecFlg;
-
- cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags);
-
req->hdr.SessionId = 0; /* First session, not a reauthenticate */
req->VcNumber = 0; /* MBZ */
/* to enable echos and oplocks */
req->hdr.CreditRequest = cpu_to_le16(3);
/* only one of SMB2 signing flags may be set in SMB2 request */
- if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN)
- temp = SMB2_NEGOTIATE_SIGNING_REQUIRED;
- else if (ses->server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED)
- temp = SMB2_NEGOTIATE_SIGNING_REQUIRED;
- else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */
- temp = SMB2_NEGOTIATE_SIGNING_ENABLED;
-
- req->SecurityMode = temp;
+ if (server->sign)
+ req->SecurityMode = SMB2_NEGOTIATE_SIGNING_REQUIRED;
+ else if (global_secflags & CIFSSEC_MAY_SIGN) /* one flag unlike MUST_ */
+ req->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED;
+ else
+ req->SecurityMode = 0;
+
req->Capabilities = 0;
req->Channel = 0; /* MBZ */
@@ -679,7 +651,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
/* since no tcon, smb2_init can not do this, so do here */
req->hdr.SessionId = ses->Suid;
- if (server->sec_mode & SECMODE_SIGN_REQUIRED)
+ if (server->sign)
req->hdr.Flags |= SMB2_FLAGS_SIGNED;
rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0);
@@ -788,11 +760,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
}
tcon->share_flags = le32_to_cpu(rsp->ShareFlags);
+ tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tidStatus = CifsGood;
tcon->need_reconnect = false;
tcon->tid = rsp->hdr.TreeId;
- strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
+ strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
@@ -1036,6 +1009,122 @@ creat_exit:
return rc;
}
+/*
+ * SMB2 IOCTL is used for both IOCTLs and FSCTLs
+ */
+int
+SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
+ u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data,
+ u32 indatalen, char **out_data, u32 *plen /* returned data len */)
+{
+ struct smb2_ioctl_req *req;
+ struct smb2_ioctl_rsp *rsp;
+ struct TCP_Server_Info *server;
+ struct cifs_ses *ses = tcon->ses;
+ struct kvec iov[2];
+ int resp_buftype;
+ int num_iovecs;
+ int rc = 0;
+
+ cifs_dbg(FYI, "SMB2 IOCTL\n");
+
+ /* zero out returned data len, in case of error */
+ if (plen)
+ *plen = 0;
+
+ if (ses && (ses->server))
+ server = ses->server;
+ else
+ return -EIO;
+
+ rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req);
+ if (rc)
+ return rc;
+
+ req->CtlCode = cpu_to_le32(opcode);
+ req->PersistentFileId = persistent_fid;
+ req->VolatileFileId = volatile_fid;
+
+ if (indatalen) {
+ req->InputCount = cpu_to_le32(indatalen);
+ /* do not set InputOffset if no input data */
+ req->InputOffset =
+ cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4);
+ iov[1].iov_base = in_data;
+ iov[1].iov_len = indatalen;
+ num_iovecs = 2;
+ } else
+ num_iovecs = 1;
+
+ req->OutputOffset = 0;
+ req->OutputCount = 0; /* MBZ */
+
+ /*
+ * Could increase MaxOutputResponse, but that would require more
+ * than one credit. Windows typically sets this smaller, but for some
+ * ioctls it may be useful to allow server to send more. No point
+ * limiting what the server can send as long as fits in one credit
+ */
+ req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */
+
+ if (is_fsctl)
+ req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
+ else
+ req->Flags = 0;
+
+ iov[0].iov_base = (char *)req;
+ /* 4 for rfc1002 length field */
+ iov[0].iov_len = get_rfc1002_length(req) + 4;
+
+ if (indatalen)
+ inc_rfc1001_len(req, indatalen);
+
+ rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
+ rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
+
+ if (rc != 0) {
+ if (tcon)
+ cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+ goto ioctl_exit;
+ }
+
+ /* check if caller wants to look at return data or just return rc */
+ if ((plen == NULL) || (out_data == NULL))
+ goto ioctl_exit;
+
+ *plen = le32_to_cpu(rsp->OutputCount);
+
+ /* We check for obvious errors in the output buffer length and offset */
+ if (*plen == 0)
+ goto ioctl_exit; /* server returned no data */
+ else if (*plen > 0xFF00) {
+ cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen);
+ *plen = 0;
+ rc = -EIO;
+ goto ioctl_exit;
+ }
+
+ if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) {
+ cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen,
+ le32_to_cpu(rsp->OutputOffset));
+ *plen = 0;
+ rc = -EIO;
+ goto ioctl_exit;
+ }
+
+ *out_data = kmalloc(*plen, GFP_KERNEL);
+ if (*out_data == NULL) {
+ rc = -ENOMEM;
+ goto ioctl_exit;
+ }
+
+ memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+ *plen);
+ioctl_exit:
+ free_rsp_buf(resp_buftype, rsp);
+ return rc;
+}
+
int
SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid)
@@ -1384,8 +1473,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
case MID_RESPONSE_RECEIVED:
credits_received = le16_to_cpu(buf->CreditRequest);
/* result already set, check signature */
- if (server->sec_mode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ if (server->sign) {
int rc;
rc = smb2_verify_signature(&rqst, server);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4cb4ced258cb..f31043b26bd3 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1,7 +1,7 @@
/*
* fs/cifs/smb2pdu.h
*
- * Copyright (c) International Business Machines Corp., 2009, 2010
+ * Copyright (c) International Business Machines Corp., 2009, 2013
* Etersoft, 2012
* Author(s): Steve French (sfrench@us.ibm.com)
* Pavel Shilovsky (pshilovsky@samba.org) 2012
@@ -170,6 +170,7 @@ struct smb2_negotiate_req {
#define SMB20_PROT_ID 0x0202
#define SMB21_PROT_ID 0x0210
#define SMB30_PROT_ID 0x0300
+#define SMB302_PROT_ID 0x0302
#define BAD_PROT_ID 0xFFFF
/* SecurityMode flags */
@@ -283,10 +284,17 @@ struct smb2_tree_connect_rsp {
#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400
#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800
#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000
-#define SHI1005_FLAGS_ENABLE_HASH 0x00002000
+#define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000
+#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000
+#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000
+#define SHI1005_FLAGS_ALL 0x0000FF33
/* Possible share capabilities */
-#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008)
+#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */
+#define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */
+#define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */
+#define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */
+#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */
struct smb2_tree_disconnect_req {
struct smb2_hdr hdr;
@@ -477,6 +485,75 @@ struct create_lease {
struct lease_context lcontext;
} __packed;
+/* this goes in the ioctl buffer when doing a copychunk request */
+struct copychunk_ioctl {
+ char SourceKey[24];
+ __le32 ChunkCount; /* we are only sending 1 */
+ __le32 Reserved;
+ /* array will only be one chunk long for us */
+ __le64 SourceOffset;
+ __le64 TargetOffset;
+ __le32 Length; /* how many bytes to copy */
+ __u32 Reserved2;
+} __packed;
+
+/* Response and Request are the same format */
+struct validate_negotiate_info {
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 DialectCount;
+ __le16 Dialect[1];
+} __packed;
+
+#define RSS_CAPABLE 0x00000001
+#define RDMA_CAPABLE 0x00000002
+
+struct network_interface_info_ioctl_rsp {
+ __le32 Next; /* next interface. zero if this is last one */
+ __le32 IfIndex;
+ __le32 Capability; /* RSS or RDMA Capable */
+ __le32 Reserved;
+ __le64 LinkSpeed;
+ char SockAddr_Storage[128];
+} __packed;
+
+#define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */
+
+struct smb2_ioctl_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __u16 Reserved;
+ __le32 CtlCode;
+ __u64 PersistentFileId; /* opaque endianness */
+ __u64 VolatileFileId; /* opaque endianness */
+ __le32 InputOffset;
+ __le32 InputCount;
+ __le32 MaxInputResponse;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 MaxOutputResponse;
+ __le32 Flags;
+ __u32 Reserved2;
+ char Buffer[0];
+} __packed;
+
+struct smb2_ioctl_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __u16 Reserved;
+ __le32 CtlCode;
+ __u64 PersistentFileId; /* opaque endianness */
+ __u64 VolatileFileId; /* opaque endianness */
+ __le32 InputOffset;
+ __le32 InputCount;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 Flags;
+ __u32 Reserved2;
+ /* char * buffer[] */
+} __packed;
+
/* Currently defined values for close flags */
#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
struct smb2_close_req {
@@ -517,17 +594,25 @@ struct smb2_flush_rsp {
__le16 Reserved;
} __packed;
+/* For read request Flags field below, following flag is defined for SMB3.02 */
+#define SMB2_READFLAG_READ_UNBUFFERED 0x01
+
+/* Channel field for read and write: exactly one of following flags can be set*/
+#define SMB2_CHANNEL_NONE 0x00000000
+#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */
+
struct smb2_read_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 49 */
__u8 Padding; /* offset from start of SMB2 header to place read */
- __u8 Reserved;
+ __u8 Flags; /* MBZ unless SMB3.02 or later */
__le32 Length;
__le64 Offset;
__u64 PersistentFileId; /* opaque endianness */
__u64 VolatileFileId; /* opaque endianness */
__le32 MinimumCount;
- __le32 Channel; /* Reserved MBZ */
+ __le32 Channel; /* MBZ except for SMB3 or later */
__le32 RemainingBytes;
__le16 ReadChannelInfoOffset; /* Reserved MBZ */
__le16 ReadChannelInfoLength; /* Reserved MBZ */
@@ -545,8 +630,9 @@ struct smb2_read_rsp {
__u8 Buffer[1];
} __packed;
-/* For write request Flags field below the following flag is defined: */
-#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
+/* For write request Flags field below the following flags are defined: */
+#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */
+#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */
struct smb2_write_req {
struct smb2_hdr hdr;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 2aa3535e38ce..d4e1eb807457 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -111,6 +111,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon,
__u32 desired_access, __u32 create_disposition,
__u32 file_attributes, __u32 create_options,
__u8 *oplock, struct smb2_file_all_info *buf);
+extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, u32 opcode,
+ bool is_fsctl, char *in_data, u32 indatalen,
+ char **out_data, u32 *plen /* returned data len */);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 01f0ac800780..09b4fbaadeb6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -116,11 +116,155 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
return rc;
}
+void
+generate_smb3signingkey(struct TCP_Server_Info *server)
+{
+ unsigned char zero = 0x0;
+ __u8 i[4] = {0, 0, 0, 1};
+ __u8 L[4] = {0, 0, 0, 128};
+ int rc = 0;
+ unsigned char prfhash[SMB2_HMACSHA256_SIZE];
+ unsigned char *hashptr = prfhash;
+
+ memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
+ memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+
+ rc = crypto_shash_setkey(server->secmech.hmacsha256,
+ server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not set with session key\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ i, 4);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with n\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ "SMB2AESCMAC", 12);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ &zero, 1);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with zero\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ "SmbSign", 8);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ L, 4);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with L\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
+ hashptr);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
+ goto smb3signkey_ret;
+ }
+
+ memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+
+smb3signkey_ret:
+ return;
+}
+
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
- cifs_dbg(FYI, "smb3 signatures not supported yet\n");
- return -EOPNOTSUPP;
+ int i, rc;
+ unsigned char smb3_signature[SMB2_CMACAES_SIZE];
+ unsigned char *sigptr = smb3_signature;
+ struct kvec *iov = rqst->rq_iov;
+ int n_vec = rqst->rq_nvec;
+ struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+
+ memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
+ memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
+
+ rc = crypto_shash_setkey(server->secmech.cmacaes,
+ server->smb3signingkey, SMB2_CMACAES_SIZE);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
+ return rc;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
+ return rc;
+ }
+
+ for (i = 0; i < n_vec; i++) {
+ if (iov[i].iov_len == 0)
+ continue;
+ if (iov[i].iov_base == NULL) {
+ cifs_dbg(VFS, "null iovec entry");
+ return -EIO;
+ }
+ /*
+ * The first entry includes a length field (which does not get
+ * signed that occupies the first 4 bytes before the header).
+ */
+ if (i == 0) {
+ if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
+ break; /* nothing to sign or corrupt header */
+ rc =
+ crypto_shash_update(
+ &server->secmech.sdesccmacaes->shash,
+ iov[i].iov_base + 4, iov[i].iov_len - 4);
+ } else {
+ rc =
+ crypto_shash_update(
+ &server->secmech.sdesccmacaes->shash,
+ iov[i].iov_base, iov[i].iov_len);
+ }
+ if (rc) {
+ cifs_dbg(VFS, "%s: Couldn't update cmac aes with payload\n",
+ __func__);
+ return rc;
+ }
+ }
+
+ /* now hash over the rq_pages array */
+ for (i = 0; i < rqst->rq_npages; i++) {
+ struct kvec p_iov;
+
+ cifs_rqst_page_to_kvec(rqst, i, &p_iov);
+ crypto_shash_update(&server->secmech.sdesccmacaes->shash,
+ p_iov.iov_base, p_iov.iov_len);
+ kunmap(rqst->rq_pages[i]);
+ }
+
+ rc = crypto_shash_final(&server->secmech.sdesccmacaes->shash,
+ sigptr);
+ if (rc)
+ cifs_dbg(VFS, "%s: Could not generate cmac aes\n", __func__);
+
+ memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+
+ return rc;
}
/* must be called with server->srv_mutex held */
@@ -275,8 +419,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
dump_smb(mid->resp_buf, min_t(u32, 80, len));
/* convert the length into a more usable form */
- if ((len > 24) &&
- (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) {
+ if (len > 24 && server->sign) {
int rc;
rc = smb2_verify_signature(&rqst, server);
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 7056b891e087..d952ee48f4dc 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -1,7 +1,7 @@
/*
* fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
*
- * Copyright (c) International Business Machines Corp., 2002,2009
+ * Copyright (c) International Business Machines Corp., 2002,2013
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -22,7 +22,7 @@
/* IOCTL information */
/*
* List of ioctl/fsctl function codes that are or could be useful in the
- * future to remote clients like cifs or SMB2 client. There is probably
+ * future to remote clients like cifs or SMB2/SMB3 client. This is probably
* a slightly larger set of fsctls that NTFS local filesystem could handle,
* including the seven below that we do not have struct definitions for.
* Even with protocol definitions for most of these now available, we still
@@ -30,7 +30,13 @@
* remotely. Some of the following, such as the encryption/compression ones
* could be invoked from tools via a specialized hook into the VFS rather
* than via the standard vfs entry points
+ *
+ * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are
+ * below). Additional detail on less common ones can be found in MS-FSCC
+ * section 2.3.
*/
+#define FSCTL_DFS_GET_REFERRALS 0x00060194
+#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0
#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
@@ -71,14 +77,31 @@
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
+#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
#define FSCTL_SIS_LINK_FILES 0x0009C104
#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
/* strange that the number for this op is not sequential with previous op */
#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
+/* Enumerate previous versions of a file */
+#define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064
+/* Retrieve an opaque file reference for server-side data movement ie copy */
+#define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078
+#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */
#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */
+/* Perform server-side data movement */
+#define FSCTL_SRV_COPYCHUNK 0x001440F2
+#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
+#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */
+#define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */
#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
#define IO_REPARSE_TAG_HSM 0xC0000004
#define IO_REPARSE_TAG_SIS 0x80000007
+
+/* fsctl flags */
+/* If Flags is set to this value, the request is an FSCTL not ioctl request */
+#define SMB2_0_IOCTL_IS_FSCTL 0x00000001
+
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bfbf4700d160..6fdcb1b4a106 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;
- error = wait_event_freezekillable(server->response_q,
+ error = wait_event_freezekillable_unsafe(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED);
if (error < 0)
return -ERESTARTSYS;
@@ -463,7 +463,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
struct mid_q_entry *mid;
/* enable signing if server requires it */
- if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ if (server->sign)
hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
mid = AllocMidQEntry(hdr, server);
@@ -612,7 +612,7 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
dump_smb(mid->resp_buf, min_t(u32, 92, len));
/* convert the length into a more usable form */
- if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+ if (server->sign) {
struct kvec iov;
int rc = 0;
struct smb_rqst rqst = { .rq_iov = &iov,
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 87e0ee9f4465..14a14808320c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -487,13 +487,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
/* skip null entries */
if (vdir->d_fileno && name.len) {
- /* try to look up this entry in the dcache, that way
- * userspace doesn't have to worry about breaking
- * getcwd by having mismatched inode numbers for
- * internal volume mountpoints. */
- ino = find_inode_number(de, &name);
- if (!ino) ino = vdir->d_fileno;
-
+ ino = vdir->d_fileno;
type = CDT2DT(vdir->d_type);
if (!dir_emit(ctx, name.name, name.len, ino, type))
break;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2b6cb23dd14e..1d1c41f1014d 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
mutex_lock(&buffer->mutex);
len = fill_write_buffer(buffer, buf, count);
if (len > 0)
- len = flush_write_buffer(file->f_path.dentry, buffer, count);
+ len = flush_write_buffer(file->f_path.dentry, buffer, len);
if (len > 0)
*ppos += len;
mutex_unlock(&buffer->mutex);
diff --git a/fs/coredump.c b/fs/coredump.c
index dafafbafa731..72f816d6cad9 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -45,69 +45,79 @@
#include <trace/events/sched.h>
int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
unsigned int core_pipe_limit;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+static int core_name_size = CORENAME_MAX_SIZE;
struct core_name {
char *corename;
int used, size;
};
-static atomic_t call_count = ATOMIC_INIT(1);
/* The maximal length of core_pattern is also specified in sysctl.c */
-static int expand_corename(struct core_name *cn)
+static int expand_corename(struct core_name *cn, int size)
{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+ char *corename = krealloc(cn->corename, size, GFP_KERNEL);
- if (!cn->corename) {
- kfree(old_corename);
+ if (!corename)
return -ENOMEM;
- }
+ if (size > core_name_size) /* racy but harmless */
+ core_name_size = size;
+
+ cn->size = ksize(corename);
+ cn->corename = corename;
return 0;
}
+static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
+{
+ int free, need;
+
+again:
+ free = cn->size - cn->used;
+ need = vsnprintf(cn->corename + cn->used, free, fmt, arg);
+ if (need < free) {
+ cn->used += need;
+ return 0;
+ }
+
+ if (!expand_corename(cn, cn->size + need - free + 1))
+ goto again;
+
+ return -ENOMEM;
+}
+
static int cn_printf(struct core_name *cn, const char *fmt, ...)
{
- char *cur;
- int need;
- int ret;
va_list arg;
+ int ret;
va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
+ ret = cn_vprintf(cn, fmt, arg);
va_end(arg);
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
+ return ret;
+}
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
+static int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
+{
+ int cur = cn->used;
+ va_list arg;
+ int ret;
-out_printf:
- cur = cn->corename + cn->used;
va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
+ ret = cn_vprintf(cn, fmt, arg);
va_end(arg);
- cn->used += need;
- return 0;
-expand_fail:
+ for (; cur < cn->used; ++cur) {
+ if (cn->corename[cur] == '/')
+ cn->corename[cur] = '!';
+ }
return ret;
}
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
static int cn_print_exe_file(struct core_name *cn)
{
struct file *exe_file;
@@ -115,12 +125,8 @@ static int cn_print_exe_file(struct core_name *cn)
int ret;
exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
+ if (!exe_file)
+ return cn_esc_printf(cn, "%s (path unknown)", current->comm);
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
if (!pathbuf) {
@@ -134,9 +140,7 @@ static int cn_print_exe_file(struct core_name *cn)
goto free_buf;
}
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
+ ret = cn_esc_printf(cn, "%s", path);
free_buf:
kfree(pathbuf);
@@ -157,19 +161,19 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
int pid_in_pattern = 0;
int err = 0;
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
cn->used = 0;
-
- if (!cn->corename)
+ cn->corename = NULL;
+ if (expand_corename(cn, core_name_size))
return -ENOMEM;
+ cn->corename[0] = '\0';
+
+ if (ispipe)
+ ++pat_ptr;
/* Repeat as long as we have more pattern to process and more output
space */
while (*pat_ptr) {
if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
err = cn_printf(cn, "%c", *pat_ptr++);
} else {
switch (*++pat_ptr) {
@@ -210,22 +214,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
break;
}
/* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
+ case 'h':
down_read(&uts_sem);
- err = cn_printf(cn, "%s",
+ err = cn_esc_printf(cn, "%s",
utsname()->nodename);
up_read(&uts_sem);
- cn_escape(namestart);
break;
- }
/* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
+ case 'e':
+ err = cn_esc_printf(cn, "%s", current->comm);
break;
- }
case 'E':
err = cn_print_exe_file(cn);
break;
@@ -244,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
return err;
}
+out:
/* Backward compatibility with core_uses_pid:
*
* If core_pattern does not include a %p (as is the default)
@@ -254,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
if (err)
return err;
}
-out:
return ispipe;
}
@@ -549,7 +547,7 @@ void do_coredump(siginfo_t *siginfo)
if (ispipe < 0) {
printk(KERN_WARNING "format_corename failed\n");
printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
+ goto fail_unlock;
}
if (cprm.limit == 1) {
@@ -584,7 +582,7 @@ void do_coredump(siginfo_t *siginfo)
goto fail_dropcount;
}
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+ helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
if (!helper_argv) {
printk(KERN_WARNING "%s failed to allocate memory\n",
__func__);
@@ -601,7 +599,7 @@ void do_coredump(siginfo_t *siginfo)
argv_free(helper_argv);
if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
+ printk(KERN_INFO "Core dump to |%s pipe failed\n",
cn.corename);
goto close_fail;
}
@@ -669,7 +667,6 @@ fail_dropcount:
atomic_dec(&core_dump_count);
fail_unlock:
kfree(cn.corename);
-fail_corename:
coredump_finish(mm, core_dumped);
revert_creds(old_cred);
fail_creds:
diff --git a/fs/dcache.c b/fs/dcache.c
index 5a23073138df..87bdb5329c3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1730,7 +1730,7 @@ EXPORT_SYMBOL(d_add_ci);
* Do the slow-case of the dentry name compare.
*
* Unlike the dentry_cmp() function, we need to atomically
- * load the name, length and inode information, so that the
+ * load the name and length information, so that the
* filesystem can rely on them, and can use the 'name' and
* 'len' information without worrying about walking off the
* end of memory etc.
@@ -1748,22 +1748,18 @@ enum slow_d_compare {
static noinline enum slow_d_compare slow_dentry_cmp(
const struct dentry *parent,
- struct inode *inode,
struct dentry *dentry,
unsigned int seq,
const struct qstr *name)
{
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
- struct inode *i = dentry->d_inode;
if (read_seqcount_retry(&dentry->d_seq, seq)) {
cpu_relax();
return D_COMP_SEQRETRY;
}
- if (parent->d_op->d_compare(parent, inode,
- dentry, i,
- tlen, tname, name))
+ if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
return D_COMP_NOMATCH;
return D_COMP_OK;
}
@@ -1773,7 +1769,6 @@ static noinline enum slow_d_compare slow_dentry_cmp(
* @parent: parent dentry
* @name: qstr of name we wish to find
* @seqp: returns d_seq value at the point where the dentry was found
- * @inode: returns dentry->d_inode when the inode was found valid.
* Returns: dentry, or NULL
*
* __d_lookup_rcu is the dcache lookup function for rcu-walk name
@@ -1800,7 +1795,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
*/
struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
- unsigned *seqp, struct inode *inode)
+ unsigned *seqp)
{
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
@@ -1834,11 +1829,10 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
seqretry:
/*
* The dentry sequence count protects us from concurrent
- * renames, and thus protects inode, parent and name fields.
+ * renames, and thus protects parent and name fields.
*
* The caller must perform a seqcount check in order
- * to do anything useful with the returned dentry,
- * including using the 'd_inode' pointer.
+ * to do anything useful with the returned dentry.
*
* NOTE! We do a "raw" seqcount_begin here. That means that
* we don't wait for the sequence count to stabilize if it
@@ -1852,12 +1846,12 @@ seqretry:
continue;
if (d_unhashed(dentry))
continue;
- *seqp = seq;
if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
if (dentry->d_name.hash != hashlen_hash(hashlen))
continue;
- switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+ *seqp = seq;
+ switch (slow_dentry_cmp(parent, dentry, seq, name)) {
case D_COMP_OK:
return dentry;
case D_COMP_NOMATCH:
@@ -1869,6 +1863,7 @@ seqretry:
if (dentry->d_name.hash_len != hashlen)
continue;
+ *seqp = seq;
if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
return dentry;
}
@@ -1966,9 +1961,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
if (parent->d_flags & DCACHE_OP_COMPARE) {
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
- if (parent->d_op->d_compare(parent, parent->d_inode,
- dentry, dentry->d_inode,
- tlen, tname, name))
+ if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
goto next;
} else {
if (dentry->d_name.len != len)
@@ -2005,7 +1998,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
*/
name->hash = full_name_hash(name->name, name->len);
if (dir->d_flags & DCACHE_OP_HASH) {
- int err = dir->d_op->d_hash(dir, dir->d_inode, name);
+ int err = dir->d_op->d_hash(dir, name);
if (unlikely(err < 0))
return ERR_PTR(err);
}
@@ -2975,34 +2968,21 @@ rename_retry:
goto again;
}
-/**
- * find_inode_number - check for dentry with name
- * @dir: directory to check
- * @name: Name to find.
- *
- * Check whether a dentry already exists for the given name,
- * and return the inode number if it has an inode. Otherwise
- * 0 is returned.
- *
- * This routine is used to post-process directory listings for
- * filesystems using synthetic inode numbers, and is necessary
- * to keep getcwd() working.
- */
-
-ino_t find_inode_number(struct dentry *dir, struct qstr *name)
+void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
- struct dentry * dentry;
- ino_t ino = 0;
-
- dentry = d_hash_and_lookup(dir, name);
- if (!IS_ERR_OR_NULL(dentry)) {
- if (dentry->d_inode)
- ino = dentry->d_inode->i_ino;
- dput(dentry);
- }
- return ino;
+ inode_dec_link_count(inode);
+ BUG_ON(dentry->d_name.name != dentry->d_iname ||
+ !hlist_unhashed(&dentry->d_alias) ||
+ !d_unlinked(dentry));
+ spin_lock(&dentry->d_parent->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
+ (unsigned long long)inode->i_ino);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dentry->d_parent->d_lock);
+ d_instantiate(dentry, inode);
}
-EXPORT_SYMBOL(find_inode_number);
+EXPORT_SYMBOL(d_tmpfile);
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f71ec125290d..cfa109a4d5a2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -2243,12 +2243,11 @@ out:
*/
int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
size_t *plaintext_name_size,
- struct dentry *ecryptfs_dir_dentry,
+ struct super_block *sb,
const char *name, size_t name_size)
{
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
- &ecryptfs_superblock_to_private(
- ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
+ &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
char *decoded_name;
size_t decoded_name_size;
size_t packet_size;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index f622a733f7ad..df19d34a033b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -575,7 +575,7 @@ int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
struct inode *ecryptfs_inode);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
size_t *decrypted_name_size,
- struct dentry *ecryptfs_dentry,
+ struct super_block *sb,
const char *name, size_t name_size);
int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
int ecryptfs_encrypt_and_encode_filename(
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9aa05e08060b..24f1105fda3a 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -70,7 +70,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
struct ecryptfs_getdents_callback {
struct dir_context ctx;
struct dir_context *caller;
- struct dentry *dentry;
+ struct super_block *sb;
int filldir_called;
int entries_written;
};
@@ -88,7 +88,7 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen,
buf->filldir_called++;
rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size,
- buf->dentry, lower_name,
+ buf->sb, lower_name,
lower_namelen);
if (rc) {
printk(KERN_ERR "%s: Error attempting to decode and decrypt "
@@ -114,15 +114,14 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx)
{
int rc;
struct file *lower_file;
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct ecryptfs_getdents_callback buf = {
.ctx.actor = ecryptfs_filldir,
.caller = ctx,
- .dentry = file->f_path.dentry
+ .sb = inode->i_sb,
};
lower_file = ecryptfs_file_to_lower(file);
lower_file->f_pos = ctx->pos;
- inode = file_inode(file);
rc = iterate_dir(lower_file, &buf.ctx);
ctx->pos = buf.ctx.pos;
if (rc < 0)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5eab400e2590..a2f2bb2c256d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -679,7 +679,7 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf,
set_fs(old_fs);
if (rc < 0)
goto out;
- rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry,
+ rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry->d_sb,
lower_buf, rc);
out:
kfree(lower_buf);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 141aee31884f..a8766b880c07 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -45,8 +45,8 @@ static struct super_block *efivarfs_sb;
* So we need to perform a case-sensitive match on part 1 and a
* case-insensitive match on part 2.
*/
-static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int efivarfs_d_compare(const struct dentry *parent,
+ const struct dentry *dentry,
unsigned int len, const char *str,
const struct qstr *name)
{
@@ -63,8 +63,7 @@ static int efivarfs_d_compare(const struct dentry *parent, const struct inode *p
return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN);
}
-static int efivarfs_d_hash(const struct dentry *dentry,
- const struct inode *inode, struct qstr *qstr)
+static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
{
unsigned long hash = init_name_hash();
const unsigned char *s = qstr->name;
@@ -108,7 +107,7 @@ static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
q.name = name;
q.len = strlen(name);
- err = efivarfs_d_hash(NULL, NULL, &q);
+ err = efivarfs_d_hash(NULL, &q);
if (err)
return ERR_PTR(err);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index deecc7294a67..9ad17b15b454 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <linux/device.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
@@ -1602,7 +1603,8 @@ fetch_events:
}
spin_unlock_irqrestore(&ep->lock, flags);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+ if (!freezable_schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS))
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags);
@@ -1975,8 +1977,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
return -EINVAL;
if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
return -EFAULT;
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ sigsaved = current->blocked;
+ set_current_blocked(&ksigmask);
}
error = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -1993,7 +1995,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
sizeof(sigsaved));
set_restore_sigmask();
} else
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ set_current_blocked(&sigsaved);
}
return error;
@@ -2020,8 +2022,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
return -EFAULT;
sigset_from_compat(&ksigmask, &csigmask);
- sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ sigsaved = current->blocked;
+ set_current_blocked(&ksigmask);
}
err = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -2038,7 +2040,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
sizeof(sigsaved));
set_restore_sigmask();
} else
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ set_current_blocked(&sigsaved);
}
return err;
diff --git a/fs/exec.c b/fs/exec.c
index ffd7a813ad3d..9c73def87642 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -110,13 +110,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
static const struct open_flags uselib_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
- .intent = LOOKUP_OPEN
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
};
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -756,10 +757,11 @@ struct file *open_exec(const char *name)
static const struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_EXEC | MAY_OPEN,
- .intent = LOOKUP_OPEN
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
};
- file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW);
+ file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags);
if (IS_ERR(file))
goto out;
@@ -930,6 +932,7 @@ static int de_thread(struct task_struct *tsk)
* also take its birthdate (always earlier than our own).
*/
tsk->start_time = leader->start_time;
+ tsk->real_start_time = leader->real_start_time;
BUG_ON(!same_thread_group(leader, tsk));
BUG_ON(has_group_leader_pid(tsk));
@@ -945,9 +948,8 @@ static int de_thread(struct task_struct *tsk)
* Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
- detach_pid(tsk, PIDTYPE_PID);
tsk->pid = leader->pid;
- attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ change_pid(tsk, PIDTYPE_PID, task_pid(leader));
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
@@ -1463,7 +1465,6 @@ static int do_execve_common(const char *filename,
struct files_struct *displaced;
bool clear_in_exec;
int retval;
- const struct cred *cred = current_cred();
/*
* We move the actual failure in case of RLIMIT_NPROC excess from
@@ -1472,7 +1473,7 @@ static int do_execve_common(const char *filename,
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
- atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+ atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
retval = -EAGAIN;
goto out_ret;
}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 73b0d9519836..256dd5f4c1c4 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -119,6 +119,29 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
return ext2_add_nondir(dentry, inode);
}
+static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode = ext2_new_inode(dir, mode, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &ext2_file_inode_operations;
+ if (ext2_use_xip(inode->i_sb)) {
+ inode->i_mapping->a_ops = &ext2_aops_xip;
+ inode->i_fop = &ext2_xip_file_operations;
+ } else if (test_opt(inode->i_sb, NOBH)) {
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ inode->i_fop = &ext2_file_operations;
+ } else {
+ inode->i_mapping->a_ops = &ext2_aops;
+ inode->i_fop = &ext2_file_operations;
+ }
+ mark_inode_dirty(inode);
+ d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
+ return 0;
+}
+
static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct inode * inode;
@@ -398,6 +421,7 @@ const struct inode_operations ext2_dir_inode_operations = {
#endif
.setattr = ext2_setattr,
.get_acl = ext2_get_acl,
+ .tmpfile = ext2_tmpfile,
};
const struct inode_operations ext2_special_inode_operations = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f67668f724ba..2bd85486b879 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = {
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .is_dirty_writeback = buffer_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
};
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index cea8ecf3e76e..998ea111e537 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1759,6 +1759,45 @@ retry:
return err;
}
+static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ handle_t *handle;
+ struct inode *inode;
+ int err, retries = 0;
+
+ dquot_initialize(dir);
+
+retry:
+ handle = ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ 4 + EXT3_XATTR_TRANS_BLOCKS);
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ inode = ext3_new_inode (handle, dir, NULL, mode);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ inode->i_op = &ext3_file_inode_operations;
+ inode->i_fop = &ext3_file_operations;
+ ext3_set_aops(inode);
+ err = ext3_orphan_add(handle, inode);
+ if (err)
+ goto err_drop_inode;
+ mark_inode_dirty(inode);
+ d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
+ }
+ ext3_journal_stop(handle);
+ if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
+ return err;
+err_drop_inode:
+ ext3_journal_stop(handle);
+ unlock_new_inode(inode);
+ iput(inode);
+ return err;
+}
+
static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
handle_t *handle;
@@ -2300,7 +2339,7 @@ static int ext3_link (struct dentry * old_dentry,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2314,6 +2353,11 @@ retry:
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
ext3_mark_inode_dirty(handle, inode);
+ /* this can happen only for tmpfile being
+ * linked the first time
+ */
+ if (inode->i_nlink == 1)
+ ext3_orphan_del(handle, inode);
d_instantiate(dentry, inode);
} else {
drop_nlink(inode);
@@ -2516,6 +2560,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.mkdir = ext3_mkdir,
.rmdir = ext3_rmdir,
.mknod = ext3_mknod,
+ .tmpfile = ext3_tmpfile,
.rename = ext3_rename,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b19f0a457f32..6f4cc567c382 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -494,17 +494,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
if (dataoff > isize)
return -ENXIO;
- if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
- return -EINVAL;
- if (dataoff > maxsize)
- return -EINVAL;
-
- if (dataoff != file->f_pos) {
- file->f_pos = dataoff;
- file->f_version = 0;
- }
-
- return dataoff;
+ return vfs_setpos(file, dataoff, maxsize);
}
/*
@@ -580,17 +570,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
if (holeoff > isize)
holeoff = isize;
- if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
- return -EINVAL;
- if (holeoff > maxsize)
- return -EINVAL;
-
- if (holeoff != file->f_pos) {
- file->f_pos = holeoff;
- file->f_version = 0;
- }
-
- return holeoff;
+ return vfs_setpos(file, holeoff, maxsize);
}
/*
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ab2f6dc44b3a..234b834d5a97 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2296,6 +2296,45 @@ retry:
return err;
}
+static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ handle_t *handle;
+ struct inode *inode;
+ int err, retries = 0;
+
+ dquot_initialize(dir);
+
+retry:
+ inode = ext4_new_inode_start_handle(dir, mode,
+ NULL, 0, NULL,
+ EXT4_HT_DIR,
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ 4 + EXT4_XATTR_TRANS_BLOCKS);
+ handle = ext4_journal_current_handle();
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ inode->i_op = &ext4_file_inode_operations;
+ inode->i_fop = &ext4_file_operations;
+ ext4_set_aops(inode);
+ err = ext4_orphan_add(handle, inode);
+ if (err)
+ goto err_drop_inode;
+ mark_inode_dirty(inode);
+ d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
+ }
+ if (handle)
+ ext4_journal_stop(handle);
+ if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
+ return err;
+err_drop_inode:
+ ext4_journal_stop(handle);
+ unlock_new_inode(inode);
+ iput(inode);
+ return err;
+}
+
struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
@@ -2903,7 +2942,7 @@ static int ext4_link(struct dentry *old_dentry,
retry:
handle = ext4_journal_start(dir, EXT4_HT_DIR,
(EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS));
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2917,6 +2956,11 @@ retry:
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
+ /* this can happen only for tmpfile being
+ * linked the first time
+ */
+ if (inode->i_nlink == 1)
+ ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
} else {
drop_nlink(inode);
@@ -3169,6 +3213,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.mkdir = ext4_mkdir,
.rmdir = ext4_rmdir,
.mknod = ext4_mknod,
+ .tmpfile = ext4_tmpfile,
.rename = ext4_rename,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 359d307b5507..628e22a5a543 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
+ fat_msg(sb, KERN_ERR, "error, %pV", &vaf);
va_end(args);
}
@@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
sb->s_flags |= MS_RDONLY;
- printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
- "set read-only\n", sb->s_id);
+ fat_msg(sb, KERN_ERR, "Filesystem has been set read-only");
}
}
EXPORT_SYMBOL_GPL(__fat_fs_error);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 081b759cff83..a783b0e1272a 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,8 +148,7 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
* that the existing dentry can be used. The msdos fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+static int msdos_hash(const struct dentry *dentry, struct qstr *qstr)
{
struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
unsigned char msdos_name[MSDOS_NAME];
@@ -165,8 +164,7 @@ static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
* Compare two msdos names. If either of the names are invalid,
* we fall back to doing the standard name comparison.
*/
-static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int msdos_cmp(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 2da952036a3d..6df8d3d885e5 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -107,8 +107,7 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr)
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+static int vfat_hash(const struct dentry *dentry, struct qstr *qstr)
{
qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
return 0;
@@ -120,8 +119,7 @@ static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr)
{
struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
const unsigned char *name;
@@ -142,8 +140,7 @@ static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
/*
* Case insensitive compare of two vfat names.
*/
-static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int vfat_cmpi(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
@@ -162,8 +159,7 @@ static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
/*
* Case sensitive compare of two vfat names.
*/
-static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int vfat_cmp(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
unsigned int alen, blen;
diff --git a/fs/file_table.c b/fs/file_table.c
index 485dc0eddd67..08e719b884ca 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -227,7 +227,7 @@ static void __fput(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_inode;
might_sleep();
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 35f281033142..5c121fe19c5f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -548,8 +548,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
res = io->bytes < 0 ? io->size : io->bytes;
if (!is_sync_kiocb(io->iocb)) {
- struct path *path = &io->iocb->ki_filp->f_path;
- struct inode *inode = path->dentry->d_inode;
+ struct inode *inode = file_inode(io->iocb->ki_filp);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde14a08..0b578598c6ac 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = {
static void sanitize_global_limit(unsigned *limit)
{
if (*limit == 0)
- *limit = ((num_physpages << PAGE_SHIFT) >> 13) /
+ *limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
sizeof(struct fuse_req);
if (*limit >= 1 << 16)
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4fddb3c22d25..f2448ab2aac5 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -109,8 +109,7 @@ fail:
return 0;
}
-static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *str)
+static int gfs2_dhash(const struct dentry *dentry, struct qstr *str)
{
str->hash = gfs2_disk_hash(str->name, str->len);
return 0;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f99f9e8a325f..72c3866a7320 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -912,7 +912,7 @@ out_uninit:
* cluster; until we do, disable leases (by just returning -EINVAL),
* unless the administrator has requested purely local locking.
*
- * Locking: called under lock_flocks
+ * Locking: called under i_lock
*
* Returns: errno
*/
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index a73b11839a41..0524cda47a6e 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -229,13 +229,10 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
/* string.c */
extern const struct dentry_operations hfs_dentry_operations;
-extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
- struct qstr *);
+extern int hfs_hash_dentry(const struct dentry *, struct qstr *);
extern int hfs_strcmp(const unsigned char *, unsigned int,
const unsigned char *, unsigned int);
-extern int hfs_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+extern int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
/* trans.c */
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 495a976a3cc9..85b610c3909f 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,8 +51,7 @@ static unsigned char caseorder[256] = {
/*
* Hash a string to an integer in a case-independent way
*/
-int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *this)
+int hfs_hash_dentry(const struct dentry *dentry, struct qstr *this)
{
const unsigned char *name = this->name;
unsigned int hash, len = this->len;
@@ -93,8 +92,7 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
* Test for equality of two strings in the HFS filename character ordering.
* return 1 on failure and 0 on success
*/
-int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
const unsigned char *n1, *n2;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 60b0a3388b26..ede79317cfb8 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -495,11 +495,8 @@ int hfsplus_uni2asc(struct super_block *,
const struct hfsplus_unistr *, char *, int *);
int hfsplus_asc2uni(struct super_block *,
struct hfsplus_unistr *, int, const char *, int);
-int hfsplus_hash_dentry(const struct dentry *dentry,
- const struct inode *inode, struct qstr *str);
-int hfsplus_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str);
+int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
/* wrapper.c */
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 2c2e47dcfdd8..e8ef121a4d8b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -334,8 +334,7 @@ int hfsplus_asc2uni(struct super_block *sb,
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *str)
+int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
{
struct super_block *sb = dentry->d_sb;
const char *astr;
@@ -386,9 +385,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct super_block *sb = parent->d_sb;
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 05d4816e4e77..fa27980f2229 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,8 +12,7 @@
* Note: the dentry argument is the parent dentry.
*/
-static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+static int hpfs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
{
unsigned long hash;
int i;
@@ -35,9 +34,7 @@ static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *ino
return 0;
}
-static int hpfs_compare_dentry(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int hpfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
unsigned al = len;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index fc90ab11c340..4338ff32959d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
struct dentry *parent;
char *root, *name;
const char *seg_name;
- int len, seg_len;
+ int len, seg_len, root_len;
len = 0;
parent = dentry;
@@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra)
}
root = "proc";
- len += strlen(root);
+ root_len = strlen(root);
+ len += root_len;
name = kmalloc(len + extra + 1, GFP_KERNEL);
if (name == NULL)
return NULL;
@@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
while (parent->d_parent != parent) {
if (is_pid(parent)) {
seg_name = "pid";
- seg_len = strlen("pid");
+ seg_len = strlen(seg_name);
}
else {
seg_name = parent->d_name.name;
@@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra)
len -= seg_len + 1;
name[len] = '/';
- strncpy(&name[len + 1], seg_name, seg_len);
+ memcpy(&name[len + 1], seg_name, seg_len);
parent = parent->d_parent;
}
- strncpy(name, root, strlen(root));
+ memcpy(name, root, root_len);
return name;
}
diff --git a/fs/inode.c b/fs/inode.c
index 00d5fc3b86e1..d6dfb09c8280 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -333,8 +333,10 @@ EXPORT_SYMBOL(set_nlink);
*/
void inc_nlink(struct inode *inode)
{
- if (WARN_ON(inode->i_nlink == 0))
+ if (unlikely(inode->i_nlink == 0)) {
+ WARN_ON(!(inode->i_state & I_LINKABLE));
atomic_long_dec(&inode->i_sb->s_remove_count);
+ }
inode->__i_nlink++;
}
diff --git a/fs/internal.h b/fs/internal.h
index 68121584ae37..7c5f01cf619d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -96,11 +96,12 @@ struct open_flags {
umode_t mode;
int acc_mode;
int intent;
+ int lookup_flags;
};
extern struct file *do_filp_open(int dfd, struct filename *pathname,
- const struct open_flags *op, int flags);
+ const struct open_flags *op);
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
- const char *, const struct open_flags *, int lookup_flags);
+ const char *, const struct open_flags *);
extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag);
@@ -130,6 +131,7 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
* read_write.c
*/
extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
+extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
/*
* splice.c
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d9b8aebdeb22..c348d6d88624 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -28,31 +28,23 @@
#define BEQUIET
-static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
- struct qstr *qstr);
-static int isofs_hash(const struct dentry *parent, const struct inode *inode,
- struct qstr *qstr);
+static int isofs_hashi(const struct dentry *parent, struct qstr *qstr);
+static int isofs_hash(const struct dentry *parent, struct qstr *qstr);
static int isofs_dentry_cmpi(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+ const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
static int isofs_dentry_cmp(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+ const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
#ifdef CONFIG_JOLIET
-static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
- struct qstr *qstr);
-static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
- struct qstr *qstr);
+static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr);
+static int isofs_hash_ms(const struct dentry *parent, struct qstr *qstr);
static int isofs_dentry_cmpi_ms(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+ const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
static int isofs_dentry_cmp_ms(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+ const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name);
#endif
@@ -265,30 +257,26 @@ static int isofs_dentry_cmp_common(
}
static int
-isofs_hash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+isofs_hash(const struct dentry *dentry, struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 0);
}
static int
-isofs_hashi(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 0);
}
static int
-isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return isofs_dentry_cmp_common(len, str, name, 0, 0);
}
static int
-isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return isofs_dentry_cmp_common(len, str, name, 0, 1);
@@ -296,30 +284,26 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
#ifdef CONFIG_JOLIET
static int
-isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 1);
}
static int
-isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 1);
}
static int
-isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+isofs_dentry_cmp_ms(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return isofs_dentry_cmp_common(len, str, name, 1, 0);
}
static int
-isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+isofs_dentry_cmpi_ms(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
return isofs_dentry_cmp_common(len, str, name, 1, 1);
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index c167028844ed..95295640d9c8 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,8 +37,7 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
qstr.name = compare;
qstr.len = dlen;
- return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
- dentry->d_name.len, dentry->d_name.name, &qstr);
+ return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr);
}
/*
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 89186b7b9002..8b19027291d6 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1538,8 +1538,7 @@ const struct file_operations jfs_dir_operations = {
.llseek = generic_file_llseek,
};
-static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
- struct qstr *this)
+static int jfs_ci_hash(const struct dentry *dir, struct qstr *this)
{
unsigned long hash;
int i;
@@ -1552,9 +1551,7 @@ static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
return 0;
}
-static int jfs_ci_compare(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int jfs_ci_compare(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
int i, result = 1;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a2aa97d45670..10d6c41aecad 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv)
svc_sock_update_bufs(serv);
serv->sv_maxconn = nlm_max_connections;
- nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
+ nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
printk(KERN_WARNING
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e703318c41df..067778b0ccc9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block)
dprintk("lockd: unlinking block %p...\n", block);
/* Remove block from list */
- status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
+ status = posix_unblock_lock(&block->b_call->a_args.lock.fl);
nlmsvc_remove_block(block);
return status;
}
@@ -744,8 +744,20 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
}
+/*
+ * Since NLM uses two "keys" for tracking locks, we need to hash them down
+ * to one for the blocked_hash. Here, we're just xor'ing the host address
+ * with the pid in order to create a key value for picking a hash bucket.
+ */
+static unsigned long
+nlmsvc_owner_key(struct file_lock *fl)
+{
+ return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid;
+}
+
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_compare_owner = nlmsvc_same_owner,
+ .lm_owner_key = nlmsvc_owner_key,
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
};
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 97e87415b145..dc5c75930f0f 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
again:
file->f_locks = 0;
- lock_flocks(); /* protects i_flock list */
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
@@ -181,7 +181,7 @@ again:
if (match(lockhost, host)) {
struct file_lock lock = *fl;
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
@@ -193,7 +193,7 @@ again:
goto again;
}
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return 0;
}
@@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file)
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops == &nlmsvc_lock_operations) {
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return 1;
}
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
file->f_locks = 0;
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index cb424a4fed71..04e2c1fdb157 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -126,6 +126,7 @@
#include <linux/time.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
+#include <linux/hashtable.h>
#include <asm/uaccess.h>
@@ -153,30 +154,51 @@ int lease_break_time = 45;
#define for_each_lock(inode, lockp) \
for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
-static LIST_HEAD(file_lock_list);
-static LIST_HEAD(blocked_list);
+/*
+ * The global file_lock_list is only used for displaying /proc/locks. Protected
+ * by the file_lock_lock.
+ */
+static HLIST_HEAD(file_lock_list);
static DEFINE_SPINLOCK(file_lock_lock);
/*
- * Protects the two list heads above, plus the inode->i_flock list
+ * The blocked_hash is used to find POSIX lock loops for deadlock detection.
+ * It is protected by blocked_lock_lock.
+ *
+ * We hash locks by lockowner in order to optimize searching for the lock a
+ * particular lockowner is waiting on.
+ *
+ * FIXME: make this value scale via some heuristic? We generally will want more
+ * buckets when we have more lockowners holding locks, but that's a little
+ * difficult to determine without knowing what the workload will look like.
*/
-void lock_flocks(void)
-{
- spin_lock(&file_lock_lock);
-}
-EXPORT_SYMBOL_GPL(lock_flocks);
+#define BLOCKED_HASH_BITS 7
+static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
-void unlock_flocks(void)
-{
- spin_unlock(&file_lock_lock);
-}
-EXPORT_SYMBOL_GPL(unlock_flocks);
+/*
+ * This lock protects the blocked_hash. Generally, if you're accessing it, you
+ * want to be holding this lock.
+ *
+ * In addition, it also protects the fl->fl_block list, and the fl->fl_next
+ * pointer for file_lock structures that are acting as lock requests (in
+ * contrast to those that are acting as records of acquired locks).
+ *
+ * Note that when we acquire this lock in order to change the above fields,
+ * we often hold the i_lock as well. In certain cases, when reading the fields
+ * protected by this lock, we can skip acquiring it iff we already hold the
+ * i_lock.
+ *
+ * In particular, adding an entry to the fl_block list requires that you hold
+ * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting
+ * an entry from the list however only requires the file_lock_lock.
+ */
+static DEFINE_SPINLOCK(blocked_lock_lock);
static struct kmem_cache *filelock_cache __read_mostly;
static void locks_init_lock_heads(struct file_lock *fl)
{
- INIT_LIST_HEAD(&fl->fl_link);
+ INIT_HLIST_NODE(&fl->fl_link);
INIT_LIST_HEAD(&fl->fl_block);
init_waitqueue_head(&fl->fl_wait);
}
@@ -210,7 +232,7 @@ void locks_free_lock(struct file_lock *fl)
{
BUG_ON(waitqueue_active(&fl->fl_wait));
BUG_ON(!list_empty(&fl->fl_block));
- BUG_ON(!list_empty(&fl->fl_link));
+ BUG_ON(!hlist_unhashed(&fl->fl_link));
locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
@@ -484,47 +506,108 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
return fl1->fl_owner == fl2->fl_owner;
}
+static inline void
+locks_insert_global_locks(struct file_lock *fl)
+{
+ spin_lock(&file_lock_lock);
+ hlist_add_head(&fl->fl_link, &file_lock_list);
+ spin_unlock(&file_lock_lock);
+}
+
+static inline void
+locks_delete_global_locks(struct file_lock *fl)
+{
+ spin_lock(&file_lock_lock);
+ hlist_del_init(&fl->fl_link);
+ spin_unlock(&file_lock_lock);
+}
+
+static unsigned long
+posix_owner_key(struct file_lock *fl)
+{
+ if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
+ return fl->fl_lmops->lm_owner_key(fl);
+ return (unsigned long)fl->fl_owner;
+}
+
+static inline void
+locks_insert_global_blocked(struct file_lock *waiter)
+{
+ hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
+}
+
+static inline void
+locks_delete_global_blocked(struct file_lock *waiter)
+{
+ hash_del(&waiter->fl_link);
+}
+
/* Remove waiter from blocker's block list.
* When blocker ends up pointing to itself then the list is empty.
+ *
+ * Must be called with blocked_lock_lock held.
*/
static void __locks_delete_block(struct file_lock *waiter)
{
+ locks_delete_global_blocked(waiter);
list_del_init(&waiter->fl_block);
- list_del_init(&waiter->fl_link);
waiter->fl_next = NULL;
}
-/*
- */
-void locks_delete_block(struct file_lock *waiter)
+static void locks_delete_block(struct file_lock *waiter)
{
- lock_flocks();
+ spin_lock(&blocked_lock_lock);
__locks_delete_block(waiter);
- unlock_flocks();
+ spin_unlock(&blocked_lock_lock);
}
-EXPORT_SYMBOL(locks_delete_block);
/* Insert waiter into blocker's block list.
* We use a circular list so that processes can be easily woken up in
* the order they blocked. The documentation doesn't require this but
* it seems like the reasonable thing to do.
+ *
+ * Must be called with both the i_lock and blocked_lock_lock held. The fl_block
+ * list itself is protected by the file_lock_list, but by ensuring that the
+ * i_lock is also held on insertions we can avoid taking the blocked_lock_lock
+ * in some cases when we see that the fl_block list is empty.
*/
-static void locks_insert_block(struct file_lock *blocker,
- struct file_lock *waiter)
+static void __locks_insert_block(struct file_lock *blocker,
+ struct file_lock *waiter)
{
BUG_ON(!list_empty(&waiter->fl_block));
- list_add_tail(&waiter->fl_block, &blocker->fl_block);
waiter->fl_next = blocker;
+ list_add_tail(&waiter->fl_block, &blocker->fl_block);
if (IS_POSIX(blocker))
- list_add(&waiter->fl_link, &blocked_list);
+ locks_insert_global_blocked(waiter);
}
-/* Wake up processes blocked waiting for blocker.
- * If told to wait then schedule the processes until the block list
- * is empty, otherwise empty the block list ourselves.
+/* Must be called with i_lock held. */
+static void locks_insert_block(struct file_lock *blocker,
+ struct file_lock *waiter)
+{
+ spin_lock(&blocked_lock_lock);
+ __locks_insert_block(blocker, waiter);
+ spin_unlock(&blocked_lock_lock);
+}
+
+/*
+ * Wake up processes blocked waiting for blocker.
+ *
+ * Must be called with the inode->i_lock held!
*/
static void locks_wake_up_blocks(struct file_lock *blocker)
{
+ /*
+ * Avoid taking global lock if list is empty. This is safe since new
+ * blocked requests are only added to the list under the i_lock, and
+ * the i_lock is always held here. Note that removal from the fl_block
+ * list does not require the i_lock, so we must recheck list_empty()
+ * after acquiring the blocked_lock_lock.
+ */
+ if (list_empty(&blocker->fl_block))
+ return;
+
+ spin_lock(&blocked_lock_lock);
while (!list_empty(&blocker->fl_block)) {
struct file_lock *waiter;
@@ -536,20 +619,23 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
else
wake_up(&waiter->fl_wait);
}
+ spin_unlock(&blocked_lock_lock);
}
/* Insert file lock fl into an inode's lock list at the position indicated
* by pos. At the same time add the lock to the global file lock list.
+ *
+ * Must be called with the i_lock held!
*/
static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
{
- list_add(&fl->fl_link, &file_lock_list);
-
fl->fl_nspid = get_pid(task_tgid(current));
/* insert into file's list */
fl->fl_next = *pos;
*pos = fl;
+
+ locks_insert_global_locks(fl);
}
/*
@@ -557,14 +643,17 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
* Wake up processes that are blocked waiting for this lock,
* notify the FS that the lock has been cleared and
* finally free the lock.
+ *
+ * Must be called with the i_lock held!
*/
static void locks_delete_lock(struct file_lock **thisfl_p)
{
struct file_lock *fl = *thisfl_p;
+ locks_delete_global_locks(fl);
+
*thisfl_p = fl->fl_next;
fl->fl_next = NULL;
- list_del_init(&fl->fl_link);
if (fl->fl_nspid) {
put_pid(fl->fl_nspid);
@@ -625,8 +714,9 @@ void
posix_test_lock(struct file *filp, struct file_lock *fl)
{
struct file_lock *cfl;
+ struct inode *inode = file_inode(filp);
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
if (!IS_POSIX(cfl))
continue;
@@ -639,7 +729,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
fl->fl_pid = pid_vnr(cfl->fl_nspid);
} else
fl->fl_type = F_UNLCK;
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return;
}
EXPORT_SYMBOL(posix_test_lock);
@@ -676,13 +766,14 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
{
struct file_lock *fl;
- list_for_each_entry(fl, &blocked_list, fl_link) {
+ hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
if (posix_same_owner(fl, block_fl))
return fl->fl_next;
}
return NULL;
}
+/* Must be called with the blocked_lock_lock held! */
static int posix_locks_deadlock(struct file_lock *caller_fl,
struct file_lock *block_fl)
{
@@ -718,7 +809,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
return -ENOMEM;
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
if (request->fl_flags & FL_ACCESS)
goto find_conflict;
@@ -748,9 +839,9 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
* give it the opportunity to lock the file.
*/
if (found) {
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
cond_resched();
- lock_flocks();
+ spin_lock(&inode->i_lock);
}
find_conflict:
@@ -777,7 +868,7 @@ find_conflict:
error = 0;
out:
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
if (new_fl)
locks_free_lock(new_fl);
return error;
@@ -791,7 +882,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
struct file_lock *left = NULL;
struct file_lock *right = NULL;
struct file_lock **before;
- int error, added = 0;
+ int error;
+ bool added = false;
/*
* We may need two file_lock structures for this operation,
@@ -806,7 +898,12 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
new_fl2 = locks_alloc_lock();
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
+ /*
+ * New lock request. Walk all POSIX locks and look for conflicts. If
+ * there are any, either return error or put the request on the
+ * blocker's list of waiters and the global blocked_hash.
+ */
if (request->fl_type != F_UNLCK) {
for_each_lock(inode, before) {
fl = *before;
@@ -819,11 +916,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
error = -EAGAIN;
if (!(request->fl_flags & FL_SLEEP))
goto out;
+ /*
+ * Deadlock detection and insertion into the blocked
+ * locks list must be done while holding the same lock!
+ */
error = -EDEADLK;
- if (posix_locks_deadlock(request, fl))
- goto out;
- error = FILE_LOCK_DEFERRED;
- locks_insert_block(fl, request);
+ spin_lock(&blocked_lock_lock);
+ if (likely(!posix_locks_deadlock(request, fl))) {
+ error = FILE_LOCK_DEFERRED;
+ __locks_insert_block(fl, request);
+ }
+ spin_unlock(&blocked_lock_lock);
goto out;
}
}
@@ -845,7 +948,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
before = &fl->fl_next;
}
- /* Process locks with this owner. */
+ /* Process locks with this owner. */
while ((fl = *before) && posix_same_owner(request, fl)) {
/* Detect adjacent or overlapping regions (if same lock type)
*/
@@ -880,7 +983,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
continue;
}
request = fl;
- added = 1;
+ added = true;
}
else {
/* Processing for different lock types is a bit
@@ -891,7 +994,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (fl->fl_start > request->fl_end)
break;
if (request->fl_type == F_UNLCK)
- added = 1;
+ added = true;
if (fl->fl_start < request->fl_start)
left = fl;
/* If the next lock in the list has a higher end
@@ -921,7 +1024,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_release_private(fl);
locks_copy_private(fl, request);
request = fl;
- added = 1;
+ added = true;
}
}
/* Go on to next lock.
@@ -931,10 +1034,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
}
/*
- * The above code only modifies existing locks in case of
- * merging or replacing. If new lock(s) need to be inserted
- * all modifications are done bellow this, so it's safe yet to
- * bail out.
+ * The above code only modifies existing locks in case of merging or
+ * replacing. If new lock(s) need to be inserted all modifications are
+ * done below this, so it's safe yet to bail out.
*/
error = -ENOLCK; /* "no luck" */
if (right && left == right && !new_fl2)
@@ -974,7 +1076,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_wake_up_blocks(left);
}
out:
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
/*
* Free any unused locks.
*/
@@ -1049,14 +1151,14 @@ int locks_mandatory_locked(struct inode *inode)
/*
* Search the lock list for this inode for any POSIX locks.
*/
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
if (fl->fl_owner != owner)
break;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return fl ? -EAGAIN : 0;
}
@@ -1199,7 +1301,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
if (IS_ERR(new_fl))
return PTR_ERR(new_fl);
- lock_flocks();
+ spin_lock(&inode->i_lock);
time_out_leases(inode);
@@ -1249,11 +1351,11 @@ restart:
break_time++;
}
locks_insert_block(flock, new_fl);
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
!new_fl->fl_next, break_time);
- lock_flocks();
- __locks_delete_block(new_fl);
+ spin_lock(&inode->i_lock);
+ locks_delete_block(new_fl);
if (error >= 0) {
if (error == 0)
time_out_leases(inode);
@@ -1270,7 +1372,7 @@ restart:
}
out:
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
locks_free_lock(new_fl);
return error;
}
@@ -1323,9 +1425,10 @@ EXPORT_SYMBOL(lease_get_mtime);
int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
+ struct inode *inode = file_inode(filp);
int type = F_UNLCK;
- lock_flocks();
+ spin_lock(&inode->i_lock);
time_out_leases(file_inode(filp));
for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
fl = fl->fl_next) {
@@ -1334,11 +1437,11 @@ int fcntl_getlease(struct file *filp)
break;
}
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return type;
}
-int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
+static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)
{
struct file_lock *fl, **before, **my_before = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
@@ -1403,7 +1506,7 @@ out:
return error;
}
-int generic_delete_lease(struct file *filp, struct file_lock **flp)
+static int generic_delete_lease(struct file *filp, struct file_lock **flp)
{
struct file_lock *fl, **before;
struct dentry *dentry = filp->f_path.dentry;
@@ -1428,7 +1531,7 @@ int generic_delete_lease(struct file *filp, struct file_lock **flp)
* The (input) flp->fl_lmops->lm_break function is required
* by break_lease().
*
- * Called with file_lock_lock held.
+ * Called with inode->i_lock held.
*/
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{
@@ -1497,11 +1600,12 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
{
+ struct inode *inode = file_inode(filp);
int error;
- lock_flocks();
+ spin_lock(&inode->i_lock);
error = __vfs_setlease(filp, arg, lease);
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return error;
}
@@ -1519,6 +1623,7 @@ static int do_fcntl_delete_lease(struct file *filp)
static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
{
struct file_lock *fl, *ret;
+ struct inode *inode = file_inode(filp);
struct fasync_struct *new;
int error;
@@ -1532,10 +1637,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
return -ENOMEM;
}
ret = fl;
- lock_flocks();
+ spin_lock(&inode->i_lock);
error = __vfs_setlease(filp, arg, &ret);
if (error) {
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
locks_free_lock(fl);
goto out_free_fasync;
}
@@ -1552,7 +1657,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
new = NULL;
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
out_free_fasync:
if (new)
@@ -2076,7 +2181,7 @@ void locks_remove_flock(struct file *filp)
fl.fl_ops->fl_release_private(&fl);
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
before = &inode->i_flock;
while ((fl = *before) != NULL) {
@@ -2094,30 +2199,28 @@ void locks_remove_flock(struct file *filp)
}
before = &fl->fl_next;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
}
/**
* posix_unblock_lock - stop waiting for a file lock
- * @filp: how the file was opened
* @waiter: the lock which was waiting
*
* lockd needs to block waiting for locks.
*/
int
-posix_unblock_lock(struct file *filp, struct file_lock *waiter)
+posix_unblock_lock(struct file_lock *waiter)
{
int status = 0;
- lock_flocks();
+ spin_lock(&blocked_lock_lock);
if (waiter->fl_next)
__locks_delete_block(waiter);
else
status = -ENOENT;
- unlock_flocks();
+ spin_unlock(&blocked_lock_lock);
return status;
}
-
EXPORT_SYMBOL(posix_unblock_lock);
/**
@@ -2215,7 +2318,7 @@ static int locks_show(struct seq_file *f, void *v)
{
struct file_lock *fl, *bfl;
- fl = list_entry(v, struct file_lock, fl_link);
+ fl = hlist_entry(v, struct file_lock, fl_link);
lock_get_status(f, fl, *((loff_t *)f->private), "");
@@ -2229,21 +2332,23 @@ static void *locks_start(struct seq_file *f, loff_t *pos)
{
loff_t *p = f->private;
- lock_flocks();
+ spin_lock(&file_lock_lock);
+ spin_lock(&blocked_lock_lock);
*p = (*pos + 1);
- return seq_list_start(&file_lock_list, *pos);
+ return seq_hlist_start(&file_lock_list, *pos);
}
static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
{
loff_t *p = f->private;
++*p;
- return seq_list_next(v, &file_lock_list, pos);
+ return seq_hlist_next(v, &file_lock_list, pos);
}
static void locks_stop(struct seq_file *f, void *v)
{
- unlock_flocks();
+ spin_unlock(&blocked_lock_lock);
+ spin_unlock(&file_lock_lock);
}
static const struct seq_operations locks_seq_operations = {
@@ -2290,7 +2395,8 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
{
struct file_lock *fl;
int result = 1;
- lock_flocks();
+
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (IS_POSIX(fl)) {
if (fl->fl_type == F_RDLCK)
@@ -2307,7 +2413,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
result = 0;
break;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return result;
}
@@ -2330,7 +2436,8 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
{
struct file_lock *fl;
int result = 1;
- lock_flocks();
+
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (IS_POSIX(fl)) {
if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
@@ -2345,7 +2452,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
result = 0;
break;
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return result;
}
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 08c442902fcd..dfaf6fa9b7b5 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -93,7 +93,7 @@ static int minix_readdir(struct file *file, struct dir_context *ctx)
unsigned offset;
unsigned long n;
- ctx->pos = pos = (pos + chunk_size-1) & ~(chunk_size-1);
+ ctx->pos = pos = ALIGN(pos, chunk_size);
if (pos >= inode->i_size)
return 0;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 0db73d9dd668..cd950e2331b6 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -54,6 +54,18 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
return error;
}
+static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ int error;
+ struct inode *inode = minix_new_inode(dir, mode, &error);
+ if (inode) {
+ minix_set_inode(inode, 0);
+ mark_inode_dirty(inode);
+ d_tmpfile(dentry, inode);
+ }
+ return error;
+}
+
static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -254,4 +266,5 @@ const struct inode_operations minix_dir_inode_operations = {
.mknod = minix_mknod,
.rename = minix_rename,
.getattr = minix_getattr,
+ .tmpfile = minix_tmpfile,
};
diff --git a/fs/namei.c b/fs/namei.c
index 9ed9361223c0..b2beee7a733f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1352,7 +1352,7 @@ static int lookup_fast(struct nameidata *nd,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
- dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode);
+ dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (!dentry)
goto unlazy;
@@ -1787,8 +1787,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
struct dentry *parent = nd->path.dentry;
nd->flags &= ~LOOKUP_JUMPED;
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- err = parent->d_op->d_hash(parent, nd->inode,
- &this);
+ err = parent->d_op->d_hash(parent, &this);
if (err < 0)
break;
}
@@ -2121,7 +2120,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
* to use its own hash..
*/
if (base->d_flags & DCACHE_OP_HASH) {
- int err = base->d_op->d_hash(base, base->d_inode, &this);
+ int err = base->d_op->d_hash(base, &this);
if (err < 0)
return ERR_PTR(err);
}
@@ -2690,28 +2689,10 @@ static int do_last(struct nameidata *nd, struct path *path,
nd->flags &= ~LOOKUP_PARENT;
nd->flags |= op->intent;
- switch (nd->last_type) {
- case LAST_DOTDOT:
- case LAST_DOT:
+ if (nd->last_type != LAST_NORM) {
error = handle_dots(nd, nd->last_type);
if (error)
return error;
- /* fallthrough */
- case LAST_ROOT:
- error = complete_walk(nd);
- if (error)
- return error;
- audit_inode(name, nd->path.dentry, 0);
- if (open_flag & O_CREAT) {
- error = -EISDIR;
- goto out;
- }
- goto finish_open;
- case LAST_BIND:
- error = complete_walk(nd);
- if (error)
- return error;
- audit_inode(name, dir, 0);
goto finish_open;
}
@@ -2841,19 +2822,19 @@ finish_lookup:
}
nd->inode = inode;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
+finish_open:
error = complete_walk(nd);
if (error) {
path_put(&save_parent);
return error;
}
+ audit_inode(name, nd->path.dentry, 0);
error = -EISDIR;
if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
goto out;
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
goto out;
- audit_inode(name, nd->path.dentry, 0);
-finish_open:
if (!S_ISREG(nd->inode->i_mode))
will_truncate = false;
@@ -2920,6 +2901,67 @@ stale_open:
goto retry_lookup;
}
+static int do_tmpfile(int dfd, struct filename *pathname,
+ struct nameidata *nd, int flags,
+ const struct open_flags *op,
+ struct file *file, int *opened)
+{
+ static const struct qstr name = QSTR_INIT("/", 1);
+ struct dentry *dentry, *child;
+ struct inode *dir;
+ int error = path_lookupat(dfd, pathname->name,
+ flags | LOOKUP_DIRECTORY, nd);
+ if (unlikely(error))
+ return error;
+ error = mnt_want_write(nd->path.mnt);
+ if (unlikely(error))
+ goto out;
+ /* we want directory to be writable */
+ error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC);
+ if (error)
+ goto out2;
+ dentry = nd->path.dentry;
+ dir = dentry->d_inode;
+ if (!dir->i_op->tmpfile) {
+ error = -EOPNOTSUPP;
+ goto out2;
+ }
+ child = d_alloc(dentry, &name);
+ if (unlikely(!child)) {
+ error = -ENOMEM;
+ goto out2;
+ }
+ nd->flags &= ~LOOKUP_DIRECTORY;
+ nd->flags |= op->intent;
+ dput(nd->path.dentry);
+ nd->path.dentry = child;
+ error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode);
+ if (error)
+ goto out2;
+ audit_inode(pathname, nd->path.dentry, 0);
+ error = may_open(&nd->path, op->acc_mode, op->open_flag);
+ if (error)
+ goto out2;
+ file->f_path.mnt = nd->path.mnt;
+ error = finish_open(file, nd->path.dentry, NULL, opened);
+ if (error)
+ goto out2;
+ error = open_check_o_direct(file);
+ if (error) {
+ fput(file);
+ } else if (!(op->open_flag & O_EXCL)) {
+ struct inode *inode = file_inode(file);
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_LINKABLE;
+ spin_unlock(&inode->i_lock);
+ }
+out2:
+ mnt_drop_write(nd->path.mnt);
+out:
+ path_put(&nd->path);
+ return error;
+}
+
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
{
@@ -2935,6 +2977,11 @@ static struct file *path_openat(int dfd, struct filename *pathname,
file->f_flags = op->open_flag;
+ if (unlikely(file->f_flags & O_TMPFILE)) {
+ error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
+ goto out;
+ }
+
error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out;
@@ -2987,9 +3034,10 @@ out:
}
struct file *do_filp_open(int dfd, struct filename *pathname,
- const struct open_flags *op, int flags)
+ const struct open_flags *op)
{
struct nameidata nd;
+ int flags = op->lookup_flags;
struct file *filp;
filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
@@ -3001,17 +3049,16 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
}
struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
- const char *name, const struct open_flags *op, int flags)
+ const char *name, const struct open_flags *op)
{
struct nameidata nd;
struct file *file;
struct filename filename = { .name = name };
+ int flags = op->lookup_flags | LOOKUP_ROOT;
nd.root.mnt = mnt;
nd.root.dentry = dentry;
- flags |= LOOKUP_ROOT;
-
if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
return ERR_PTR(-ELOOP);
@@ -3586,12 +3633,18 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
mutex_lock(&inode->i_mutex);
/* Make sure we don't allow creating hardlink to an unlinked file */
- if (inode->i_nlink == 0)
+ if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
error = -ENOENT;
else if (max_links && inode->i_nlink >= max_links)
error = -EMLINK;
else
error = dir->i_op->link(old_dentry, dir, new_dentry);
+
+ if (!error && (inode->i_state & I_LINKABLE)) {
+ spin_lock(&inode->i_lock);
+ inode->i_state &= ~I_LINKABLE;
+ spin_unlock(&inode->i_lock);
+ }
mutex_unlock(&inode->i_mutex);
if (!error)
fsnotify_link(dir, inode, new_dentry);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 0e7f00298213..3be047474bfc 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -73,10 +73,8 @@ const struct inode_operations ncp_dir_inode_operations =
* Dentry operations routines
*/
static int ncp_lookup_validate(struct dentry *, unsigned int);
-static int ncp_hash_dentry(const struct dentry *, const struct inode *,
- struct qstr *);
-static int ncp_compare_dentry(const struct dentry *, const struct inode *,
- const struct dentry *, const struct inode *,
+static int ncp_hash_dentry(const struct dentry *, struct qstr *);
+static int ncp_compare_dentry(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
static int ncp_delete_dentry(const struct dentry *);
@@ -119,11 +117,19 @@ static inline int ncp_case_sensitive(const struct inode *i)
/*
* Note: leave the hash unchanged if the directory
* is case-sensitive.
+ *
+ * Accessing the parent inode can be racy under RCU pathwalking.
+ * Use ACCESS_ONCE() to make sure we use _one_ particular inode,
+ * the callers will handle races.
*/
static int
-ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
- struct qstr *this)
+ncp_hash_dentry(const struct dentry *dentry, struct qstr *this)
{
+ struct inode *inode = ACCESS_ONCE(dentry->d_inode);
+
+ if (!inode)
+ return 0;
+
if (!ncp_case_sensitive(inode)) {
struct super_block *sb = dentry->d_sb;
struct nls_table *t;
@@ -140,14 +146,24 @@ ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
return 0;
}
+/*
+ * Accessing the parent inode can be racy under RCU pathwalking.
+ * Use ACCESS_ONCE() to make sure we use _one_ particular inode,
+ * the callers will handle races.
+ */
static int
-ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct inode *pinode;
+
if (len != name->len)
return 1;
+ pinode = ACCESS_ONCE(parent->d_inode);
+ if (!pinode)
+ return 1;
+
if (ncp_case_sensitive(pinode))
return strncmp(str, name->name, len);
@@ -660,8 +676,6 @@ end_advance:
ctl.valid = 0;
if (!ctl.filled && (ctl.fpos == ctx->pos)) {
if (!ino)
- ino = find_inode_number(dentry, &qname);
- if (!ino)
ino = iunique(dir->i_sb, 2);
ctl.filled = !dir_emit(ctx, qname.name, qname.len,
ino, DT_UNKNOWN);
@@ -1123,17 +1137,6 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
new_dentry->d_parent->d_name.name, new_dentry->d_name.name);
- if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) {
- /*
- * fail with EBUSY if there are still references to this
- * directory.
- */
- dentry_unhash(new_dentry);
- error = -EBUSY;
- if (!d_unhashed(new_dentry))
- goto out;
- }
-
ncp_age_dentry(server, old_dentry);
ncp_age_dentry(server, new_dentry);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 26910c8154da..0765ad12c382 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -891,6 +891,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
if (!server) /* How this could happen? */
goto out;
+ result = -EPERM;
+ if (IS_DEADDIR(dentry->d_inode))
+ goto out;
+
/* ageing the dentry to force validation */
ncp_age_dentry(server, dentry);
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index ee24df5af1f9..3c5dd55d284c 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;
/* we do not support files bigger than 4GB... We eventually
supports just 4GB... */
- if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff
+ if (vma_pages(vma) + vma->vm_pgoff
> (1U << (32 - PAGE_SHIFT)))
return -EFBIG;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cff089a412c7..da6a43d19aa3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
struct svc_rqst *rqstp;
int (*callback_svc)(void *vrqstp);
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
- char svc_name[12];
int ret;
nfs_callback_bc_serv(minorversion, xprt, serv);
@@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
svc_sock_update_bufs(serv);
- sprintf(svc_name, "nfsv4.%u-svc", minorversion);
cb_info->serv = serv;
cb_info->rqst = rqstp;
- cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name);
+ cb_info->task = kthread_run(callback_svc, cb_info->rqst,
+ "nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task);
svc_exit_thread(cb_info->rqst);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 57db3244f4d9..7ec4814e298d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
if (inode->i_flock == NULL)
goto out;
- /* Protect inode->i_flock using the file locks lock */
- lock_flocks();
+ /* Protect inode->i_flock using the i_lock */
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file) != ctx)
continue;
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0)
goto out;
- lock_flocks();
+ spin_lock(&inode->i_lock);
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
out:
return status;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d051419527b..d7ed697133f0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,6 +33,7 @@
#include <linux/pagevec.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/swap.h>
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/xattr.h>
@@ -1758,7 +1759,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
*/
int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
- struct pagevec lru_pvec;
struct page *page;
char *kaddr;
struct iattr attr;
@@ -1798,11 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
* No big deal if we can't add this page to the page cache here.
* READLINK will get the missing page from the server if needed.
*/
- pagevec_init(&lru_pvec, 0);
- if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+ if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0,
GFP_KERNEL)) {
- pagevec_add(&lru_pvec, page);
- pagevec_lru_add_file(&lru_pvec);
SetPageUptodate(page);
unlock_page(page);
} else
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b4a79f4ad1d..94e94bd11aae 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -495,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
return nfs_fscache_release_page(page, gfp);
}
+static void nfs_check_dirty_writeback(struct page *page,
+ bool *dirty, bool *writeback)
+{
+ struct nfs_inode *nfsi;
+ struct address_space *mapping = page_file_mapping(page);
+
+ if (!mapping || PageSwapCache(page))
+ return;
+
+ /*
+ * Check if an unstable page is currently being committed and
+ * if so, have the VM treat it as if the page is under writeback
+ * so it will not block due to pages that will shortly be freeable.
+ */
+ nfsi = NFS_I(mapping->host);
+ if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+ *writeback = true;
+ return;
+ }
+
+ /*
+ * If PagePrivate() is set, then the page is not freeable and as the
+ * inode is not being committed, it's not going to be cleaned in the
+ * near future so treat it as dirty
+ */
+ if (PagePrivate(page))
+ *dirty = true;
+}
+
/*
* Attempt to clear the private state associated with a page when an error
* occurs that requires the cached contents of an inode to be written back or
@@ -542,6 +571,7 @@ const struct address_space_operations nfs_file_aops = {
.direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
+ .is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c1c7a9d78722..ce727047ee87 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 43ea96ced28c..ce90eb4775c2 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+ freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d7ba5616989c..28241a42f363 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -268,7 +268,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- freezable_schedule_timeout_killable(*timeout);
+ freezable_schedule_timeout_killable_unsafe(*timeout);
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
*timeout <<= 1;
@@ -4528,7 +4528,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- freezable_schedule_timeout_killable(timeout);
+ freezable_schedule_timeout_killable_unsafe(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1fab140764c4..55418811a55a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1194,7 +1194,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
snprintf(buf, sizeof(buf), "%s-manager",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
rcu_read_unlock();
- task = kthread_run(nfs4_run_state_manager, clp, buf);
+ task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
if (IS_ERR(task)) {
printk(KERN_ERR "%s: kthread_run: %ld\n",
__func__, PTR_ERR(task));
@@ -1373,13 +1373,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* Guard against delegation returns and new lock/unlock calls */
down_write(&nfsi->rwsem);
/* Protect inode->i_flock using the BKL */
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
status = ops->recover_lock(state, fl);
switch (status) {
case 0:
@@ -1406,9 +1406,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* kill_proc(fl->fl_pid, SIGLOST, 1); */
status = 0;
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
out:
up_write(&nfsi->rwsem);
return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 316ec843dec2..f17051838b41 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2645,13 +2645,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
- /* only place dl_time is set. protected by lock_flocks*/
+ /* Only place dl_time is set; protected by i_lock: */
dp->dl_time = get_seconds();
nfsd4_cb_recall(dp);
}
-/* Called from break_lease() with lock_flocks() held. */
+/* Called from break_lease() with i_lock held. */
static void nfsd_break_deleg_cb(struct file_lock *fl)
{
struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
@@ -4520,7 +4520,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
struct inode *inode = filp->fi_inode;
int status = 0;
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
status = 1;
@@ -4528,7 +4528,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
}
}
out:
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
return status;
}
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index eed4d7b26249..741fd02e0444 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
}
/**
+ * nilfs_palloc_count_desc_blocks - count descriptor blocks number
+ * @inode: inode of metadata file using this allocator
+ * @desc_blocks: descriptor blocks number [out]
+ */
+static int nilfs_palloc_count_desc_blocks(struct inode *inode,
+ unsigned long *desc_blocks)
+{
+ unsigned long blknum;
+ int ret;
+
+ ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
+ if (likely(!ret))
+ *desc_blocks = DIV_ROUND_UP(
+ blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block);
+ return ret;
+}
+
+/**
+ * nilfs_palloc_mdt_file_can_grow - check potential opportunity for
+ * MDT file growing
+ * @inode: inode of metadata file using this allocator
+ * @desc_blocks: known current descriptor blocks count
+ */
+static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
+ unsigned long desc_blocks)
+{
+ return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) <
+ nilfs_palloc_groups_count(inode);
+}
+
+/**
+ * nilfs_palloc_count_max_entries - count max number of entries that can be
+ * described by descriptor blocks count
+ * @inode: inode of metadata file using this allocator
+ * @nused: current number of used entries
+ * @nmaxp: max number of entries [out]
+ */
+int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
+{
+ unsigned long desc_blocks = 0;
+ u64 entries_per_desc_block, nmax;
+ int err;
+
+ err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks);
+ if (unlikely(err))
+ return err;
+
+ entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) *
+ nilfs_palloc_groups_per_desc_block(inode);
+ nmax = entries_per_desc_block * desc_blocks;
+
+ if (nused == nmax &&
+ nilfs_palloc_mdt_file_can_grow(inode, desc_blocks))
+ nmax += entries_per_desc_block;
+
+ if (nused > nmax)
+ return -ERANGE;
+
+ *nmaxp = nmax;
+ return 0;
+}
+
+/**
* nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index fb7238100548..4bd6451b5703 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
const struct buffer_head *, void *);
+int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *);
+
/**
* nilfs_palloc_req - persistent allocator request and reply
* @pr_entry_nr: entry number (vblocknr or inode number)
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index d8e65bde083c..6548c7851b48 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
}
/**
+ * nilfs_ifile_count_free_inodes - calculate free inodes count
+ * @ifile: ifile inode
+ * @nmaxinodes: current maximum of available inodes count [out]
+ * @nfreeinodes: free inodes count [out]
+ */
+int nilfs_ifile_count_free_inodes(struct inode *ifile,
+ u64 *nmaxinodes, u64 *nfreeinodes)
+{
+ u64 nused;
+ int err;
+
+ *nmaxinodes = 0;
+ *nfreeinodes = 0;
+
+ nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count);
+ err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes);
+ if (likely(!err))
+ *nfreeinodes = *nmaxinodes - nused;
+ return err;
+}
+
+/**
* nilfs_ifile_read - read or get ifile inode
* @sb: super block instance
* @root: root object
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 59b6f2b51df6..679674d13372 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
int nilfs_ifile_delete_inode(struct inode *, ino_t);
int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
+int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
+
int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
size_t inode_size, struct nilfs_inode *raw_inode,
struct inode **inodep);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index bccfec8343c5..b1a5277cfd18 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
if (root)
- atomic_add(n, &root->blocks_count);
+ atomic64_add(n, &root->blocks_count);
}
void nilfs_inode_sub_blocks(struct inode *inode, int n)
@@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
if (root)
- atomic_sub(n, &root->blocks_count);
+ atomic64_sub(n, &root->blocks_count);
}
/**
@@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
goto failed_ifile_create_inode;
/* reference count of i_bh inherits from nilfs_mdt_read_block() */
- atomic_inc(&root->inodes_count);
+ atomic64_inc(&root->inodes_count);
inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode)
ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
if (!ret)
- atomic_dec(&ii->i_root->inodes_count);
+ atomic64_dec(&ii->i_root->inodes_count);
nilfs_clear_inode(inode);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a5752a589932..bd88a7461063 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
raw_cp->cp_snapshot_list.ssl_next = 0;
raw_cp->cp_snapshot_list.ssl_prev = 0;
raw_cp->cp_inodes_count =
- cpu_to_le64(atomic_read(&sci->sc_root->inodes_count));
+ cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
raw_cp->cp_blocks_count =
- cpu_to_le64(atomic_read(&sci->sc_root->blocks_count));
+ cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
raw_cp->cp_nblk_inc =
cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c7d1f9f18b09..1427de5ebf4d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
if (err)
goto failed_bh;
- atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count));
- atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count));
+ atomic64_set(&root->inodes_count,
+ le64_to_cpu(raw_cp->cp_inodes_count));
+ atomic64_set(&root->blocks_count,
+ le64_to_cpu(raw_cp->cp_blocks_count));
nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
@@ -609,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
unsigned long overhead;
unsigned long nrsvblocks;
sector_t nfreeblocks;
+ u64 nmaxinodes, nfreeinodes;
int err;
/*
@@ -633,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (unlikely(err))
return err;
+ err = nilfs_ifile_count_free_inodes(root->ifile,
+ &nmaxinodes, &nfreeinodes);
+ if (unlikely(err)) {
+ printk(KERN_WARNING
+ "NILFS warning: fail to count free inodes: err %d.\n",
+ err);
+ if (err == -ERANGE) {
+ /*
+ * If nilfs_palloc_count_max_entries() returns
+ * -ERANGE error code then we simply treat
+ * curent inodes count as maximum possible and
+ * zero as free inodes value.
+ */
+ nmaxinodes = atomic64_read(&root->inodes_count);
+ nfreeinodes = 0;
+ err = 0;
+ } else
+ return err;
+ }
+
buf->f_type = NILFS_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = blocks - overhead;
buf->f_bfree = nfreeblocks;
buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
(buf->f_bfree - nrsvblocks) : 0;
- buf->f_files = atomic_read(&root->inodes_count);
- buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
+ buf->f_files = nmaxinodes;
+ buf->f_ffree = nfreeinodes;
buf->f_namelen = NILFS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 41e6a04a561f..94c451ce6d24 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
new->ifile = NULL;
new->nilfs = nilfs;
atomic_set(&new->count, 1);
- atomic_set(&new->inodes_count, 0);
- atomic_set(&new->blocks_count, 0);
+ atomic64_set(&new->inodes_count, 0);
+ atomic64_set(&new->blocks_count, 0);
rb_link_node(&new->rb_node, parent, p);
rb_insert_color(&new->rb_node, &nilfs->ns_cptree);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index be1267a34cea..de8cc53b4a5c 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -241,8 +241,8 @@ struct nilfs_root {
struct the_nilfs *nilfs;
struct inode *ifile;
- atomic_t inodes_count;
- atomic_t blocks_count;
+ atomic64_t inodes_count;
+ atomic64_t blocks_count;
};
/* Special checkpoint number */
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6c80083a984f..1ea52f7c031f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -399,9 +399,6 @@ static int fanotify_release(struct inode *ignored, struct file *file)
wake_up(&group->fanotify_data.access_waitq);
#endif
- if (file->f_flags & FASYNC)
- fsnotify_fasync(-1, file, 0);
-
/* matches the fanotify_init->fsnotify_alloc_group */
fsnotify_destroy_group(group);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b8a9d87231b1..17e6bdde96c5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
&ref_tree, NULL);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto bail;
}
ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
&extra_blocks);
if (ret < 0) {
mlog_errno(ret);
- goto out;
+ goto bail;
}
}
@@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
extra_blocks);
if (ret) {
mlog_errno(ret);
- return ret;
+ goto bail;
}
mutex_lock(&tl_inode->i_mutex);
@@ -5734,7 +5734,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out:
mutex_unlock(&tl_inode->i_mutex);
-
+bail:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 42252bf64b51..5c1c864e81cc 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold)
}
}
-static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode)
+static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode)
{
int ret = -1;
@@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg,
}
atomic_inc(&write_wc->wc_num_reqs);
- submit_bio(WRITE, bio);
+ submit_bio(WRITE_SYNC, bio);
status = 0;
bail:
@@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group,
if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len))
continue;
- ret = o2hb_global_hearbeat_mode_set(i);
+ ret = o2hb_global_heartbeat_mode_set(i);
if (!ret)
printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n",
o2hb_heartbeat_mode_desc[i]);
@@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
NULL,
};
-static struct configfs_item_operations o2hb_hearbeat_group_item_ops = {
+static struct configfs_item_operations o2hb_heartbeat_group_item_ops = {
.show_attribute = o2hb_heartbeat_group_show,
.store_attribute = o2hb_heartbeat_group_store,
};
@@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = {
static struct config_item_type o2hb_heartbeat_group_type = {
.ct_group_ops = &o2hb_heartbeat_group_group_ops,
- .ct_item_ops = &o2hb_hearbeat_group_item_ops,
+ .ct_item_ops = &o2hb_heartbeat_group_item_ops,
.ct_attrs = o2hb_heartbeat_group_attrs,
.ct_owner = THIS_MODULE,
};
@@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid)
assert_spin_locked(&o2hb_live_lock);
list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ if (reg->hr_item_dropped)
+ continue;
+
uuid = config_item_name(&reg->hr_item);
/* local heartbeat */
@@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid)
assert_spin_locked(&o2hb_live_lock);
list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ if (reg->hr_item_dropped)
+ continue;
+
uuid = config_item_name(&reg->hr_item);
if (region_uuid) {
if (strcmp(region_uuid, uuid))
@@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions)
p = region_uuids;
list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ if (reg->hr_item_dropped)
+ continue;
+
mlog(0, "Region: %s\n", config_item_name(&reg->hr_item));
if (numregs < max_regions) {
memcpy(p, config_item_name(&reg->hr_item),
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index c19897d0fe14..1ec141e758d7 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node)
/* This is analogous to hb_up. as a node's connection comes up we delay the
* quorum decision until we see it heartbeating. the hold will be droped in
* hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
- * it's already heartbeating we we might be dropping a hold that conn_up got.
+ * it's already heartbeating we might be dropping a hold that conn_up got.
* */
void o2quo_conn_up(u8 node)
{
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index aa88bd8bcedc..d644dc611425 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref)
sc->sc_node = NULL;
o2net_debug_del_sc(sc);
+
+ if (sc->sc_page)
+ __free_page(sc->sc_page);
kfree(sc);
}
@@ -630,19 +633,19 @@ static void o2net_state_change(struct sock *sk)
state_change = sc->sc_state_change;
switch(sk->sk_state) {
- /* ignore connecting sockets as they make progress */
- case TCP_SYN_SENT:
- case TCP_SYN_RECV:
- break;
- case TCP_ESTABLISHED:
- o2net_sc_queue_work(sc, &sc->sc_connect_work);
- break;
- default:
- printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT
- " shutdown, state %d\n",
- SC_NODEF_ARGS(sc), sk->sk_state);
- o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
- break;
+ /* ignore connecting sockets as they make progress */
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ break;
+ case TCP_ESTABLISHED:
+ o2net_sc_queue_work(sc, &sc->sc_connect_work);
+ break;
+ default:
+ printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT
+ " shutdown, state %d\n",
+ SC_NODEF_ARGS(sc), sk->sk_state);
+ o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
+ break;
}
out:
read_unlock(&sk->sk_callback_lock);
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 975810b98492..47e67c2d228f 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
lock->ml.node);
}
} else {
+ status = DLM_NORMAL;
dlm_lock_get(lock);
list_add_tail(&lock->list, &res->blocked);
kick_thread = 1;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index e68588e6b1e8..773bd32bfd8c 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -55,9 +55,6 @@
static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node);
static int dlm_recovery_thread(void *data);
-void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
-int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
-void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
static int dlm_do_recovery(struct dlm_ctxt *dlm);
static int dlm_pick_recovery_master(struct dlm_ctxt *dlm);
@@ -789,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
u8 dead_node)
{
struct dlm_lock_request lr;
- enum dlm_status ret;
+ int ret;
mlog(0, "\n");
@@ -802,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
lr.dead_node = dead_node;
// send message
- ret = DLM_NOLOCKMGR;
ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
&lr, sizeof(lr), request_from, NULL);
@@ -2696,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
dlm->name, br->node_idx, br->dead_node,
dlm->reco.dead_node, dlm->reco.new_master);
spin_unlock(&dlm->spinlock);
+ dlm_put(dlm);
return -EAGAIN;
}
spin_unlock(&dlm->spinlock);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a38714f1d92..41000f223ca4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2646,17 +2646,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
goto out;
}
- if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
- ret = -EINVAL;
- if (!ret && offset > inode->i_sb->s_maxbytes)
- ret = -EINVAL;
- if (ret)
- goto out;
-
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
mutex_unlock(&inode->i_mutex);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index a3385b63ff5e..96f9ac237e86 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
{
- atomic_set(&osb->needs_checkpoint, 1);
wake_up(&osb->checkpoint_event);
}
@@ -538,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
- ocfs2_quota_trans_credits(sb);
+ ocfs2_quota_trans_credits(sb) + bits_wanted;
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b4a5cdf9dbc5..be3f8676a438 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
- le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
+ fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
fe->i_atime = fe->i_ctime = fe->i_mtime =
cpu_to_le64(CURRENT_TIME.tv_sec);
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
@@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry)
return ret;
}
-static inline int inode_is_unlinkable(struct inode *inode)
+static inline int ocfs2_inode_is_unlinkable(struct inode *inode)
{
if (S_ISDIR(inode->i_mode)) {
if (inode->i_nlink == 2)
@@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir,
{
int status;
int child_locked = 0;
+ bool is_unlinkable = false;
struct inode *inode = dentry->d_inode;
struct inode *orphan_dir = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir,
goto leave;
}
- if (inode_is_unlinkable(inode)) {
+ if (ocfs2_inode_is_unlinkable(inode)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
OCFS2_I(inode)->ip_blkno,
orphan_name, &orphan_insert);
@@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir,
mlog_errno(status);
goto leave;
}
+ is_unlinkable = true;
}
handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
@@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir,
fe = (struct ocfs2_dinode *) fe_bh->b_data;
- if (inode_is_unlinkable(inode)) {
- status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name,
- &orphan_insert, orphan_dir);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
- }
-
/* delete the name from the parent dir */
status = ocfs2_delete_entry(handle, dir, &lookup);
if (status < 0) {
@@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir,
mlog_errno(status);
if (S_ISDIR(inode->i_mode))
inc_nlink(dir);
+ goto leave;
+ }
+
+ if (is_unlinkable) {
+ status = ocfs2_orphan_add(osb, handle, inode, fe_bh,
+ orphan_name, &orphan_insert, orphan_dir);
+ if (status < 0)
+ mlog_errno(status);
}
leave:
@@ -2012,6 +2013,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
goto leave;
}
+ /*
+ * We're going to journal the change of i_flags and i_orphaned_slot.
+ * It's safe anyway, though some callers may duplicate the journaling.
+ * Journaling within the func just make the logic look more
+ * straightforward.
+ */
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(inode),
+ fe_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
/* we're a cluster, and nlink can change on disk from
* underneath us... */
orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
@@ -2026,25 +2042,10 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
orphan_dir_bh, lookup);
if (status < 0) {
mlog_errno(status);
- goto leave;
- }
-
- /*
- * We're going to journal the change of i_flags and i_orphaned_slot.
- * It's safe anyway, though some callers may duplicate the journaling.
- * Journaling within the func just make the logic look more
- * straightforward.
- */
- status = ocfs2_journal_access_di(handle,
- INODE_CACHE(inode),
- fe_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
+ goto rollback;
}
- le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+ fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
/* Record which orphan dir our inode now resides
@@ -2057,11 +2058,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
osb->slot_num);
+rollback:
+ if (status < 0) {
+ if (S_ISDIR(inode->i_mode))
+ ocfs2_add_links_count(orphan_fe, -1);
+ set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
+ }
+
leave:
brelse(orphan_dir_bh);
- if (status)
- mlog_errno(status);
return status;
}
@@ -2434,7 +2440,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
}
di = (struct ocfs2_dinode *)di_bh->b_data;
- le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
+ di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL);
di->i_orphaned_slot = 0;
set_nlink(inode, 1);
ocfs2_set_links_count(di, inode->i_nlink);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d355e6e36b36..3a903470c794 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -347,7 +347,6 @@ struct ocfs2_super
struct task_struct *recovery_thread_task;
int disable_recovery;
wait_queue_head_t checkpoint_event;
- atomic_t needs_checkpoint;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index b7e74b580c0f..5397c07ce608 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
int status;
/* there is a really tiny chance the journal calls could fail,
* but we wouldn't want inconsistent blocks in *any* case. */
- u64 fe_ptr, bg_ptr, prev_bg_ptr;
+ u64 bg_ptr, prev_bg_ptr;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
@@ -1437,51 +1437,44 @@ static int ocfs2_relink_block_group(handle_t *handle,
(unsigned long long)le64_to_cpu(bg->bg_blkno),
(unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
- fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
bg_ptr = le64_to_cpu(bg->bg_next_group);
prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
prev_bg_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out;
prev_bg->bg_next_group = bg->bg_next_group;
ocfs2_journal_dirty(handle, prev_bg_bh);
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out_rollback_prev_bg;
bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
ocfs2_journal_dirty(handle, bg_bh);
status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto out_rollback;
- }
+ if (status < 0)
+ goto out_rollback_bg;
fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
ocfs2_journal_dirty(handle, fe_bh);
-out_rollback:
- if (status < 0) {
- fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
- bg->bg_next_group = cpu_to_le64(bg_ptr);
- prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
- }
-
- if (status)
+out:
+ if (status < 0)
mlog_errno(status);
return status;
+
+out_rollback_bg:
+ bg->bg_next_group = cpu_to_le64(bg_ptr);
+out_rollback_prev_bg:
+ prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
+ goto out;
}
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 01b85165552b..854d80955bf8 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
spin_unlock(&osb->osb_lock);
out += snprintf(buf + out, len - out,
- "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
+ "%10s => Pid: %d Interval: %lu\n", "Commit",
(osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
- osb->osb_commit_interval,
- atomic_read(&osb->needs_checkpoint));
+ osb->osb_commit_interval);
out += snprintf(buf + out, len - out,
"%10s => State: %d TxnId: %lu NumTxns: %d\n",
@@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
init_waitqueue_head(&osb->checkpoint_event);
- atomic_set(&osb->needs_checkpoint, 0);
osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 2e3ea308c144..317ef0abccbb 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
{
int ret;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
struct ocfs2_xa_loc loc;
if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
@@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
down_write(&oi->ip_alloc_sem);
if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
- if (!ocfs2_xattr_has_space_inline(inode, di)) {
- ret = -ENOSPC;
- goto out;
- }
- }
-
- if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
if (ret) {
if (ret != -ENOSPC)
@@ -6499,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
diff --git a/fs/open.c b/fs/open.c
index 8c741002f947..fca72c4d3f17 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -840,11 +840,15 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
if (flags & __O_SYNC)
flags |= O_DSYNC;
- /*
- * If we have O_PATH in the open flag. Then we
- * cannot have anything other than the below set of flags
- */
- if (flags & O_PATH) {
+ if (flags & O_TMPFILE) {
+ if (!(flags & O_CREAT))
+ return -EINVAL;
+ acc_mode = MAY_OPEN | ACC_MODE(flags);
+ } else if (flags & O_PATH) {
+ /*
+ * If we have O_PATH in the open flag. Then we
+ * cannot have anything other than the below set of flags
+ */
flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
acc_mode = 0;
} else {
@@ -876,7 +880,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
lookup_flags |= LOOKUP_DIRECTORY;
if (!(flags & O_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
- return lookup_flags;
+ op->lookup_flags = lookup_flags;
+ return 0;
}
/**
@@ -893,8 +898,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
struct file *file_open_name(struct filename *name, int flags, umode_t mode)
{
struct open_flags op;
- int lookup = build_open_flags(flags, mode, &op);
- return do_filp_open(AT_FDCWD, name, &op, lookup);
+ int err = build_open_flags(flags, mode, &op);
+ return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op);
}
/**
@@ -919,37 +924,43 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
const char *filename, int flags)
{
struct open_flags op;
- int lookup = build_open_flags(flags, 0, &op);
+ int err = build_open_flags(flags, 0, &op);
+ if (err)
+ return ERR_PTR(err);
if (flags & O_CREAT)
return ERR_PTR(-EINVAL);
if (!filename && (flags & O_DIRECTORY))
if (!dentry->d_inode->i_op->lookup)
return ERR_PTR(-ENOTDIR);
- return do_file_open_root(dentry, mnt, filename, &op, lookup);
+ return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_flags op;
- int lookup = build_open_flags(flags, mode, &op);
- struct filename *tmp = getname(filename);
- int fd = PTR_ERR(tmp);
-
- if (!IS_ERR(tmp)) {
- fd = get_unused_fd_flags(flags);
- if (fd >= 0) {
- struct file *f = do_filp_open(dfd, tmp, &op, lookup);
- if (IS_ERR(f)) {
- put_unused_fd(fd);
- fd = PTR_ERR(f);
- } else {
- fsnotify_open(f);
- fd_install(fd, f);
- }
+ int fd = build_open_flags(flags, mode, &op);
+ struct filename *tmp;
+
+ if (fd)
+ return fd;
+
+ tmp = getname(filename);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ fd = get_unused_fd_flags(flags);
+ if (fd >= 0) {
+ struct file *f = do_filp_open(dfd, tmp, &op);
+ if (IS_ERR(f)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(f);
+ } else {
+ fsnotify_open(f);
+ fd_install(fd, f);
}
- putname(tmp);
}
+ putname(tmp);
return fd;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0016350ad95e..1485e38daaa3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1686,41 +1686,29 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
struct dentry *child, *dir = file->f_path.dentry;
+ struct qstr qname = QSTR_INIT(name, len);
struct inode *inode;
- struct qstr qname;
- ino_t ino = 0;
- unsigned type = DT_UNKNOWN;
+ unsigned type;
+ ino_t ino;
- qname.name = name;
- qname.len = len;
- qname.hash = full_name_hash(name, len);
-
- child = d_lookup(dir, &qname);
+ child = d_hash_and_lookup(dir, &qname);
if (!child) {
- struct dentry *new;
- new = d_alloc(dir, &qname);
- if (new) {
- child = instantiate(dir->d_inode, new, task, ptr);
- if (child)
- dput(new);
- else
- child = new;
+ child = d_alloc(dir, &qname);
+ if (!child)
+ goto end_instantiate;
+ if (instantiate(dir->d_inode, child, task, ptr) < 0) {
+ dput(child);
+ goto end_instantiate;
}
}
- if (!child || IS_ERR(child) || !child->d_inode)
- goto end_instantiate;
inode = child->d_inode;
- if (inode) {
- ino = inode->i_ino;
- type = inode->i_mode >> 12;
- }
+ ino = inode->i_ino;
+ type = inode->i_mode >> 12;
dput(child);
-end_instantiate:
- if (!ino)
- ino = find_inode_number(dir, &qname);
- if (!ino)
- ino = 1;
return dir_emit(ctx, name, len, ino, type);
+
+end_instantiate:
+ return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -1846,7 +1834,7 @@ struct map_files_info {
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
-static struct dentry *
+static int
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
@@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
- return ERR_PTR(-ENOENT);
+ return -ENOENT;
ei = PROC_I(inode);
ei->op.proc_get_link = proc_map_files_get_link;
@@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
d_set_d_op(dentry, &tid_map_files_dentry_operations);
d_add(dentry, inode);
- return NULL;
+ return 0;
}
static struct dentry *proc_map_files_lookup(struct inode *dir,
@@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
struct task_struct *task;
- struct dentry *result;
+ int result;
struct mm_struct *mm;
- result = ERR_PTR(-EPERM);
+ result = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out;
- result = ERR_PTR(-ENOENT);
+ result = -ENOENT;
task = get_proc_task(dir);
if (!task)
goto out;
- result = ERR_PTR(-EACCES);
+ result = -EACCES;
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
- result = ERR_PTR(-ENOENT);
+ result = -ENOENT;
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
goto out_put_task;
@@ -1921,7 +1909,7 @@ out_no_vma:
out_put_task:
put_task_struct(task);
out:
- return result;
+ return ERR_PTR(result);
}
static const struct inode_operations proc_map_files_inode_operations = {
@@ -2135,13 +2123,12 @@ static const struct file_operations proc_timers_operations = {
};
#endif /* CONFIG_CHECKPOINT_RESTORE */
-static struct dentry *proc_pident_instantiate(struct inode *dir,
+static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
const struct pid_entry *p = ptr;
struct inode *inode;
struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
@@ -2160,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -2170,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
const struct pid_entry *ents,
unsigned int nents)
{
- struct dentry *error;
+ int error;
struct task_struct *task = get_proc_task(dir);
const struct pid_entry *p, *last;
- error = ERR_PTR(-ENOENT);
+ error = -ENOENT;
if (!task)
goto out_no_task;
@@ -2197,7 +2184,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return error;
+ return ERR_PTR(error);
}
static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
@@ -2780,11 +2767,10 @@ void proc_flush_task(struct task_struct *task)
}
}
-static struct dentry *proc_pid_instantiate(struct inode *dir,
- struct dentry * dentry,
- struct task_struct *task, const void *ptr)
+static int proc_pid_instantiate(struct inode *dir,
+ struct dentry * dentry,
+ struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
inode = proc_pid_make_inode(dir->i_sb, task);
@@ -2804,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result = NULL;
+ int result = 0;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
@@ -2832,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign
result = proc_pid_instantiate(dir, dentry, task, NULL);
put_task_struct(task);
out:
- return result;
+ return ERR_PTR(result);
}
/*
@@ -2884,21 +2870,21 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns;
+ struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info;
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
return 0;
if (pos == TGID_OFFSET - 1) {
- if (!proc_fill_cache(file, ctx, "self", 4, NULL, NULL, NULL))
+ struct inode *inode = ns->proc_self->d_inode;
+ if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
return 0;
iter.tgid = 0;
} else {
iter.tgid = pos - TGID_OFFSET;
}
iter.task = NULL;
- ns = file->f_dentry->d_sb->s_fs_info;
for (iter = next_tgid(ns, iter);
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
@@ -3027,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *proc_task_instantiate(struct inode *dir,
+static int proc_task_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
inode = proc_pid_make_inode(dir->i_sb, task);
@@ -3049,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result = ERR_PTR(-ENOENT);
+ int result = -ENOENT;
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
@@ -3086,7 +3071,7 @@ out_drop_task:
out:
put_task_struct(leader);
out_no_task:
- return result;
+ return ERR_PTR(result);
}
/*
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 1441f143c43b..75f2890abbd8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
return ret;
}
-static struct dentry *
+static int
proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
unsigned fd = (unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
@@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
instantiate_t instantiate)
{
struct task_struct *task = get_proc_task(dir);
- struct dentry *result = ERR_PTR(-ENOENT);
+ int result = -ENOENT;
unsigned fd = name_to_int(dentry);
if (!task)
@@ -216,7 +215,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return result;
+ return ERR_PTR(result);
}
static int proc_readfd_common(struct file *file, struct dir_context *ctx,
@@ -300,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *
+static int
proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
unsigned fd = (unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
@@ -324,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 4eae2e149f31..651d09a11dde 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -170,7 +170,7 @@ extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned
extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+typedef int instantiate_t(struct inode *, struct dentry *,
struct task_struct *, const void *);
extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
instantiate_t, struct task_struct *, const void *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0a22194e5d58..06ea155e1a59 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
prpsinfo.pr_zomb = 0;
strcpy(prpsinfo.pr_fname, "vmlinux");
- strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);
+ strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
nhdr->p_filesz += notesize(&notes[1]);
bufp = storenote(&notes[1], bufp);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index f6abbbbfad8a..49a7fff2e83a 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *proc_ns_instantiate(struct inode *dir,
+static int proc_ns_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
const struct proc_ns_operations *ns_ops = ptr;
struct inode *inode;
struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
@@ -208,9 +207,9 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -248,12 +247,12 @@ const struct file_operations proc_ns_dir_operations = {
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags)
{
- struct dentry *error;
+ int error;
struct task_struct *task = get_proc_task(dir);
const struct proc_ns_operations **entry, **last;
unsigned int len = dentry->d_name.len;
- error = ERR_PTR(-ENOENT);
+ error = -ENOENT;
if (!task)
goto out_no_task;
@@ -272,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return error;
+ return ERR_PTR(error);
}
const struct inode_operations proc_ns_dir_inode_operations = {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f3a570e7c257..71290463a1d3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -796,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p)
return res;
}
-static int proc_sys_compare(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct ctl_table_header *head;
+ struct inode *inode;
+
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
/* AV: can it, indeed? */
+ inode = ACCESS_ONCE(dentry->d_inode);
if (!inode)
return 1;
if (name->len != len)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3e636d864d56..dbf61f6174f0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -688,10 +689,58 @@ const struct file_operations proc_tid_smaps_operations = {
.release = seq_release_private,
};
+/*
+ * We do not want to have constant page-shift bits sitting in
+ * pagemap entries and are about to reuse them some time soon.
+ *
+ * Here's the "migration strategy":
+ * 1. when the system boots these bits remain what they are,
+ * but a warning about future change is printed in log;
+ * 2. once anyone clears soft-dirty bits via clear_refs file,
+ * these flag is set to denote, that user is aware of the
+ * new API and those page-shift bits change their meaning.
+ * The respective warning is printed in dmesg;
+ * 3. In a couple of releases we will remove all the mentions
+ * of page-shift in pagemap entries.
+ */
+
+static bool soft_dirty_cleared __read_mostly;
+
+enum clear_refs_types {
+ CLEAR_REFS_ALL = 1,
+ CLEAR_REFS_ANON,
+ CLEAR_REFS_MAPPED,
+ CLEAR_REFS_SOFT_DIRTY,
+ CLEAR_REFS_LAST,
+};
+
+struct clear_refs_private {
+ struct vm_area_struct *vma;
+ enum clear_refs_types type;
+};
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ /*
+ * The soft-dirty tracker uses #PF-s to catch writes
+ * to pages, so write-protect the pte as well. See the
+ * Documentation/vm/soft-dirty.txt for full description
+ * of how soft-dirty works.
+ */
+ pte_t ptent = *pte;
+ ptent = pte_wrprotect(ptent);
+ ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
+#endif
+}
+
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
+ struct clear_refs_private *cp = walk->private;
+ struct vm_area_struct *vma = cp->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
@@ -706,6 +755,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!pte_present(ptent))
continue;
+ if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+ clear_soft_dirty(vma, addr, pte);
+ continue;
+ }
+
page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
@@ -719,10 +773,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
-#define CLEAR_REFS_ALL 1
-#define CLEAR_REFS_ANON 2
-#define CLEAR_REFS_MAPPED 3
-
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -730,7 +780,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
char buffer[PROC_NUMBUF];
struct mm_struct *mm;
struct vm_area_struct *vma;
- int type;
+ enum clear_refs_types type;
+ int itype;
int rv;
memset(buffer, 0, sizeof(buffer));
@@ -738,23 +789,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &type);
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
if (rv < 0)
return rv;
- if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
+ type = (enum clear_refs_types)itype;
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
return -EINVAL;
+
+ if (type == CLEAR_REFS_SOFT_DIRTY) {
+ soft_dirty_cleared = true;
+ pr_warn_once("The pagemap bits 55-60 has changed their meaning! "
+ "See the linux/Documentation/vm/pagemap.txt for details.\n");
+ }
+
task = get_proc_task(file_inode(file));
if (!task)
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ struct clear_refs_private cp = {
+ .type = type,
+ };
struct mm_walk clear_refs_walk = {
.pmd_entry = clear_refs_pte_range,
.mm = mm,
+ .private = &cp,
};
down_read(&mm->mmap_sem);
+ if (type == CLEAR_REFS_SOFT_DIRTY)
+ mmu_notifier_invalidate_range_start(mm, 0, -1);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- clear_refs_walk.private = vma;
+ cp.vma = vma;
if (is_vm_hugetlb_page(vma))
continue;
/*
@@ -773,6 +838,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
walk_page_range(vma->vm_start, vma->vm_end,
&clear_refs_walk);
}
+ if (type == CLEAR_REFS_SOFT_DIRTY)
+ mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
mmput(mm);
@@ -794,6 +861,7 @@ typedef struct {
struct pagemapread {
int pos, len;
pagemap_entry_t *buffer;
+ bool v2;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -807,14 +875,17 @@ struct pagemapread {
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+/* in "new" pagemap pshift bits are occupied with more status bits */
+#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
+#define __PM_SOFT_DIRTY (1LL)
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
#define PM_FILE PM_STATUS(1LL)
-#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
+#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0)
#define PM_END_OF_BUFFER 1
static inline pagemap_entry_t make_pme(u64 val)
@@ -837,7 +908,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
struct pagemapread *pm = walk->private;
unsigned long addr;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
for (addr = start; addr < end; addr += PAGE_SIZE) {
err = add_to_pagemap(addr, &pme, pm);
@@ -847,11 +918,12 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
return err;
}
-static void pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
u64 frame, flags;
struct page *page = NULL;
+ int flags2 = 0;
if (pte_present(pte)) {
frame = pte_pfn(pte);
@@ -866,19 +938,21 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme,
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
} else {
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
return;
}
if (page && !PageAnon(page))
flags |= PM_FILE;
+ if (pte_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
- *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
+ *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
- pmd_t pmd, int offset)
+static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+ pmd_t pmd, int offset, int pmd_flags2)
{
/*
* Currently pmd for thp is always present because thp can not be
@@ -887,13 +961,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
*/
if (pmd_present(pmd))
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
#else
-static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
- pmd_t pmd, int offset)
+static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+ pmd_t pmd, int offset, int pmd_flags2)
{
}
#endif
@@ -905,17 +979,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct pagemapread *pm = walk->private;
pte_t *pte;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+ int pmd_flags2;
+
+ pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
offset = (addr & ~PAGEMAP_WALK_MASK) >>
PAGE_SHIFT;
- thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
+ thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
@@ -932,7 +1009,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
* and need a new, higher one */
if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
- pme = make_pme(PM_NOT_PRESENT);
+ pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
/* check that 'vma' actually covers this address,
@@ -940,7 +1017,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, vma, addr, *pte);
+ pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
@@ -955,14 +1032,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
#ifdef CONFIG_HUGETLB_PAGE
-static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
pte_t pte, int offset)
{
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ | PM_STATUS2(pm->v2, 0) | PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
/* This function walks within one hugetlb entry in the single call */
@@ -976,7 +1053,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
- huge_pte_to_pagemap_entry(&pme, *pte, offset);
+ huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
@@ -1038,6 +1115,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!count)
goto out_task;
+ pm.v2 = soft_dirty_cleared;
pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
ret = -ENOMEM;
@@ -1110,9 +1188,18 @@ out:
return ret;
}
+static int pagemap_open(struct inode *inode, struct file *file)
+{
+ pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
+ "to stop being page-shift some time soon. See the "
+ "linux/Documentation/vm/pagemap.txt for details.\n");
+ return 0;
+}
+
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
+ .open = pagemap_open,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 9610ac772d7e..061894625903 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v)
for_each_possible_cpu(i)
idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
- do_posix_clock_monotonic_gettime(&uptime);
- monotonic_to_bootbased(&uptime);
+ get_monotonic_boottime(&uptime);
nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 17f7e080d7ff..28503172f2e4 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/list.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include "internal.h"
@@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list);
/* Stores the pointer to the buffer containing kernel elf core headers. */
static char *elfcorebuf;
static size_t elfcorebuf_sz;
+static size_t elfcorebuf_sz_orig;
+
+static char *elfnotes_buf;
+static size_t elfnotes_sz;
/* Total size of vmcore file. */
static u64 vmcore_size;
@@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
return read;
}
-/* Maps vmcore file offset to respective physical address in memroy. */
-static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list,
- struct vmcore **m_ptr)
-{
- struct vmcore *m;
- u64 paddr;
-
- list_for_each_entry(m, vc_list, list) {
- u64 start, end;
- start = m->offset;
- end = m->offset + m->size - 1;
- if (offset >= start && offset <= end) {
- paddr = m->paddr + offset - start;
- *m_ptr = m;
- return paddr;
- }
- }
- *m_ptr = NULL;
- return 0;
-}
-
/* Read from the ELF header and then the crash dump. On error, negative value is
* returned otherwise number of bytes read are returned.
*/
@@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
{
ssize_t acc = 0, tmp;
size_t tsz;
- u64 start, nr_bytes;
- struct vmcore *curr_m = NULL;
+ u64 start;
+ struct vmcore *m = NULL;
if (buflen == 0 || *fpos >= vmcore_size)
return 0;
@@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
/* Read ELF core header */
if (*fpos < elfcorebuf_sz) {
- tsz = elfcorebuf_sz - *fpos;
- if (buflen < tsz)
- tsz = buflen;
+ tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
return -EFAULT;
buflen -= tsz;
@@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
return acc;
}
- start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
- if (!curr_m)
- return -EINVAL;
-
- while (buflen) {
- tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
+ /* Read Elf note segment */
+ if (*fpos < elfcorebuf_sz + elfnotes_sz) {
+ void *kaddr;
- /* Calculate left bytes in current memory segment. */
- nr_bytes = (curr_m->size - (start - curr_m->paddr));
- if (tsz > nr_bytes)
- tsz = nr_bytes;
-
- tmp = read_from_oldmem(buffer, tsz, &start, 1);
- if (tmp < 0)
- return tmp;
+ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
+ kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
+ if (copy_to_user(buffer, kaddr, tsz))
+ return -EFAULT;
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
acc += tsz;
- if (start >= (curr_m->paddr + curr_m->size)) {
- if (curr_m->list.next == &vmcore_list)
- return acc; /*EOF*/
- curr_m = list_entry(curr_m->list.next,
- struct vmcore, list);
- start = curr_m->paddr;
+
+ /* leave now if filled buffer already */
+ if (buflen == 0)
+ return acc;
+ }
+
+ list_for_each_entry(m, &vmcore_list, list) {
+ if (*fpos < m->offset + m->size) {
+ tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
+ start = m->paddr + *fpos - m->offset;
+ tmp = read_from_oldmem(buffer, tsz, &start, 1);
+ if (tmp < 0)
+ return tmp;
+ buflen -= tsz;
+ *fpos += tsz;
+ buffer += tsz;
+ acc += tsz;
+
+ /* leave now if filled buffer already */
+ if (buflen == 0)
+ return acc;
}
}
+
return acc;
}
+/**
+ * alloc_elfnotes_buf - allocate buffer for ELF note segment in
+ * vmalloc memory
+ *
+ * @notes_sz: size of buffer
+ *
+ * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
+ * the buffer to user-space by means of remap_vmalloc_range().
+ *
+ * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
+ * disabled and there's no need to allow users to mmap the buffer.
+ */
+static inline char *alloc_elfnotes_buf(size_t notes_sz)
+{
+#ifdef CONFIG_MMU
+ return vmalloc_user(notes_sz);
+#else
+ return vzalloc(notes_sz);
+#endif
+}
+
+/*
+ * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is
+ * essential for mmap_vmcore() in order to map physically
+ * non-contiguous objects (ELF header, ELF note segment and memory
+ * regions in the 1st kernel pointed to by PT_LOAD entries) into
+ * virtually contiguous user-space in ELF layout.
+ */
+#ifdef CONFIG_MMU
+static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+{
+ size_t size = vma->vm_end - vma->vm_start;
+ u64 start, end, len, tsz;
+ struct vmcore *m;
+
+ start = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ end = start + size;
+
+ if (size > vmcore_size || end > vmcore_size)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ len = 0;
+
+ if (start < elfcorebuf_sz) {
+ u64 pfn;
+
+ tsz = min(elfcorebuf_sz - (size_t)start, size);
+ pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
+ if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
+ vma->vm_page_prot))
+ return -EAGAIN;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+
+ if (start < elfcorebuf_sz + elfnotes_sz) {
+ void *kaddr;
+
+ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
+ kaddr = elfnotes_buf + start - elfcorebuf_sz;
+ if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
+ kaddr, tsz))
+ goto fail;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+
+ list_for_each_entry(m, &vmcore_list, list) {
+ if (start < m->offset + m->size) {
+ u64 paddr = 0;
+
+ tsz = min_t(size_t, m->offset + m->size - start, size);
+ paddr = m->paddr + start - m->offset;
+ if (remap_pfn_range(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
+ goto fail;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+ }
+
+ return 0;
+fail:
+ do_munmap(vma->vm_mm, vma->vm_start, len);
+ return -EAGAIN;
+}
+#else
+static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+{
+ return -ENOSYS;
+}
+#endif
+
static const struct file_operations proc_vmcore_operations = {
.read = read_vmcore,
.llseek = default_llseek,
+ .mmap = mmap_vmcore,
};
static struct vmcore* __init get_new_element(void)
@@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void)
return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
}
-static u64 __init get_vmcore_size_elf64(char *elfptr)
+static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
+ struct list_head *vc_list)
{
- int i;
- u64 size;
- Elf64_Ehdr *ehdr_ptr;
- Elf64_Phdr *phdr_ptr;
-
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
- phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
- size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr));
- for (i = 0; i < ehdr_ptr->e_phnum; i++) {
- size += phdr_ptr->p_memsz;
- phdr_ptr++;
- }
- return size;
-}
-
-static u64 __init get_vmcore_size_elf32(char *elfptr)
-{
- int i;
u64 size;
- Elf32_Ehdr *ehdr_ptr;
- Elf32_Phdr *phdr_ptr;
+ struct vmcore *m;
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
- phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
- size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr));
- for (i = 0; i < ehdr_ptr->e_phnum; i++) {
- size += phdr_ptr->p_memsz;
- phdr_ptr++;
+ size = elfsz + elfnotesegsz;
+ list_for_each_entry(m, vc_list, list) {
+ size += m->size;
}
return size;
}
-/* Merges all the PT_NOTE headers into one. */
-static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
- struct list_head *vc_list)
+/**
+ * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry
+ *
+ * @ehdr_ptr: ELF header
+ *
+ * This function updates p_memsz member of each PT_NOTE entry in the
+ * program header table pointed to by @ehdr_ptr to real size of ELF
+ * note segment.
+ */
+static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
{
- int i, nr_ptnote=0, rc=0;
- char *tmp;
- Elf64_Ehdr *ehdr_ptr;
- Elf64_Phdr phdr, *phdr_ptr;
+ int i, rc=0;
+ Elf64_Phdr *phdr_ptr;
Elf64_Nhdr *nhdr_ptr;
- u64 phdr_sz = 0, note_off;
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
- phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+ phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
- int j;
void *notes_section;
- struct vmcore *new;
u64 offset, max_sz, sz, real_sz = 0;
if (phdr_ptr->p_type != PT_NOTE)
continue;
- nr_ptnote++;
max_sz = phdr_ptr->p_memsz;
offset = phdr_ptr->p_offset;
notes_section = kmalloc(max_sz, GFP_KERNEL);
@@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
return rc;
}
nhdr_ptr = notes_section;
- for (j = 0; j < max_sz; j += sz) {
+ while (real_sz < max_sz) {
if (nhdr_ptr->n_namesz == 0)
break;
sz = sizeof(Elf64_Nhdr) +
@@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
real_sz += sz;
nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
}
-
- /* Add this contiguous chunk of notes section to vmcore list.*/
- new = get_new_element();
- if (!new) {
- kfree(notes_section);
- return -ENOMEM;
- }
- new->paddr = phdr_ptr->p_offset;
- new->size = real_sz;
- list_add_tail(&new->list, vc_list);
- phdr_sz += real_sz;
kfree(notes_section);
+ phdr_ptr->p_memsz = real_sz;
+ }
+
+ return 0;
+}
+
+/**
+ * get_note_number_and_size_elf64 - get the number of PT_NOTE program
+ * headers and sum of real size of their ELF note segment headers and
+ * data.
+ *
+ * @ehdr_ptr: ELF header
+ * @nr_ptnote: buffer for the number of PT_NOTE program headers
+ * @sz_ptnote: buffer for size of unique PT_NOTE program header
+ *
+ * This function is used to merge multiple PT_NOTE program headers
+ * into a unique single one. The resulting unique entry will have
+ * @sz_ptnote in its phdr->p_mem.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr has already been updated by update_note_header_size_elf64
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr,
+ int *nr_ptnote, u64 *sz_ptnote)
+{
+ int i;
+ Elf64_Phdr *phdr_ptr;
+
+ *nr_ptnote = *sz_ptnote = 0;
+
+ phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ *nr_ptnote += 1;
+ *sz_ptnote += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/**
+ * copy_notes_elf64 - copy ELF note segments in a given buffer
+ *
+ * @ehdr_ptr: ELF header
+ * @notes_buf: buffer into which ELF note segments are copied
+ *
+ * This function is used to copy ELF note segment in the 1st kernel
+ * into the buffer @notes_buf in the 2nd kernel. It is assumed that
+ * size of the buffer @notes_buf is equal to or larger than sum of the
+ * real ELF note segment headers and data.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr has already been updated by update_note_header_size_elf64
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
+{
+ int i, rc=0;
+ Elf64_Phdr *phdr_ptr;
+
+ phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1);
+
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 offset;
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ offset = phdr_ptr->p_offset;
+ rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ if (rc < 0)
+ return rc;
+ notes_buf += phdr_ptr->p_memsz;
}
+ return 0;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
+ char **notes_buf, size_t *notes_sz)
+{
+ int i, nr_ptnote=0, rc=0;
+ char *tmp;
+ Elf64_Ehdr *ehdr_ptr;
+ Elf64_Phdr phdr;
+ u64 phdr_sz = 0, note_off;
+
+ ehdr_ptr = (Elf64_Ehdr *)elfptr;
+
+ rc = update_note_header_size_elf64(ehdr_ptr);
+ if (rc < 0)
+ return rc;
+
+ rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz);
+ if (rc < 0)
+ return rc;
+
+ *notes_sz = roundup(phdr_sz, PAGE_SIZE);
+ *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ if (!*notes_buf)
+ return -ENOMEM;
+
+ rc = copy_notes_elf64(ehdr_ptr, *notes_buf);
+ if (rc < 0)
+ return rc;
+
/* Prepare merged PT_NOTE program header. */
phdr.p_type = PT_NOTE;
phdr.p_flags = 0;
note_off = sizeof(Elf64_Ehdr) +
(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
- phdr.p_offset = note_off;
+ phdr.p_offset = roundup(note_off, PAGE_SIZE);
phdr.p_vaddr = phdr.p_paddr = 0;
phdr.p_filesz = phdr.p_memsz = phdr_sz;
phdr.p_align = 0;
@@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
*elfsz = *elfsz - i;
memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
+ memset(elfptr + *elfsz, 0, i);
+ *elfsz = roundup(*elfsz, PAGE_SIZE);
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
@@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
return 0;
}
-/* Merges all the PT_NOTE headers into one. */
-static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
- struct list_head *vc_list)
+/**
+ * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry
+ *
+ * @ehdr_ptr: ELF header
+ *
+ * This function updates p_memsz member of each PT_NOTE entry in the
+ * program header table pointed to by @ehdr_ptr to real size of ELF
+ * note segment.
+ */
+static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
{
- int i, nr_ptnote=0, rc=0;
- char *tmp;
- Elf32_Ehdr *ehdr_ptr;
- Elf32_Phdr phdr, *phdr_ptr;
+ int i, rc=0;
+ Elf32_Phdr *phdr_ptr;
Elf32_Nhdr *nhdr_ptr;
- u64 phdr_sz = 0, note_off;
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
- phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+ phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
- int j;
void *notes_section;
- struct vmcore *new;
u64 offset, max_sz, sz, real_sz = 0;
if (phdr_ptr->p_type != PT_NOTE)
continue;
- nr_ptnote++;
max_sz = phdr_ptr->p_memsz;
offset = phdr_ptr->p_offset;
notes_section = kmalloc(max_sz, GFP_KERNEL);
@@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
return rc;
}
nhdr_ptr = notes_section;
- for (j = 0; j < max_sz; j += sz) {
+ while (real_sz < max_sz) {
if (nhdr_ptr->n_namesz == 0)
break;
sz = sizeof(Elf32_Nhdr) +
@@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
real_sz += sz;
nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
}
-
- /* Add this contiguous chunk of notes section to vmcore list.*/
- new = get_new_element();
- if (!new) {
- kfree(notes_section);
- return -ENOMEM;
- }
- new->paddr = phdr_ptr->p_offset;
- new->size = real_sz;
- list_add_tail(&new->list, vc_list);
- phdr_sz += real_sz;
kfree(notes_section);
+ phdr_ptr->p_memsz = real_sz;
+ }
+
+ return 0;
+}
+
+/**
+ * get_note_number_and_size_elf32 - get the number of PT_NOTE program
+ * headers and sum of real size of their ELF note segment headers and
+ * data.
+ *
+ * @ehdr_ptr: ELF header
+ * @nr_ptnote: buffer for the number of PT_NOTE program headers
+ * @sz_ptnote: buffer for size of unique PT_NOTE program header
+ *
+ * This function is used to merge multiple PT_NOTE program headers
+ * into a unique single one. The resulting unique entry will have
+ * @sz_ptnote in its phdr->p_mem.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr has already been updated by update_note_header_size_elf32
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr,
+ int *nr_ptnote, u64 *sz_ptnote)
+{
+ int i;
+ Elf32_Phdr *phdr_ptr;
+
+ *nr_ptnote = *sz_ptnote = 0;
+
+ phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ *nr_ptnote += 1;
+ *sz_ptnote += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/**
+ * copy_notes_elf32 - copy ELF note segments in a given buffer
+ *
+ * @ehdr_ptr: ELF header
+ * @notes_buf: buffer into which ELF note segments are copied
+ *
+ * This function is used to copy ELF note segment in the 1st kernel
+ * into the buffer @notes_buf in the 2nd kernel. It is assumed that
+ * size of the buffer @notes_buf is equal to or larger than sum of the
+ * real ELF note segment headers and data.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr has already been updated by update_note_header_size_elf32
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
+{
+ int i, rc=0;
+ Elf32_Phdr *phdr_ptr;
+
+ phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1);
+
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 offset;
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ offset = phdr_ptr->p_offset;
+ rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ if (rc < 0)
+ return rc;
+ notes_buf += phdr_ptr->p_memsz;
}
+ return 0;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
+ char **notes_buf, size_t *notes_sz)
+{
+ int i, nr_ptnote=0, rc=0;
+ char *tmp;
+ Elf32_Ehdr *ehdr_ptr;
+ Elf32_Phdr phdr;
+ u64 phdr_sz = 0, note_off;
+
+ ehdr_ptr = (Elf32_Ehdr *)elfptr;
+
+ rc = update_note_header_size_elf32(ehdr_ptr);
+ if (rc < 0)
+ return rc;
+
+ rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz);
+ if (rc < 0)
+ return rc;
+
+ *notes_sz = roundup(phdr_sz, PAGE_SIZE);
+ *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ if (!*notes_buf)
+ return -ENOMEM;
+
+ rc = copy_notes_elf32(ehdr_ptr, *notes_buf);
+ if (rc < 0)
+ return rc;
+
/* Prepare merged PT_NOTE program header. */
phdr.p_type = PT_NOTE;
phdr.p_flags = 0;
note_off = sizeof(Elf32_Ehdr) +
(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
- phdr.p_offset = note_off;
+ phdr.p_offset = roundup(note_off, PAGE_SIZE);
phdr.p_vaddr = phdr.p_paddr = 0;
phdr.p_filesz = phdr.p_memsz = phdr_sz;
phdr.p_align = 0;
@@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
*elfsz = *elfsz - i;
memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
+ memset(elfptr + *elfsz, 0, i);
+ *elfsz = roundup(*elfsz, PAGE_SIZE);
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
@@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
* the new offset fields of exported program headers. */
static int __init process_ptload_program_headers_elf64(char *elfptr,
size_t elfsz,
+ size_t elfnotes_sz,
struct list_head *vc_list)
{
int i;
@@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr,
ehdr_ptr = (Elf64_Ehdr *)elfptr;
phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
- /* First program header is PT_NOTE header. */
- vmcore_off = sizeof(Elf64_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) +
- phdr_ptr->p_memsz; /* Note sections */
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 paddr, start, end, size;
+
if (phdr_ptr->p_type != PT_LOAD)
continue;
+ paddr = phdr_ptr->p_offset;
+ start = rounddown(paddr, PAGE_SIZE);
+ end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
+ size = end - start;
+
/* Add this contiguous chunk of memory to vmcore list.*/
new = get_new_element();
if (!new)
return -ENOMEM;
- new->paddr = phdr_ptr->p_offset;
- new->size = phdr_ptr->p_memsz;
+ new->paddr = start;
+ new->size = size;
list_add_tail(&new->list, vc_list);
/* Update the program header offset. */
- phdr_ptr->p_offset = vmcore_off;
- vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+ phdr_ptr->p_offset = vmcore_off + (paddr - start);
+ vmcore_off = vmcore_off + size;
}
return 0;
}
static int __init process_ptload_program_headers_elf32(char *elfptr,
size_t elfsz,
+ size_t elfnotes_sz,
struct list_head *vc_list)
{
int i;
@@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
ehdr_ptr = (Elf32_Ehdr *)elfptr;
phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
- /* First program header is PT_NOTE header. */
- vmcore_off = sizeof(Elf32_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) +
- phdr_ptr->p_memsz; /* Note sections */
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 paddr, start, end, size;
+
if (phdr_ptr->p_type != PT_LOAD)
continue;
+ paddr = phdr_ptr->p_offset;
+ start = rounddown(paddr, PAGE_SIZE);
+ end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
+ size = end - start;
+
/* Add this contiguous chunk of memory to vmcore list.*/
new = get_new_element();
if (!new)
return -ENOMEM;
- new->paddr = phdr_ptr->p_offset;
- new->size = phdr_ptr->p_memsz;
+ new->paddr = start;
+ new->size = size;
list_add_tail(&new->list, vc_list);
/* Update the program header offset */
- phdr_ptr->p_offset = vmcore_off;
- vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+ phdr_ptr->p_offset = vmcore_off + (paddr - start);
+ vmcore_off = vmcore_off + size;
}
return 0;
}
/* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets_elf64(char *elfptr,
- struct list_head *vc_list)
+static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
+ struct list_head *vc_list)
{
loff_t vmcore_off;
- Elf64_Ehdr *ehdr_ptr;
struct vmcore *m;
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
-
- /* Skip Elf header and program headers. */
- vmcore_off = sizeof(Elf64_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr);
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
list_for_each_entry(m, vc_list, list) {
m->offset = vmcore_off;
@@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr,
}
}
-/* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets_elf32(char *elfptr,
- struct list_head *vc_list)
+static void free_elfcorebuf(void)
{
- loff_t vmcore_off;
- Elf32_Ehdr *ehdr_ptr;
- struct vmcore *m;
-
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
-
- /* Skip Elf header and program headers. */
- vmcore_off = sizeof(Elf32_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr);
-
- list_for_each_entry(m, vc_list, list) {
- m->offset = vmcore_off;
- vmcore_off += m->size;
- }
+ free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
+ elfcorebuf = NULL;
+ vfree(elfnotes_buf);
+ elfnotes_buf = NULL;
}
static int __init parse_crash_elf64_headers(void)
@@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void)
}
/* Read in all elf headers. */
- elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr);
- elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+ elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) +
+ ehdr.e_phnum * sizeof(Elf64_Phdr);
+ elfcorebuf_sz = elfcorebuf_sz_orig;
+ elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(elfcorebuf_sz_orig));
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
- if (rc < 0) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ if (rc < 0)
+ goto fail;
/* Merge all PT_NOTE headers into one. */
- rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz,
+ &elfnotes_buf, &elfnotes_sz);
+ if (rc)
+ goto fail;
rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
- &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
- set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list);
+ elfnotes_sz, &vmcore_list);
+ if (rc)
+ goto fail;
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
return 0;
+fail:
+ free_elfcorebuf();
+ return rc;
}
static int __init parse_crash_elf32_headers(void)
@@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void)
}
/* Read in all elf headers. */
- elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
- elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+ elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
+ elfcorebuf_sz = elfcorebuf_sz_orig;
+ elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(elfcorebuf_sz_orig));
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
- if (rc < 0) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ if (rc < 0)
+ goto fail;
/* Merge all PT_NOTE headers into one. */
- rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz,
+ &elfnotes_buf, &elfnotes_sz);
+ if (rc)
+ goto fail;
rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
- &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
- set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list);
+ elfnotes_sz, &vmcore_list);
+ if (rc)
+ goto fail;
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
return 0;
+fail:
+ free_elfcorebuf();
+ return rc;
}
static int __init parse_crash_elf_headers(void)
@@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void)
rc = parse_crash_elf64_headers();
if (rc)
return rc;
-
- /* Determine vmcore size. */
- vmcore_size = get_vmcore_size_elf64(elfcorebuf);
} else if (e_ident[EI_CLASS] == ELFCLASS32) {
rc = parse_crash_elf32_headers();
if (rc)
return rc;
-
- /* Determine vmcore size. */
- vmcore_size = get_vmcore_size_elf32(elfcorebuf);
} else {
pr_warn("Warning: Core image elf header is not sane\n");
return -EINVAL;
}
+
+ /* Determine vmcore size. */
+ vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
+ &vmcore_list);
+
return 0;
}
@@ -711,7 +986,6 @@ void vmcore_cleanup(void)
list_del(&m->list);
kfree(m);
}
- kfree(elfcorebuf);
- elfcorebuf = NULL;
+ free_elfcorebuf();
}
EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index e4bcb2cf055a..bfd95bf38005 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -178,6 +178,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
if (p->psi->erase)
p->psi->erase(p->type, p->id, p->count,
dentry->d_inode->i_ctime, p->psi);
+ else
+ return -EPERM;
return simple_unlink(dir, dentry);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 86d1038b5a12..b7ffe2bcd9c4 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -239,17 +239,15 @@ int pstore_register(struct pstore_info *psi)
{
struct module *owner = psi->owner;
+ if (backend && strcmp(backend, psi->name))
+ return -EPERM;
+
spin_lock(&pstore_lock);
if (psinfo) {
spin_unlock(&pstore_lock);
return -EBUSY;
}
- if (backend && strcmp(backend, psi->name)) {
- spin_unlock(&pstore_lock);
- return -EINVAL;
- }
-
if (!psi->write)
psi->write = pstore_write_compat;
psinfo = psi;
@@ -274,6 +272,9 @@ int pstore_register(struct pstore_info *psi)
add_timer(&pstore_timer);
}
+ pr_info("pstore: Registered %s as persistent store backend\n",
+ psi->name);
+
return 0;
}
EXPORT_SYMBOL_GPL(pstore_register);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1376e5a8f0d6..43abee2c6cb9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -399,8 +399,6 @@ static int ramoops_probe(struct platform_device *pdev)
goto fail_out;
}
- if (!is_power_of_2(pdata->mem_size))
- pdata->mem_size = rounddown_pow_of_two(pdata->mem_size);
if (!is_power_of_2(pdata->record_size))
pdata->record_size = rounddown_pow_of_two(pdata->record_size);
if (!is_power_of_2(pdata->console_size))
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 59337326e288..de272d426763 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -46,7 +46,7 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
}
/* increase and wrap the start pointer, returning the old value */
-static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
+static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a)
{
int old;
int new;
@@ -62,7 +62,7 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
}
/* increase the size counter until it hits the max size */
-static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
+static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a)
{
size_t old;
size_t new;
@@ -78,6 +78,53 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
} while (atomic_cmpxchg(&prz->buffer->size, old, new) != old);
}
+static DEFINE_RAW_SPINLOCK(buffer_lock);
+
+/* increase and wrap the start pointer, returning the old value */
+static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a)
+{
+ int old;
+ int new;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&buffer_lock, flags);
+
+ old = atomic_read(&prz->buffer->start);
+ new = old + a;
+ while (unlikely(new > prz->buffer_size))
+ new -= prz->buffer_size;
+ atomic_set(&prz->buffer->start, new);
+
+ raw_spin_unlock_irqrestore(&buffer_lock, flags);
+
+ return old;
+}
+
+/* increase the size counter until it hits the max size */
+static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a)
+{
+ size_t old;
+ size_t new;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&buffer_lock, flags);
+
+ old = atomic_read(&prz->buffer->size);
+ if (old == prz->buffer_size)
+ goto exit;
+
+ new = old + a;
+ if (new > prz->buffer_size)
+ new = prz->buffer_size;
+ atomic_set(&prz->buffer->size, new);
+
+exit:
+ raw_spin_unlock_irqrestore(&buffer_lock, flags);
+}
+
+static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic;
+static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic;
+
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
uint8_t *data, size_t len, uint8_t *ecc)
{
@@ -372,6 +419,9 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size)
return NULL;
}
+ buffer_start_add = buffer_start_add_locked;
+ buffer_size_add = buffer_size_add_locked;
+
return ioremap(start, size);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 2cefa417be34..122a3846d9e1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -41,8 +41,19 @@ static inline int unsigned_offsets(struct file *file)
return file->f_mode & FMODE_UNSIGNED_OFFSET;
}
-static loff_t lseek_execute(struct file *file, struct inode *inode,
- loff_t offset, loff_t maxsize)
+/**
+ * vfs_setpos - update the file offset for lseek
+ * @file: file structure in question
+ * @offset: file offset to seek to
+ * @maxsize: maximum file size
+ *
+ * This is a low-level filesystem helper for updating the file offset to
+ * the value specified by @offset if the given offset is valid and it is
+ * not equal to the current file offset.
+ *
+ * Return the specified offset on success and -EINVAL on invalid offset.
+ */
+loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
if (offset < 0 && !unsigned_offsets(file))
return -EINVAL;
@@ -55,6 +66,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
}
return offset;
}
+EXPORT_SYMBOL(vfs_setpos);
/**
* generic_file_llseek_size - generic llseek implementation for regular files
@@ -76,8 +88,6 @@ loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
loff_t maxsize, loff_t eof)
{
- struct inode *inode = file->f_mapping->host;
-
switch (whence) {
case SEEK_END:
offset += eof;
@@ -97,8 +107,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
* like SEEK_SET.
*/
spin_lock(&file->f_lock);
- offset = lseek_execute(file, inode, file->f_pos + offset,
- maxsize);
+ offset = vfs_setpos(file, file->f_pos + offset, maxsize);
spin_unlock(&file->f_lock);
return offset;
case SEEK_DATA:
@@ -120,7 +129,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
break;
}
- return lseek_execute(file, inode, offset, maxsize);
+ return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
@@ -145,6 +154,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
EXPORT_SYMBOL(generic_file_llseek);
/**
+ * fixed_size_llseek - llseek implementation for fixed-sized devices
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @whence: type of seek
+ * @size: size of the file
+ *
+ */
+loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
+{
+ switch (whence) {
+ case SEEK_SET: case SEEK_CUR: case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ size, size);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(fixed_size_llseek);
+
+/**
* noop_llseek - No Operation Performed llseek implementation
* @file: file structure to seek on
* @offset: file offset to seek to
@@ -296,7 +325,7 @@ out_putf:
* them to something that fits in "int" so that others
* won't have to do range checks all the time.
*/
-int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
+int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
struct inode *inode;
loff_t pos;
@@ -477,7 +506,8 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_read(f.file, buf, count, &pos);
- file_pos_write(f.file, pos);
+ if (ret >= 0)
+ file_pos_write(f.file, pos);
fdput(f);
}
return ret;
@@ -492,7 +522,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_write(f.file, buf, count, &pos);
- file_pos_write(f.file, pos);
+ if (ret >= 0)
+ file_pos_write(f.file, pos);
fdput(f);
}
@@ -780,7 +811,8 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_readv(f.file, vec, vlen, &pos);
- file_pos_write(f.file, pos);
+ if (ret >= 0)
+ file_pos_write(f.file, pos);
fdput(f);
}
@@ -799,7 +831,8 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
loff_t pos = file_pos_read(f.file);
ret = vfs_writev(f.file, vec, vlen, &pos);
- file_pos_write(f.file, pos);
+ if (ret >= 0)
+ file_pos_write(f.file, pos);
fdput(f);
}
@@ -959,7 +992,8 @@ COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
return -EBADF;
pos = f.file->f_pos;
ret = compat_readv(f.file, vec, vlen, &pos);
- f.file->f_pos = pos;
+ if (ret >= 0)
+ f.file->f_pos = pos;
fdput(f);
return ret;
}
@@ -1025,7 +1059,8 @@ COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
return -EBADF;
pos = f.file->f_pos;
ret = compat_writev(f.file, vec, vlen, &pos);
- f.file->f_pos = pos;
+ if (ret >= 0)
+ f.file->f_pos = pos;
fdput(f);
return ret;
}
@@ -1129,7 +1164,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
if (in.file->f_flags & O_NONBLOCK)
fl = SPLICE_F_NONBLOCK;
#endif
+ file_start_write(out.file);
retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
+ file_end_write(out.file);
if (retval > 0) {
add_rchar(current, retval);
diff --git a/fs/select.c b/fs/select.c
index 8c1c96c27062..6b14dc7df3a4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -27,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
@@ -236,7 +237,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
set_current_state(state);
if (!pwq->triggered)
- rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+ rc = freezable_schedule_hrtimeout_range(expires, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/splice.c b/fs/splice.c
index d37431dd60a1..3b7ee656f3aa 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1098,27 +1098,13 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
{
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int);
- int ret;
-
- if (unlikely(!(out->f_mode & FMODE_WRITE)))
- return -EBADF;
-
- if (unlikely(out->f_flags & O_APPEND))
- return -EINVAL;
-
- ret = rw_verify_area(WRITE, out, ppos, len);
- if (unlikely(ret < 0))
- return ret;
if (out->f_op && out->f_op->splice_write)
splice_write = out->f_op->splice_write;
else
splice_write = default_file_splice_write;
- file_start_write(out);
- ret = splice_write(pipe, out, ppos, len, flags);
- file_end_write(out);
- return ret;
+ return splice_write(pipe, out, ppos, len, flags);
}
/*
@@ -1307,6 +1293,16 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
};
long ret;
+ if (unlikely(!(out->f_mode & FMODE_WRITE)))
+ return -EBADF;
+
+ if (unlikely(out->f_flags & O_APPEND))
+ return -EINVAL;
+
+ ret = rw_verify_area(WRITE, out, opos, len);
+ if (unlikely(ret < 0))
+ return ret;
+
ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0)
*ppos = sd.pos;
@@ -1362,7 +1358,19 @@ static long do_splice(struct file *in, loff_t __user *off_in,
offset = out->f_pos;
}
+ if (unlikely(!(out->f_mode & FMODE_WRITE)))
+ return -EBADF;
+
+ if (unlikely(out->f_flags & O_APPEND))
+ return -EINVAL;
+
+ ret = rw_verify_area(WRITE, out, &offset, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ file_start_write(out);
ret = do_splice_from(ipipe, out, &offset, len, flags);
+ file_end_write(out);
if (!off_out)
out->f_pos = offset;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 1c0d5f264767..731b2bbcaab3 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,8 +27,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
return err;
}
-static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
- struct qstr *qstr)
+static int sysv_hash(const struct dentry *dentry, struct qstr *qstr)
{
/* Truncate the name in place, avoids having to define a compare
function. */
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 102c072c6bbf..5f6fc17d6bc5 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -594,6 +594,29 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return 0;
}
+static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+ struct udf_inode_info *iinfo;
+ int err;
+
+ inode = udf_new_inode(dir, mode, &err);
+ if (!inode)
+ return err;
+
+ iinfo = UDF_I(inode);
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ inode->i_data.a_ops = &udf_adinicb_aops;
+ else
+ inode->i_data.a_ops = &udf_aops;
+ inode->i_op = &udf_file_inode_operations;
+ inode->i_fop = &udf_file_operations;
+ mark_inode_dirty(inode);
+
+ d_tmpfile(dentry, inode);
+ return 0;
+}
+
static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t rdev)
{
@@ -1311,6 +1334,7 @@ const struct inode_operations udf_dir_inode_operations = {
.rmdir = udf_rmdir,
.mknod = udf_mknod,
.rename = udf_rename,
+ .tmpfile = udf_tmpfile,
};
const struct inode_operations udf_symlink_inode_operations = {
.readlink = generic_readlink,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0ad2b95fca12..de3dc98f4e8f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1268,8 +1268,7 @@ xfs_seek_data(
}
out:
- if (offset != file->f_pos)
- file->f_pos = offset;
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
xfs_iunlock_map_shared(ip, lock);
@@ -1377,8 +1376,7 @@ out:
* situation in particular.
*/
offset = min_t(loff_t, offset, isize);
- if (offset != file->f_pos)
- file->f_pos = offset;
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
xfs_iunlock_map_shared(ip, lock);