summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt5
-rw-r--r--fs/afs/cell.c6
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/afs/server.c6
-rw-r--r--fs/aio.c39
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c49
-rw-r--r--fs/btrfs/backref.c1
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/extent-tree.c14
-rw-r--r--fs/btrfs/inode-item.c44
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/raid56.c1
-rw-r--r--fs/btrfs/relocation.c18
-rw-r--r--fs/btrfs/send.c3
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/tree-log.c114
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/ceph/caps.c26
-rw-r--r--fs/ceph/dir.c28
-rw-r--r--fs/ceph/file.c9
-rw-r--r--fs/ceph/super.c27
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/dcookies.c11
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--fs/efivarfs/file.c6
-rw-r--r--fs/eventfd.c9
-rw-r--r--fs/eventpoll.c23
-rw-r--r--fs/fcntl.c12
-rw-r--r--fs/file.c17
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/gfs2/bmap.c3
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hostfs/hostfs_user.c2
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/internal.h14
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/minix/Kconfig2
-rw-r--r--fs/namei.c66
-rw-r--r--fs/namespace.c19
-rw-r--r--fs/nfs/callback_proc.c14
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4client.c6
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/pnfs.c13
-rw-r--r--fs/nfs/pnfs_nfs.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/write.c89
-rw-r--r--fs/nfsd/nfs4state.c62
-rw-r--r--fs/notify/fanotify/fanotify_user.c14
-rw-r--r--fs/notify/inotify/inotify_user.c9
-rw-r--r--fs/nsfs.c1
-rw-r--r--fs/ocfs2/cluster/tcp.c6
-rw-r--r--fs/ocfs2/filecheck.c9
-rw-r--r--fs/open.c77
-rw-r--r--fs/overlayfs/Kconfig14
-rw-r--r--fs/overlayfs/export.c216
-rw-r--r--fs/overlayfs/inode.c58
-rw-r--r--fs/overlayfs/namei.c6
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/super.c1
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/quota/compat.c13
-rw-r--r--fs/quota/quota.c10
-rw-r--r--fs/read_write.c45
-rw-r--r--fs/readdir.c11
-rw-r--r--fs/select.c29
-rw-r--r--fs/signalfd.c46
-rw-r--r--fs/splice.c12
-rw-r--r--fs/stat.c12
-rw-r--r--fs/sync.c19
-rw-r--r--fs/sysfs/symlink.c1
-rw-r--r--fs/utimes.c25
-rw-r--r--fs/xfs/scrub/agheader.c3
-rw-r--r--fs/xfs/xfs_iomap.c42
-rw-r--r--fs/xfs/xfs_refcount_item.c9
-rw-r--r--fs/xfs/xfs_rmap_item.c4
-rw-r--r--fs/xfs/xfs_super.c2
86 files changed, 1131 insertions, 462 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 58c2bbd385ad..57a27c42b5ac 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
config BINFMT_ELF
bool "Kernel support for ELF binaries"
- depends on MMU && (BROKEN || !FRV)
+ depends on MMU
select ELFCORE
default y
---help---
@@ -35,7 +35,7 @@ config ARCH_BINFMT_ELF_STATE
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y if !BINFMT_ELF
- depends on (ARM || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
+ depends on (ARM || (SUPERH32 && !MMU) || C6X)
select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
@@ -90,7 +90,6 @@ config BINFMT_SCRIPT
config BINFMT_FLAT
bool "Kernel support for flat binaries"
depends on !MMU || ARM || M68K
- depends on !FRV || BROKEN
help
Support uClinux FLAT format binaries.
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 9bb921d120d0..3d2c5e0e854e 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -25,7 +25,7 @@ static void afs_manage_cell(struct work_struct *);
static void afs_dec_cells_outstanding(struct afs_net *net)
{
if (atomic_dec_and_test(&net->cells_outstanding))
- wake_up_atomic_t(&net->cells_outstanding);
+ wake_up_var(&net->cells_outstanding);
}
/*
@@ -764,7 +764,7 @@ void afs_cell_purge(struct afs_net *net)
afs_queue_cell_manager(net);
_debug("wait");
- wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->cells_outstanding,
+ !atomic_read(&net->cells_outstanding));
_leave("");
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f38d6a561a84..72217170b155 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -118,6 +118,7 @@ struct afs_call {
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
u16 service_id; /* Actual service ID (after upgrade) */
+ unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e1126659f043..f7ae54b6a393 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -103,8 +103,8 @@ void afs_close_socket(struct afs_net *net)
}
_debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
- wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->nr_outstanding_calls,
+ !atomic_read(&net->nr_outstanding_calls));
_debug("no outstanding calls");
kernel_sock_shutdown(net->socket, SHUT_RDWR);
@@ -131,6 +131,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->type = type;
call->net = net;
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
atomic_set(&call->usage, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
init_waitqueue_head(&call->waitq);
@@ -169,13 +170,14 @@ void afs_put_call(struct afs_call *call)
afs_put_server(call->net, call->cm_server);
afs_put_cb_interest(call->net, call->cbi);
kfree(call->request);
- kfree(call);
- o = atomic_dec_return(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_free, 0, o,
__builtin_return_address(0));
+ kfree(call);
+
+ o = atomic_dec_return(&net->nr_outstanding_calls);
if (o == 0)
- wake_up_atomic_t(&net->nr_outstanding_calls);
+ wake_up_var(&net->nr_outstanding_calls);
}
}
@@ -378,7 +380,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
(async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
- call->upgrade);
+ call->upgrade,
+ call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
goto error_kill_call;
@@ -727,7 +730,8 @@ void afs_charge_preallocation(struct work_struct *work)
afs_wake_up_async_call,
afs_rx_attach,
(unsigned long)call,
- GFP_KERNEL) < 0)
+ GFP_KERNEL,
+ call->debug_id) < 0)
break;
call = NULL;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1880f1b6a9f1..a43ef77dabae 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -25,7 +25,7 @@ static void afs_inc_servers_outstanding(struct afs_net *net)
static void afs_dec_servers_outstanding(struct afs_net *net)
{
if (atomic_dec_and_test(&net->servers_outstanding))
- wake_up_atomic_t(&net->servers_outstanding);
+ wake_up_var(&net->servers_outstanding);
}
/*
@@ -521,8 +521,8 @@ void afs_purge_servers(struct afs_net *net)
afs_queue_server_manager(net);
_debug("wait");
- wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&net->servers_outstanding,
+ !atomic_read(&net->servers_outstanding));
_leave("");
}
diff --git a/fs/aio.c b/fs/aio.c
index a062d75109cb..88d7927ffbc6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
#define AIO_RING_PAGES 8
struct kioctx_table {
- struct rcu_head rcu;
- unsigned nr;
- struct kioctx *table[];
+ struct rcu_head rcu;
+ unsigned nr;
+ struct kioctx __rcu *table[];
};
struct kioctx_cpu {
@@ -115,7 +115,7 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
+ struct rcu_work free_rwork; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -329,7 +329,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
- ctx = table->table[i];
+ ctx = rcu_dereference(table->table[i]);
if (ctx && ctx->aio_ring_file == file) {
if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,10 +588,15 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
return cancel(&kiocb->common);
}
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(). Use rcu_work.
+ */
static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-
+ struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
+ free_rwork);
pr_debug("freeing %p\n", ctx);
aio_free_ring(ctx);
@@ -609,8 +614,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ /* Synchronize against RCU protected table->table[] dereferences */
+ INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
+ queue_rcu_work(system_wq, &ctx->free_rwork);
}
/*
@@ -651,9 +657,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
while (1) {
if (table)
for (i = 0; i < table->nr; i++)
- if (!table->table[i]) {
+ if (!rcu_access_pointer(table->table[i])) {
ctx->id = i;
- table->table[i] = ctx;
+ rcu_assign_pointer(table->table[i], ctx);
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -834,11 +840,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
}
table = rcu_dereference_raw(mm->ioctx_table);
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
+ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+ RCU_INIT_POINTER(table->table[ctx->id], NULL);
spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
+ /* free_ioctx_reqs() will do the necessary RCU synchronization */
wake_up_all(&ctx->wait);
/*
@@ -880,7 +886,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0;
for (i = 0; i < table->nr; ++i) {
- struct kioctx *ctx = table->table[i];
+ struct kioctx *ctx =
+ rcu_dereference_protected(table->table[i], true);
if (!ctx) {
skipped++;
@@ -1069,7 +1076,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
- ctx = table->table[id];
+ ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
percpu_ref_get(&ctx->users);
ret = ctx;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index b7c816f39404..26f6b4f41ce6 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -310,7 +310,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- return sys_close(param->ioctlfd);
+ return ksys_close(param->ioctlfd);
}
/*
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7c5a9861bef..a41b48f82a70 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -241,7 +241,7 @@ ret:
return retval;
error:
if (fd_binary > 0)
- sys_close(fd_binary);
+ ksys_close(fd_binary);
bprm->interp_flags = 0;
bprm->interp_data = 0;
goto ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fcb5175..fe09ef9c21f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1058,6 +1058,27 @@ retry:
return 0;
}
+static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
+{
+ struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
+
+ if (!disk)
+ return NULL;
+ /*
+ * Now that we hold gendisk reference we make sure bdev we looked up is
+ * not stale. If it is, it means device got removed and created before
+ * we looked up gendisk and we fail open in such case. Associating
+ * unhashed bdev with newly created gendisk could lead to two bdevs
+ * (and thus two independent caches) being associated with one device
+ * which is bad.
+ */
+ if (inode_unhashed(bdev->bd_inode)) {
+ put_disk_and_module(disk);
+ return NULL;
+ }
+ return disk;
+}
+
/**
* bd_start_claiming - start claiming a block device
* @bdev: block device of interest
@@ -1094,7 +1115,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
* @bdev might not have been initialized properly yet, look up
* and grab the outer block device the hard way.
*/
- disk = get_gendisk(bdev->bd_dev, &partno);
+ disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
return ERR_PTR(-ENXIO);
@@ -1111,8 +1132,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
else
whole = bdgrab(bdev);
- module_put(disk->fops->owner);
- put_disk(disk);
+ put_disk_and_module(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -1407,10 +1427,10 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
struct gendisk *disk;
- struct module *owner;
int ret;
int partno;
int perm = 0;
+ bool first_open = false;
if (mode & FMODE_READ)
perm |= MAY_READ;
@@ -1430,14 +1450,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
restart:
ret = -ENXIO;
- disk = get_gendisk(bdev->bd_dev, &partno);
+ disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
goto out;
- owner = disk->fops->owner;
disk_block_events(disk);
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
+ first_open = true;
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
@@ -1463,8 +1483,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_queue = NULL;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
goto restart;
}
}
@@ -1524,15 +1543,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (ret)
goto out_unlock_bdev;
}
- /* only one opener holds refs to the module and disk */
- put_disk(disk);
- module_put(owner);
}
bdev->bd_openers++;
if (for_part)
bdev->bd_part_count++;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
+ /* only one opener holds refs to the module and disk */
+ if (!first_open)
+ put_disk_and_module(disk);
return 0;
out_clear:
@@ -1546,8 +1565,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
out:
bdput(bdev);
@@ -1770,8 +1788,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
disk->fops->release(disk, mode);
}
if (!bdev->bd_openers) {
- struct module *owner = disk->fops->owner;
-
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
@@ -1779,8 +1795,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
- put_disk(disk);
- module_put(owner);
+ put_disk_and_module(disk);
}
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f94b2d8c744a..26484648d090 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1519,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
if (!node)
break;
bytenr = node->val;
+ shared.share_count = 0;
cond_resched();
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1a462ab85c49..da308774b8a4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2974,7 +2974,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
security_free_mnt_opts(&fs_info->security_opts);
- kfree(fs_info);
+ kvfree(fs_info);
}
/* tree mod log functions from ctree.c */
@@ -3095,7 +3095,10 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow);
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+ const char *name,
+ int name_len, struct btrfs_inode_ref **ref_ret);
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
u64 ref_objectid, const char *name,
int name_len,
struct btrfs_inode_extref **extref_ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c1618ab9fecf..e0460d7b5622 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3990,7 +3990,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
bg = btrfs_lookup_block_group(fs_info, bytenr);
ASSERT(bg);
if (atomic_dec_and_test(&bg->nocow_writers))
- wake_up_atomic_t(&bg->nocow_writers);
+ wake_up_var(&bg->nocow_writers);
/*
* Once for our lookup and once for the lookup done by a previous call
* to btrfs_inc_nocow_writers()
@@ -4001,8 +4001,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
{
- wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}
static const char *alloc_name(u64 flags)
@@ -6526,7 +6525,7 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
bg = btrfs_lookup_block_group(fs_info, start);
ASSERT(bg);
if (atomic_dec_and_test(&bg->reservations))
- wake_up_atomic_t(&bg->reservations);
+ wake_up_var(&bg->reservations);
btrfs_put_block_group(bg);
}
@@ -6552,8 +6551,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
down_write(&space_info->groups_sem);
up_write(&space_info->groups_sem);
- wait_on_atomic_t(&bg->reservations, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
}
/**
@@ -11061,7 +11059,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
ret = btrfs_start_write_no_snapshotting(root);
if (ret)
break;
- wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ wait_var_event(&root->will_be_snapshotted,
+ !atomic_read(&root->will_be_snapshotted));
}
}
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 39c968f80157..65e1a76bf755 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -22,10 +22,10 @@
#include "transaction.h"
#include "print-tree.h"
-static int find_name_in_backref(struct btrfs_path *path, const char *name,
- int name_len, struct btrfs_inode_ref **ref_ret)
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+ const char *name,
+ int name_len, struct btrfs_inode_ref **ref_ret)
{
- struct extent_buffer *leaf;
struct btrfs_inode_ref *ref;
unsigned long ptr;
unsigned long name_ptr;
@@ -33,9 +33,8 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
u32 cur_offset = 0;
int len;
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
while (cur_offset < item_size) {
ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
len = btrfs_inode_ref_name_len(leaf, ref);
@@ -44,18 +43,19 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
if (len != name_len)
continue;
if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
- *ref_ret = ref;
+ if (ref_ret)
+ *ref_ret = ref;
return 1;
}
}
return 0;
}
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
+ u64 ref_objectid,
const char *name, int name_len,
struct btrfs_inode_extref **extref_ret)
{
- struct extent_buffer *leaf;
struct btrfs_inode_extref *extref;
unsigned long ptr;
unsigned long name_ptr;
@@ -63,9 +63,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
u32 cur_offset = 0;
int ref_name_len;
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
/*
* Search all extended backrefs in this item. We're only
@@ -113,7 +112,9 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
if (ret > 0)
return NULL;
- if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref))
+ if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+ ref_objectid, name, name_len,
+ &extref))
return NULL;
return extref;
}
@@ -155,7 +156,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
* This should always succeed so error here will make the FS
* readonly.
*/
- if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+ ref_objectid,
name, name_len, &extref)) {
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
ret = -EROFS;
@@ -225,7 +227,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
} else if (ret < 0) {
goto out;
}
- if (!find_name_in_backref(path, name, name_len, &ref)) {
+ if (!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, name_len, &ref)) {
ret = -ENOENT;
search_ext_refs = 1;
goto out;
@@ -293,7 +296,9 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
- if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0],
+ ref_objectid,
name, name_len, NULL))
goto out;
@@ -351,7 +356,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (ret == -EEXIST) {
u32 old_size;
- if (find_name_in_backref(path, name, name_len, &ref))
+ if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, name_len, &ref))
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
@@ -365,7 +371,9 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ret = 0;
} else if (ret < 0) {
if (ret == -EOVERFLOW) {
- if (find_name_in_backref(path, name, name_len, &ref))
+ if (btrfs_find_name_in_backref(path->nodes[0],
+ path->slots[0],
+ name, name_len, &ref))
ret = -EEXIST;
else
ret = -EMLINK;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a79299a89b7d..f53470112670 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2043,12 +2043,15 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct inode *inode, struct list_head *list)
{
struct btrfs_ordered_sum *sum;
+ int ret;
list_for_each_entry(sum, list, list) {
trans->adding_csums = true;
- btrfs_csum_file_blocks(trans,
+ ret = btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
trans->adding_csums = false;
+ if (ret)
+ return ret;
}
return 0;
}
@@ -3062,7 +3065,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- add_pending_csums(trans, inode, &ordered_extent->list);
+ ret = add_pending_csums(trans, inode, &ordered_extent->list);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode_fallback(trans, root, inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 111ee282b777..3278ae592a2c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -723,7 +723,7 @@ fail:
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (atomic_dec_and_test(&root->will_be_snapshotted))
- wake_up_atomic_t(&root->will_be_snapshotted);
+ wake_up_var(&root->will_be_snapshotted);
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dec0907dfb8a..fcfc20de2df3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
+ stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f0c3f00e97cb..cd2298d185dd 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3268,8 +3268,22 @@ static int relocate_file_extent_cluster(struct inode *inode,
nr++;
}
- btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL,
- 0);
+ ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
+ NULL, 0);
+ if (ret) {
+ unlock_page(page);
+ put_page(page);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ PAGE_SIZE);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
+
+ clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ page_start, page_end,
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
+ goto out;
+
+ }
set_page_dirty(page);
unlock_extent(&BTRFS_I(inode)->io_tree,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f306c608dc28..484e2af793de 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5005,6 +5005,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 len;
int ret = 0;
+ if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+ return send_update_extent(sctx, offset, end - offset);
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6e71a2a78363..4b817947e00f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1545,7 +1545,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
* it for searching for existing supers, so this lets us do that and
* then open_ctree will properly initialize everything later.
*/
- fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
+ fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
if (!fs_info) {
error = -ENOMEM;
goto error_sec_opts;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4fd19b4d6675..434457794c27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -967,7 +967,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
if (key->type == BTRFS_INODE_EXTREF_KEY) {
- if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ if (btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0],
+ ref_objectid,
name, namelen, NULL))
match = 1;
@@ -1191,7 +1193,8 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
read_extent_buffer(eb, *name, (unsigned long)&extref->name,
*namelen);
- *index = btrfs_inode_extref_index(eb, extref);
+ if (index)
+ *index = btrfs_inode_extref_index(eb, extref);
if (parent_objectid)
*parent_objectid = btrfs_inode_extref_parent(eb, extref);
@@ -1212,12 +1215,102 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
- *index = btrfs_inode_ref_index(eb, ref);
+ if (index)
+ *index = btrfs_inode_ref_index(eb, ref);
return 0;
}
/*
+ * Take an inode reference item from the log tree and iterate all names from the
+ * inode reference item in the subvolume tree with the same key (if it exists).
+ * For any name that is not in the inode reference item from the log tree, do a
+ * proper unlink of that name (that is, remove its entry from the inode
+ * reference item and both dir index keys).
+ */
+static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_inode *inode,
+ struct extent_buffer *log_eb,
+ int log_slot,
+ struct btrfs_key *key)
+{
+ int ret;
+ unsigned long ref_ptr;
+ unsigned long ref_end;
+ struct extent_buffer *eb;
+
+again:
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (ret < 0)
+ goto out;
+
+ eb = path->nodes[0];
+ ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+ ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]);
+ while (ref_ptr < ref_end) {
+ char *name = NULL;
+ int namelen;
+ u64 parent_id;
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ NULL, &parent_id);
+ } else {
+ parent_id = key->offset;
+ ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+ NULL);
+ }
+ if (ret)
+ goto out;
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(log_eb, log_slot,
+ parent_id, name,
+ namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(log_eb, log_slot, name,
+ namelen, NULL);
+
+ if (!ret) {
+ struct inode *dir;
+
+ btrfs_release_path(path);
+ dir = read_one_inode(root, parent_id);
+ if (!dir) {
+ ret = -ENOENT;
+ kfree(name);
+ goto out;
+ }
+ ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+ inode, name, namelen);
+ kfree(name);
+ iput(dir);
+ if (ret)
+ goto out;
+ goto again;
+ }
+
+ kfree(name);
+ ref_ptr += namelen;
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
+ ref_ptr += sizeof(struct btrfs_inode_extref);
+ else
+ ref_ptr += sizeof(struct btrfs_inode_ref);
+ }
+ ret = 0;
+ out:
+ btrfs_release_path(path);
+ return ret;
+}
+
+/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
* root is the destination we are replaying into, and path is for temp
@@ -1345,6 +1438,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * Before we overwrite the inode reference item in the subvolume tree
+ * with the item from the log tree, we must unlink all names from the
+ * parent directory that are in the subvolume's tree inode reference
+ * item, otherwise we end up with an inconsistent subvolume tree where
+ * dir index entries exist for a name but there is no inode reference
+ * item with the same name.
+ */
+ ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot,
+ key);
+ if (ret)
+ goto out;
+
/* finally write the back reference in the inode */
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
@@ -5853,7 +5959,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
* this will force the logging code to walk the dentry chain
* up for the file
*/
- if (S_ISREG(inode->vfs_inode.i_mode))
+ if (!S_ISDIR(inode->vfs_inode.i_mode))
inode->last_unlink_trans = trans->transid;
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2ceb924ca0d6..b2d05c6b1c56 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4829,10 +4829,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ndevs = min(ndevs, devs_max);
/*
- * the primary goal is to maximize the number of stripes, so use as many
- * devices as possible, even if the stripes are not maximum sized.
+ * The primary goal is to maximize the number of stripes, so use as
+ * many devices as possible, even if the stripes are not maximum sized.
+ *
+ * The DUP profile stores more than one stripe per device, the
+ * max_avail is the total size so we have to adjust.
*/
- stripe_size = devices_info[ndevs-1].max_avail;
+ stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
num_stripes = ndevs * dev_stripes;
/*
@@ -4867,8 +4870,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail;
}
- stripe_size = div_u64(stripe_size, dev_stripes);
-
/* align to BTRFS_STRIPE_LEN */
stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6582c4507e6c..0e5bd3e3344e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3965,6 +3965,32 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
}
/*
+ * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * looks like the link count will hit 0, drop any other caps (other
+ * than PIN) we don't specifically want (due to the file still being
+ * open).
+ */
+int ceph_drop_caps_for_unlink(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; /* link caps are always released */
+
+ spin_lock(&ci->i_ceph_lock);
+ if (inode->i_nlink == 1) { /* last link: the count looks set to hit 0 */
+ drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); /* also release every cap that is neither wanted nor PIN */
+
+ ci->i_ceph_flags |= CEPH_I_NODELAY;
+ if (__ceph_caps_dirty(ci)) { /* NOTE(review): presumably requeues the inode at the head of the delayed-cap list so dirty caps go out promptly - confirm against __cap_delay_requeue_front() */
+ struct ceph_mds_client *mdsc =
+ ceph_inode_to_client(inode)->mdsc;
+ __cap_delay_requeue_front(mdsc, ci);
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return drop; /* bitmask of caps the caller places in the request's drop field */
+}
+
+/*
* Helpers for embedding cap and dentry lease releases into mds
* requests.
*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0c4346806e17..f1d9c6cc0491 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1003,26 +1003,6 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
}
/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
- * looks like the link count will hit 0, drop any other caps (other
- * than PIN) we don't specifically want (due to the file still being
- * open).
- */
-static int drop_caps_for_unlink(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-
- spin_lock(&ci->i_ceph_lock);
- if (inode->i_nlink == 1) {
- drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
- ci->i_ceph_flags |= CEPH_I_NODELAY;
- }
- spin_unlock(&ci->i_ceph_lock);
- return drop;
-}
-
-/*
* rmdir and unlink differ only by the metadata op code
*/
static int ceph_unlink(struct inode *dir, struct dentry *dentry)
@@ -1056,7 +1036,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- req->r_inode_drop = drop_caps_for_unlink(inode);
+ req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
d_delete(dentry);
@@ -1104,8 +1084,10 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_RDCACHE on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
- if (d_really_is_positive(new_dentry))
- req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
+ if (d_really_is_positive(new_dentry)) {
+ req->r_inode_drop =
+ ceph_drop_caps_for_unlink(d_inode(new_dentry));
+ }
err = ceph_mdsc_do_request(mdsc, old_dir, req);
if (!err && !req->r_reply_info.head->is_dentry) {
/*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6639926eed4e..b67eec3532a1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -640,7 +640,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
struct ceph_aio_request {
struct kiocb *iocb;
size_t total_len;
- int write;
+ bool write;
+ bool should_dirty;
int error;
struct list_head osd_reqs;
unsigned num_reqs;
@@ -750,7 +751,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
}
}
- ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
+ ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty);
ceph_osdc_put_request(req);
if (rc < 0)
@@ -847,6 +848,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
+ bool should_dirty = !write && iter_is_iovec(iter);
if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
@@ -914,6 +916,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (aio_req) {
aio_req->iocb = iocb;
aio_req->write = write;
+ aio_req->should_dirty = should_dirty;
INIT_LIST_HEAD(&aio_req->osd_reqs);
if (write) {
aio_req->mtime = mtime;
@@ -971,7 +974,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
len = ret;
}
- ceph_put_page_vector(pages, num_pages, !write);
+ ceph_put_page_vector(pages, num_pages, should_dirty);
ceph_osdc_put_request(req);
if (ret < 0)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a62d2a9841dc..fb2bc9c15a23 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -225,6 +225,7 @@ static int parse_fsopt_token(char *c, void *private)
return -ENOMEM;
break;
case Opt_mds_namespace:
+ kfree(fsopt->mds_namespace);
fsopt->mds_namespace = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
@@ -232,6 +233,7 @@ static int parse_fsopt_token(char *c, void *private)
return -ENOMEM;
break;
case Opt_fscache_uniq:
+ kfree(fsopt->fscache_uniq);
fsopt->fscache_uniq = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
GFP_KERNEL);
@@ -711,14 +713,17 @@ static int __init init_caches(void)
goto bad_dentry;
ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
-
if (!ceph_file_cachep)
goto bad_file;
- if ((error = ceph_fscache_register()))
- goto bad_file;
+ error = ceph_fscache_register();
+ if (error)
+ goto bad_fscache;
return 0;
+
+bad_fscache:
+ kmem_cache_destroy(ceph_file_cachep);
bad_file:
kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
@@ -836,7 +841,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
int err;
unsigned long started = jiffies; /* note the start time */
struct dentry *root;
- int first = 0; /* first vfsmount for this super_block */
dout("mount start %p\n", fsc);
mutex_lock(&fsc->client->mount_mutex);
@@ -861,17 +865,17 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
path = fsc->mount_options->server_path + 1;
dout("mount opening path %s\n", path);
}
+
+ err = ceph_fs_debugfs_init(fsc);
+ if (err < 0)
+ goto out;
+
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto out;
}
fsc->sb->s_root = dget(root);
- first = 1;
-
- err = ceph_fs_debugfs_init(fsc);
- if (err < 0)
- goto fail;
} else {
root = dget(fsc->sb->s_root);
}
@@ -881,11 +885,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
mutex_unlock(&fsc->client->mount_mutex);
return root;
-fail:
- if (first) {
- dput(fsc->sb->s_root);
- fsc->sb->s_root = NULL;
- }
out:
mutex_unlock(&fsc->client->mount_mutex);
return ERR_PTR(err);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 21b2e5b004eb..1c2086e0fec2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -987,7 +987,7 @@ extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
-
+extern int ceph_drop_caps_for_unlink(struct inode *inode);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0d0461cf2431..57bc96435feb 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -146,7 +146,7 @@ out:
/* And here is where the userspace process can look up the cookie value
* to retrieve the path.
*/
-SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len)
+static int do_lookup_dcookie(u64 cookie64, char __user *buf, size_t len)
{
unsigned long cookie = (unsigned long)cookie64;
int err = -EINVAL;
@@ -203,13 +203,18 @@ out:
return err;
}
+SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len)
+{
+ return do_lookup_dcookie(cookie64, buf, len);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
- return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
+ return do_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
#else
- return sys_lookup_dcookie(((u64)w1 << 32) | w0, buf, len);
+ return do_lookup_dcookie(((u64)w1 << 32) | w0, buf, len);
#endif
}
#endif
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a0ca9e48e993..1357ef563893 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1274,8 +1274,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
*/
if (dio->is_async && iov_iter_rw(iter) == WRITE) {
retval = 0;
- if ((iocb->ki_filp->f_flags & O_DSYNC) ||
- IS_SYNC(iocb->ki_filp->f_mapping->host))
+ if (iocb->ki_flags & IOCB_DSYNC)
retval = dio_set_defer_completion(dio);
else if (!dio->inode->i_sb->s_dio_done_wq) {
/*
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
struct sockaddr_storage saddr;
- int buflen;
void (*orig_report)(struct sock *) = NULL;
read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
orig_report = listen_sock.sk_error_report;
if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the connected socket's peer */
memset(&peeraddr, 0, sizeof(peeraddr));
- if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
- &len, 2)) {
+ len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+ if (len < 0) {
result = -ECONNABORTED;
goto accept_err;
}
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 5f22e74bbade..8e568428c88b 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -8,6 +8,7 @@
*/
#include <linux/efi.h>
+#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/mount.h>
@@ -74,6 +75,11 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
ssize_t size = 0;
int err;
+ while (!__ratelimit(&file->f_cred->user->ratelimit)) { /* throttle reads per user */
+ if (msleep_interruptible(50)) /* non-zero remainder: the sleep was cut short by a signal */
+ return -EINTR;
+ }
+
err = efivar_entry_size(var, &datasize);
/*
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 012f5bd46dfa..08d3bd602f73 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -380,7 +380,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
-SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+static int do_eventfd(unsigned int count, int flags)
{
struct eventfd_ctx *ctx;
int fd;
@@ -409,8 +409,13 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
return fd;
}
+SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
+{
+ return do_eventfd(count, flags);
+}
+
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
- return sys_eventfd2(count, 0);
+ return do_eventfd(count, 0);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0f3494ed3ed0..602ca4285b2e 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1936,7 +1936,7 @@ static void clear_tfile_check_list(void)
/*
* Open an eventpoll file descriptor.
*/
-SYSCALL_DEFINE1(epoll_create1, int, flags)
+static int do_epoll_create(int flags)
{
int error, fd;
struct eventpoll *ep = NULL;
@@ -1979,12 +1979,17 @@ out_free_ep:
return error;
}
+SYSCALL_DEFINE1(epoll_create1, int, flags)
+{
+ return do_epoll_create(flags);
+}
+
SYSCALL_DEFINE1(epoll_create, int, size)
{
if (size <= 0)
return -EINVAL;
- return sys_epoll_create1(0);
+ return do_epoll_create(0);
}
/*
@@ -2148,8 +2153,8 @@ error_return:
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_wait(2).
*/
-SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
- int, maxevents, int, timeout)
+static int do_epoll_wait(int epfd, struct epoll_event __user *events,
+ int maxevents, int timeout)
{
int error;
struct fd f;
@@ -2190,6 +2195,12 @@ error_fput:
return error;
}
+SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
+ int, maxevents, int, timeout)
+{
+ return do_epoll_wait(epfd, events, maxevents, timeout);
+}
+
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_pwait(2).
@@ -2214,7 +2225,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
set_current_blocked(&ksigmask);
}
- error = sys_epoll_wait(epfd, events, maxevents, timeout);
+ error = do_epoll_wait(epfd, events, maxevents, timeout);
/*
* If we changed the signal mask, we need to restore the original one.
@@ -2257,7 +2268,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
set_current_blocked(&ksigmask);
}
- err = sys_epoll_wait(epfd, events, maxevents, timeout);
+ err = do_epoll_wait(epfd, events, maxevents, timeout);
/*
* If we changed the signal mask, we need to restore the original one.
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 1e97f1fda90c..d737ff082472 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -607,8 +607,8 @@ static int fixup_compat_flock(struct flock *flock)
return 0;
}
-COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
- compat_ulong_t, arg)
+static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
+ compat_ulong_t arg)
{
struct fd f = fdget_raw(fd);
struct flock flock;
@@ -672,6 +672,12 @@ out_put:
return err;
}
+COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg)
+{
+ return do_compat_fcntl64(fd, cmd, arg);
+}
+
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
compat_ulong_t, arg)
{
@@ -684,7 +690,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
case F_OFD_SETLKW:
return -EINVAL;
}
- return compat_sys_fcntl64(fd, cmd, arg);
+ return do_compat_fcntl64(fd, cmd, arg);
}
#endif
diff --git a/fs/file.c b/fs/file.c
index 42f0db4bd0fb..7ffd6e9d103d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,6 +638,7 @@ out_unlock:
spin_unlock(&files->file_lock);
return -EBADF;
}
+EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
void do_close_on_exec(struct files_struct *files)
{
@@ -870,7 +871,7 @@ out_unlock:
return err;
}
-SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
struct file *file;
@@ -904,6 +905,11 @@ out_unlock:
return err;
}
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+ return ksys_dup3(oldfd, newfd, flags);
+}
+
SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
if (unlikely(newfd == oldfd)) { /* corner case */
@@ -916,10 +922,10 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
rcu_read_unlock();
return retval;
}
- return sys_dup3(oldfd, newfd, 0);
+ return ksys_dup3(oldfd, newfd, 0);
}
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
+int ksys_dup(unsigned int fildes)
{
int ret = -EBADF;
struct file *file = fget_raw(fildes);
@@ -934,6 +940,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
return ret;
}
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+ return ksys_dup(fildes);
+}
+
int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
int err;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index ff84258132bb..d705125665f0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -557,9 +557,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
* n_active reaches 0). This makes sure outstanding reads and writes
* have completed.
*/
- if (!atomic_dec_and_test(&cookie->n_active))
- wait_on_atomic_t(&cookie->n_active, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&cookie->n_active)) {
+ wait_var_event(&cookie->n_active,
+ !atomic_read(&cookie->n_active));
+ }
/* Make sure any pending writes are cancelled. */
if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 86d6a4435c87..51f940e76c5e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -807,9 +807,6 @@ do_alloc:
iomap->length = hole_size(inode, lblock, &mp);
else
iomap->length = size - pos;
- } else {
- if (height <= ip->i_height)
- iomap->length = hole_size(inode, lblock, &mp);
}
goto out_release;
}
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index ffaec2e7526c..cb8374af08a6 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -84,7 +84,7 @@ extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
extern int make_symlink(const char *from, const char *to);
extern int unlink_file(const char *file);
extern int do_mkdir(const char *file, int mode);
-extern int do_rmdir(const char *file);
+extern int hostfs_do_rmdir(const char *file);
extern int do_mknod(const char *file, int mode, unsigned int major,
unsigned int minor);
extern int link_file(const char *from, const char *to);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c148e7f4f451..3cd85eb5bbb1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -706,7 +706,7 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
if ((file = dentry_name(dentry)) == NULL)
return -ENOMEM;
- err = do_rmdir(file);
+ err = hostfs_do_rmdir(file);
__putname(file);
return err;
}
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 9c1e0f019880..5ecc4706172b 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -304,7 +304,7 @@ int do_mkdir(const char *file, int mode)
return 0;
}
-int do_rmdir(const char *file)
+int hostfs_do_rmdir(const char *file)
{
int err;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..b9a254dcc0e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &hugetlb_vm_ops;
/*
- * Offset passed to mmap (before page shift) could have been
- * negative when represented as a (l)off_t.
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a (l)off_t when converted to byte offset.
*/
- if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
diff --git a/fs/internal.h b/fs/internal.h
index df262f41a0ef..980d005b21b4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,7 +55,15 @@ extern void __init chrdev_init(void);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
+long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ unsigned int dev);
+long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
+long do_rmdir(int dfd, const char __user *pathname);
long do_unlinkat(int dfd, struct filename *name);
+long do_symlinkat(const char __user *oldname, int newdfd,
+ const char __user *newname);
+int do_linkat(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, int flags);
/*
* namespace.c
@@ -111,6 +119,12 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname,
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
+long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
+long do_faccessat(int dfd, const char __user *filename, int mode);
+int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
+int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
+ int flag);
+
extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file *filp_clone_open(struct file *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5ace7efb0d04..4823431d1c9d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -689,7 +689,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
return error;
}
-SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
struct fd f = fdget(fd);
@@ -702,3 +702,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
fdput(f);
return error;
}
+
+SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+{
+ return ksys_ioctl(fd, cmd, arg);
+}
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index f2a0cfcef11d..bcd53a79156f 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -18,7 +18,7 @@ config MINIX_FS
config MINIX_FS_NATIVE_ENDIAN
def_bool MINIX_FS
- depends on M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
+ depends on MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
def_bool MINIX_FS
diff --git a/fs/namei.c b/fs/namei.c
index a3cd028e8a9b..a09419379f5d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
static bool path_connected(const struct path *path)
{
struct vfsmount *mnt = path->mnt;
+ struct super_block *sb = mnt->mnt_sb;
- /* Only bind mounts can have disconnected paths */
- if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ /* Bind mounts and multi-root filesystems can have disconnected paths */
+ if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
return is_subdir(path->dentry, mnt->mnt_root);
@@ -3721,8 +3722,8 @@ static int may_mknod(umode_t mode)
}
}
-SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
- unsigned, dev)
+long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ unsigned int dev)
{
struct dentry *dentry;
struct path path;
@@ -3765,9 +3766,15 @@ out:
return error;
}
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
+ unsigned int, dev)
+{
+ return do_mknodat(dfd, filename, mode, dev);
+}
+
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
- return sys_mknodat(AT_FDCWD, filename, mode, dev);
+ return do_mknodat(AT_FDCWD, filename, mode, dev);
}
int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -3796,7 +3803,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
EXPORT_SYMBOL(vfs_mkdir);
-SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
+long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
{
struct dentry *dentry;
struct path path;
@@ -3821,9 +3828,14 @@ retry:
return error;
}
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
+{
+ return do_mkdirat(dfd, pathname, mode);
+}
+
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
- return sys_mkdirat(AT_FDCWD, pathname, mode);
+ return do_mkdirat(AT_FDCWD, pathname, mode);
}
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -3865,7 +3877,7 @@ out:
}
EXPORT_SYMBOL(vfs_rmdir);
-static long do_rmdir(int dfd, const char __user *pathname)
+long do_rmdir(int dfd, const char __user *pathname)
{
int error = 0;
struct filename *name;
@@ -4101,8 +4113,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
}
EXPORT_SYMBOL(vfs_symlink);
-SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
- int, newdfd, const char __user *, newname)
+long do_symlinkat(const char __user *oldname, int newdfd,
+ const char __user *newname)
{
int error;
struct filename *from;
@@ -4132,9 +4144,15 @@ out_putname:
return error;
}
+SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
+ int, newdfd, const char __user *, newname)
+{
+ return do_symlinkat(oldname, newdfd, newname);
+}
+
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
- return sys_symlinkat(oldname, AT_FDCWD, newname);
+ return do_symlinkat(oldname, AT_FDCWD, newname);
}
/**
@@ -4226,8 +4244,8 @@ EXPORT_SYMBOL(vfs_link);
* with linux 2.0, and to avoid hard-linking to directories
* and other special files. --ADM
*/
-SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
- int, newdfd, const char __user *, newname, int, flags)
+int do_linkat(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, int flags)
{
struct dentry *new_dentry;
struct path old_path, new_path;
@@ -4291,9 +4309,15 @@ out:
return error;
}
+SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname, int, flags)
+{
+ return do_linkat(olddfd, oldname, newdfd, newname, flags);
+}
+
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
- return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
/**
@@ -4471,8 +4495,8 @@ out:
}
EXPORT_SYMBOL(vfs_rename);
-SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
- int, newdfd, const char __user *, newname, unsigned int, flags)
+static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
+ const char __user *newname, unsigned int flags)
{
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
@@ -4614,15 +4638,21 @@ exit:
return error;
}
+SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
+ int, newdfd, const char __user *, newname, unsigned int, flags)
+{
+ return do_renameat2(olddfd, oldname, newdfd, newname, flags);
+}
+
SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
- return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
+ return do_renameat2(olddfd, oldname, newdfd, newname, 0);
}
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{
- return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
int vfs_whiteout(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 9d1374ab6e06..e398f32d7541 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1680,7 +1680,7 @@ static inline bool may_mandlock(void)
* unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
*/
-SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+int ksys_umount(char __user *name, int flags)
{
struct path path;
struct mount *mnt;
@@ -1720,6 +1720,11 @@ out:
return retval;
}
+SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+{
+ return ksys_umount(name, flags);
+}
+
#ifdef __ARCH_WANT_SYS_OLDUMOUNT
/*
@@ -1727,7 +1732,7 @@ out:
*/
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
- return sys_umount(name, 0);
+ return ksys_umount(name, 0);
}
#endif
@@ -3032,8 +3037,8 @@ struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
}
EXPORT_SYMBOL(mount_subtree);
-SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
- char __user *, type, unsigned long, flags, void __user *, data)
+int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
+ unsigned long flags, void __user *data)
{
int ret;
char *kernel_type;
@@ -3066,6 +3071,12 @@ out_type:
return ret;
}
+SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
+ char __user *, type, unsigned long, flags, void __user *, data)
+{
+ return ksys_mount(dev_name, dir_name, type, flags, data);
+}
+
/*
* Return true if path is reachable from root
*
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2435af56b87e..a50d7813e3ea 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -572,7 +572,7 @@ out:
}
static bool
-validate_bitmap_values(unsigned long mask)
+validate_bitmap_values(unsigned int mask)
{
return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
}
@@ -596,17 +596,15 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
goto out;
status = cpu_to_be32(NFS4_OK);
- if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
- &args->craa_type_mask))
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_RDATA_DLG))
flags = FMODE_READ;
- if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
- &args->craa_type_mask))
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_WDATA_DLG))
flags |= FMODE_WRITE;
- if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
- &args->craa_type_mask))
- pnfs_recall_all_layouts(cps->clp);
if (flags)
nfs_expire_unused_delegation_types(cps->clp, flags);
+
+ if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
+ pnfs_recall_all_layouts(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8c10b0562e75..621c517b325c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -86,10 +86,10 @@ struct nfs_direct_req {
struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
int mirror_count;
+ loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
bytes_left, /* bytes left to be sent */
- io_start, /* start of IO */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..d17a90c4fa37 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -85,11 +85,6 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
-int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode)
-{
- return nfs_wait_killable(mode);
-}
-
/**
* nfs_compat_user_ino64 - returns the user-visible inode number
* @fileid: 64-bit fileid
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 49f848fd1f04..7327930ad970 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -873,7 +873,7 @@ static void nfs3_nlm_release_call(void *data)
}
}
-const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+static const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
.nlmclnt_alloc_call = nfs3_nlm_alloc_call,
.nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
.nlmclnt_release_call = nfs3_nlm_release_call,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 04612c24d394..979631411a0e 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -868,8 +868,10 @@ static int nfs4_set_client(struct nfs_server *server,
if (IS_ERR(clp))
return PTR_ERR(clp);
- if (server->nfs_client == clp)
+ if (server->nfs_client == clp) {
+ nfs_put_client(clp);
return -ELOOP;
+ }
/*
* Query for the lease time on clientid setup or renewal
@@ -1244,11 +1246,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion, net);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
- nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);
return error;
}
+ nfs_put_client(clp);
if (server->nfs_client->cl_hostname == NULL)
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18a7626ac638..67d19cd92e44 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -98,8 +98,8 @@ nfs_page_free(struct nfs_page *p)
int
nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
{
- return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable,
- TASK_KILLABLE);
+ return wait_var_event_killable(&l_ctx->io_count,
+ !atomic_read(&l_ctx->io_count));
}
/**
@@ -395,7 +395,7 @@ static void nfs_clear_request(struct nfs_page *req)
}
if (l_ctx != NULL) {
if (atomic_dec_and_test(&l_ctx->io_count)) {
- wake_up_atomic_t(&l_ctx->io_count);
+ wake_up_var(&l_ctx->io_count);
if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..ee723aa153a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
- struct inode *inode = lo->plh_inode;
+ struct inode *inode;
+ if (!lo)
+ return;
+ inode = lo->plh_inode;
pnfs_layoutreturn_before_put_layout_hdr(lo);
if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ retry:
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !pnfs_layout_is_valid(lo) ||
- test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ lo = NULL;
goto out_noroc;
+ }
+ pnfs_get_layout_hdr(lo);
if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
- pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ out_noroc:
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(args);
+ pnfs_put_layout_hdr(lo);
return true;
}
if (layoutreturn)
pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+ pnfs_put_layout_hdr(lo);
return false;
}
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 03aaa60c7768..32ba2d471853 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -245,7 +245,7 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
{
if (list_empty(pages)) {
if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
- wake_up_atomic_t(&cinfo->mds->rpcs_out);
+ wake_up_var(&cinfo->mds->rpcs_out);
/* don't call nfs_commitdata_release - it tries to put
* the open_context which is not acquired until nfs_init_commit
* which has not been called on @data */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 29bacdc56f6a..5e470e233c83 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
/* initial superblock/root creation */
mount_info->fill_super(s, mount_info);
nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+ if (!(server->flags & NFS_MOUNT_UNSHARED))
+ s->s_iflags |= SB_I_MULTIROOT;
}
mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7428a669d7a7..6579f3b367bd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1620,8 +1620,8 @@ static void nfs_writeback_result(struct rpc_task *task,
static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
{
- return wait_on_atomic_t(&cinfo->rpcs_out,
- nfs_wait_atomic_killable, TASK_KILLABLE);
+ return wait_var_event_killable(&cinfo->rpcs_out,
+ !atomic_read(&cinfo->rpcs_out));
}
static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
@@ -1632,7 +1632,7 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
if (atomic_dec_and_test(&cinfo->rpcs_out))
- wake_up_atomic_t(&cinfo->rpcs_out);
+ wake_up_var(&cinfo->rpcs_out);
}
void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
return status;
}
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+ struct writeback_control *wbc)
{
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
- int error = 0;
- int res;
+ int ret, nscan;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
- res = nfs_scan_commit(inode, &head, &cinfo);
- if (res)
- error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ for (;;) {
+ ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+ if (ret <= 0)
+ break;
+ ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+ if (ret < 0)
+ break;
+ ret = 0;
+ if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+ if (nscan < wbc->nr_to_write)
+ wbc->nr_to_write -= nscan;
+ else
+ wbc->nr_to_write = 0;
+ }
+ if (nscan < INT_MAX)
+ break;
+ cond_resched();
+ }
nfs_commit_end(cinfo.mds);
- if (res == 0)
- return res;
- if (error < 0)
- goto out_error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_commit(cinfo.mds);
- if (error < 0)
- return error;
- return res;
-out_error:
- res = error;
- /* Note: If we exit without ensuring that the commit is complete,
- * we must mark the inode as dirty. Otherwise, future calls to
- * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
- * that the data is on the disk.
- */
-out_mark_dirty:
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- return res;
+ if (ret || !may_wait)
+ return ret;
+ return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+ return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int flags = FLUSH_SYNC;
int ret = 0;
- /* no commits means nothing needs to be done */
- if (!atomic_long_read(&nfsi->commit_info.ncommit))
- return ret;
-
if (wbc->sync_mode == WB_SYNC_NONE) {
+ /* no commits means nothing needs to be done */
+ if (!atomic_long_read(&nfsi->commit_info.ncommit))
+ goto check_requests_outstanding;
+
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
flags = 0;
}
- ret = nfs_commit_inode(inode, flags);
- if (ret >= 0) {
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (ret < wbc->nr_to_write)
- wbc->nr_to_write -= ret;
- else
- wbc->nr_to_write = 0;
- }
- return 0;
- }
+ ret = __nfs_commit_inode(inode, flags, wbc);
+ if (!ret) {
+ if (flags & FLUSH_SYNC)
+ return 0;
+ } else if (atomic_long_read(&nfsi->commit_info.ncommit))
+ goto out_mark_dirty;
+
+check_requests_outstanding:
+ if (!atomic_read(&nfsi->commit_info.rpcs_out))
+ return ret;
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 150521c9671b..61b770e39809 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
kfree(nbl);
}
+static void
+remove_blocked_locks(struct nfs4_lockowner *lo)
+{
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct nfsd4_blocked_lock *nbl;
+ LIST_HEAD(reaplist);
+
+ /* Dequeue all blocked locks */
+ spin_lock(&nn->blocked_locks_lock);
+ while (!list_empty(&lo->lo_blocked)) {
+ nbl = list_first_entry(&lo->lo_blocked,
+ struct nfsd4_blocked_lock,
+ nbl_list);
+ list_del_init(&nbl->nbl_list);
+ list_move(&nbl->nbl_lru, &reaplist);
+ }
+ spin_unlock(&nn->blocked_locks_lock);
+
+ /* Now free them */
+ while (!list_empty(&reaplist)) {
+ nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
+ nbl_lru);
+ list_del_init(&nbl->nbl_lru);
+ posix_unblock_lock(&nbl->nbl_lock);
+ free_blocked_lock(nbl);
+ }
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
static void
__destroy_client(struct nfs4_client *clp)
{
+ int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct list_head reaplist;
@@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp)
nfs4_get_stateowner(&oo->oo_owner);
release_openowner(oo);
}
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ struct nfs4_stateowner *so, *tmp;
+
+ list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ /* Should be no openowners at this point */
+ WARN_ON_ONCE(so->so_is_open_owner);
+ remove_blocked_locks(lockowner(so));
+ }
+ }
nfsd4_return_all_client_layouts(clp);
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
@@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
}
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
+ remove_blocked_locks(lo);
nfs4_put_stateowner(&lo->lo_owner);
return status;
@@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net)
}
}
+ WARN_ON(!list_empty(&nn->blocked_locks_lru));
+
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->unconf_id_hashtbl[i])) {
clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net)
struct nfs4_delegation *dp = NULL;
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct nfsd4_blocked_lock *nbl;
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_put_stid(&dp->dl_stid);
}
- BUG_ON(!list_empty(&reaplist));
- spin_lock(&nn->blocked_locks_lock);
- while (!list_empty(&nn->blocked_locks_lru)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
- struct nfsd4_blocked_lock, nbl_lru);
- list_move(&nbl->nbl_lru, &reaplist);
- list_del_init(&nbl->nbl_list);
- }
- spin_unlock(&nn->blocked_locks_lock);
-
- while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&reaplist,
- struct nfsd4_blocked_lock, nbl_lru);
- list_del_init(&nbl->nbl_lru);
- posix_unblock_lock(&nbl->nbl_lock);
- free_blocked_lock(nbl);
- }
-
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c07eb3d655ea..fa803a58a605 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -820,9 +820,8 @@ out_destroy_group:
return fd;
}
-SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
- __u64, mask, int, dfd,
- const char __user *, pathname)
+static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
@@ -928,13 +927,20 @@ fput_and_out:
return ret;
}
+SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
+ __u64, mask, int, dfd,
+ const char __user *, pathname)
+{
+ return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
__u32, mask0, __u32, mask1, int, dfd,
const char __user *, pathname)
{
- return sys_fanotify_mark(fanotify_fd, flags,
+ return do_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
((__u64)mask0 << 32) | mask1,
#else
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 2c908b31d6c9..43c23653ce2e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -635,7 +635,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
/* inotify syscalls */
-SYSCALL_DEFINE1(inotify_init1, int, flags)
+static int do_inotify_init(int flags)
{
struct fsnotify_group *group;
int ret;
@@ -660,9 +660,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
return ret;
}
+SYSCALL_DEFINE1(inotify_init1, int, flags)
+{
+ return do_inotify_init(flags);
+}
+
SYSCALL_DEFINE0(inotify_init)
{
- return sys_inotify_init1(0);
+ return do_inotify_init(0);
}
SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
return fd;
}
+EXPORT_SYMBOL_GPL(open_related_ns);
static long ns_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
static int o2net_accept_one(struct socket *sock, int *more)
{
- int ret, slen;
+ int ret;
struct sockaddr_in sin;
struct socket *new_sock = NULL;
struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
- slen = sizeof(sin);
- ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
- &slen, 1);
+ ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
goto out;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index e87279e49ba3..6b92cb241138 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -134,9 +134,10 @@ ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
{
struct ocfs2_filecheck_entry *p;
- if (!atomic_dec_and_test(&entry->fs_count))
- wait_on_atomic_t(&entry->fs_count, atomic_t_wait,
- TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&entry->fs_count)) {
+ wait_var_event(&entry->fs_count,
+ !atomic_read(&entry->fs_count));
+ }
spin_lock(&entry->fs_fcheck->fc_lock);
while (!list_empty(&entry->fs_fcheck->fc_head)) {
@@ -183,7 +184,7 @@ static void
ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
{
if (atomic_dec_and_test(&entry->fs_count))
- wake_up_atomic_t(&entry->fs_count);
+ wake_up_var(&entry->fs_count);
}
static struct ocfs2_filecheck_sysfs_entry *
diff --git a/fs/open.c b/fs/open.c
index 7ea118471dce..d0e955b558ad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -128,7 +128,7 @@ out:
}
EXPORT_SYMBOL_GPL(vfs_truncate);
-static long do_sys_truncate(const char __user *pathname, loff_t length)
+long do_sys_truncate(const char __user *pathname, loff_t length)
{
unsigned int lookup_flags = LOOKUP_FOLLOW;
struct path path;
@@ -162,7 +162,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
}
#endif
-static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
@@ -333,7 +333,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
-SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
+int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
struct fd f = fdget(fd);
int error = -EBADF;
@@ -345,12 +345,17 @@ SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
return error;
}
+SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
+{
+ return ksys_fallocate(fd, mode, offset, len);
+}
+
/*
* access() needs to use the real uid/gid, not the effective uid/gid.
* We do this by temporarily clearing all FS-related capabilities and
* switching the fsuid/fsgid around to the real ones.
*/
-SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
+long do_faccessat(int dfd, const char __user *filename, int mode)
{
const struct cred *old_cred;
struct cred *override_cred;
@@ -426,12 +431,17 @@ out:
return res;
}
+SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
+{
+ return do_faccessat(dfd, filename, mode);
+}
+
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
- return sys_faccessat(AT_FDCWD, filename, mode);
+ return do_faccessat(AT_FDCWD, filename, mode);
}
-SYSCALL_DEFINE1(chdir, const char __user *, filename)
+int ksys_chdir(const char __user *filename)
{
struct path path;
int error;
@@ -457,6 +467,11 @@ out:
return error;
}
+SYSCALL_DEFINE1(chdir, const char __user *, filename)
+{
+ return ksys_chdir(filename);
+}
+
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
struct fd f = fdget_raw(fd);
@@ -479,7 +494,7 @@ out:
return error;
}
-SYSCALL_DEFINE1(chroot, const char __user *, filename)
+int ksys_chroot(const char __user *filename)
{
struct path path;
int error;
@@ -512,6 +527,11 @@ out:
return error;
}
+SYSCALL_DEFINE1(chroot, const char __user *, filename)
+{
+ return ksys_chroot(filename);
+}
+
static int chmod_common(const struct path *path, umode_t mode)
{
struct inode *inode = path->dentry->d_inode;
@@ -541,7 +561,7 @@ out_unlock:
return error;
}
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
+int ksys_fchmod(unsigned int fd, umode_t mode)
{
struct fd f = fdget(fd);
int err = -EBADF;
@@ -554,7 +574,12 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
return err;
}
-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
+{
+ return ksys_fchmod(fd, mode);
+}
+
+int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
{
struct path path;
int error;
@@ -572,9 +597,15 @@ retry:
return error;
}
+SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
+ umode_t, mode)
+{
+ return do_fchmodat(dfd, filename, mode);
+}
+
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
- return sys_fchmodat(AT_FDCWD, filename, mode);
+ return do_fchmodat(AT_FDCWD, filename, mode);
}
static int chown_common(const struct path *path, uid_t user, gid_t group)
@@ -619,8 +650,8 @@ retry_deleg:
return error;
}
-SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
- gid_t, group, int, flag)
+int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
+ int flag)
{
struct path path;
int error = -EINVAL;
@@ -651,18 +682,24 @@ out:
return error;
}
+SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
+ gid_t, group, int, flag)
+{
+ return do_fchownat(dfd, filename, user, group, flag);
+}
+
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
- return sys_fchownat(AT_FDCWD, filename, user, group, 0);
+ return do_fchownat(AT_FDCWD, filename, user, group, 0);
}
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
- return sys_fchownat(AT_FDCWD, filename, user, group,
- AT_SYMLINK_NOFOLLOW);
+ return do_fchownat(AT_FDCWD, filename, user, group,
+ AT_SYMLINK_NOFOLLOW);
}
-SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
+int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
struct fd f = fdget(fd);
int error = -EBADF;
@@ -682,6 +719,11 @@ out:
return error;
}
+SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
+{
+ return ksys_fchown(fd, user, group);
+}
+
int open_check_o_direct(struct file *f)
{
/* NB: we're sure to have correct a_ops only after f_op->open */
@@ -1114,7 +1156,7 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla
*/
SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
{
- return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
+ return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
}
#endif
@@ -1163,7 +1205,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
return retval;
}
-EXPORT_SYMBOL(sys_close);
/*
* This routine simulates a hangup on the tty, to arrange that users
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 406e72de88f6..ce6ff5a0a6e4 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,8 @@ config OVERLAY_FS_REDIRECT_DIR
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
+ If unsure, say N.
+
config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
bool "Overlayfs: follow redirects even if redirects are turned off"
default y
@@ -32,8 +34,13 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
Disable this to get a possibly more secure configuration, but that
might not be backward compatible with previous kernels.
+ If backward compatibility is not an issue, then it is safe and
+ recommended to say N here.
+
For more information, see Documentation/filesystems/overlayfs.txt
+ If unsure, say Y.
+
config OVERLAY_FS_INDEX
bool "Overlayfs: turn on inodes index feature by default"
depends on OVERLAY_FS
@@ -51,6 +58,8 @@ config OVERLAY_FS_INDEX
That is, mounting an overlay which has an inodes index on a kernel
that doesn't support this feature will have unexpected results.
+ If unsure, say N.
+
config OVERLAY_FS_NFS_EXPORT
bool "Overlayfs: turn on NFS export feature by default"
depends on OVERLAY_FS
@@ -72,3 +81,8 @@ config OVERLAY_FS_NFS_EXPORT
Note, that the NFS export feature is not backward compatible.
That is, mounting an overlay which has a full index on a kernel
that doesn't support this feature will have unexpected results.
+
+ Most users should say N here and enable this feature on a case-by-
+ case basis with the "nfs_export=on" mount option.
+
+ Say N unless you fully understand the consequences.
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index bb94ce9da5c8..87bd4148f4fb 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -19,6 +19,142 @@
#include <linux/ratelimit.h>
#include "overlayfs.h"
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+ int err;
+
+ if (ovl_dentry_upper(dentry))
+ return 0;
+
+ err = ovl_want_write(dentry);
+ if (!err) {
+ err = ovl_copy_up(dentry);
+ ovl_drop_write(dentry);
+ }
+
+ if (err) {
+ pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+ dentry, err);
+ }
+
+ return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem at decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", at encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will be accomplished.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+ struct ovl_entry *oe = OVL_E(dentry);
+
+ /* We can get overlay root from root of any layer */
+ if (dentry == dentry->d_sb->s_root)
+ return oe->numlower;
+
+ /*
+ * If it's an unindexed merge dir, then it's not connectable with any
+ * lower layer
+ */
+ if (ovl_dentry_upper(dentry) &&
+ !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ return 0;
+
+ /* We can get upper/overlay path from indexed/lower dentry */
+ return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+ struct dentry *next, *parent = NULL;
+ int origin_layer;
+ int err = 0;
+
+ if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+ WARN_ON(!ovl_dentry_lower(dentry)))
+ return -EIO;
+
+ origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+ if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+ return origin_layer;
+
+ /* Find the topmost origin layer connectable ancestor of @dentry */
+ next = dget(dentry);
+ for (;;) {
+ parent = dget_parent(next);
+ if (WARN_ON(parent == next)) {
+ err = -EIO;
+ break;
+ }
+
+ /*
+ * If @parent is not origin layer connectable, then copy up
+ * @next which is origin layer connectable and we are done.
+ */
+ if (ovl_connectable_layer(parent) < origin_layer) {
+ err = ovl_encode_maybe_copy_up(next);
+ break;
+ }
+
+ /* If @parent is connected or indexed we are done */
+ if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+ ovl_test_flag(OVL_INDEX, d_inode(parent)))
+ break;
+
+ dput(next);
+ next = parent;
+ }
+
+ dput(parent);
+ dput(next);
+
+ if (!err)
+ ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+ return err ?: origin_layer;
+}
+
/*
* We only need to encode origin if there is a chance that the same object was
* encoded pre copy up and then we need to stay consistent with the same
@@ -41,73 +177,59 @@
* L = lower file handle
*
* (*) Connecting an overlay dir from real lower dentry is not always
- * possible when there are redirects in lower layers. To mitigate this case,
- * we copy up the lower dir first and then encode an upper dir file handle.
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those cases, we may copy up the lower dir ancestor before
+ * encoding a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
-static bool ovl_should_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ /* Upper file handle for pure upper */
if (!ovl_dentry_lower(dentry))
- return false;
+ return 0;
/*
- * Decoding a merge dir, whose origin's parent is under a redirected
- * lower dir is not always possible. As a simple aproximation, we do
- * not encode lower dir file handles when overlay has multiple lower
- * layers and origin is below the topmost lower layer.
+ * Upper file handle for non-indexed upper.
*
- * TODO: copy up only the parent that is under redirected lower.
+ * Root is never indexed, so if there's an upper layer, encode upper for
+ * root.
*/
- if (d_is_dir(dentry) && ofs->upper_mnt &&
- OVL_E(dentry)->lowerstack[0].layer->idx > 1)
- return false;
-
- /* Decoding a non-indexed upper from origin is not implemented */
if (ovl_dentry_upper(dentry) &&
!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
- return false;
-
- return true;
-}
-
-static int ovl_encode_maybe_copy_up(struct dentry *dentry)
-{
- int err;
-
- if (ovl_dentry_upper(dentry))
return 0;
- err = ovl_want_write(dentry);
- if (err)
- return err;
-
- err = ovl_copy_up(dentry);
+ /*
+ * Decoding a merge dir, whose origin's ancestor is under a redirected
+ * lower dir or under a non-indexed upper is not always possible.
+ * ovl_connect_layer() will try to make origin's layer "connected" by
+ * copying up a "connectable" ancestor.
+ */
+ if (d_is_dir(dentry) && ofs->upper_mnt)
+ return ovl_connect_layer(dentry);
- ovl_drop_write(dentry);
- return err;
+ /* Lower file handle for indexed and non-upper dir/non-dir */
+ return 1;
}
static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
{
- struct dentry *origin = ovl_dentry_lower(dentry);
struct ovl_fh *fh = NULL;
- int err;
+ int err, enc_lower;
/*
- * If we should not encode a lower dir file handle, copy up and encode
- * an upper dir file handle.
+ * Check if we should encode a lower or upper file handle and maybe
+ * copy up an ancestor to make lower file handle connectable.
*/
- if (!ovl_should_encode_origin(dentry)) {
- err = ovl_encode_maybe_copy_up(dentry);
- if (err)
- goto fail;
-
- origin = NULL;
- }
+ err = enc_lower = ovl_check_encode_origin(dentry);
+ if (enc_lower < 0)
+ goto fail;
- /* Encode an upper or origin file handle */
- fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+ /* Encode an upper or lower file handle */
+ fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
+ ovl_dentry_upper(dentry), !enc_lower);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto fail;
@@ -355,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
dput(upper);
}
- if (!this)
- return NULL;
+ if (IS_ERR_OR_NULL(this))
+ return this;
if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
dput(this);
@@ -498,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
if (err == -ECHILD) {
this = ovl_lookup_real_ancestor(sb, real,
layer);
- err = IS_ERR(this) ? PTR_ERR(this) : 0;
+ err = PTR_ERR_OR_ZERO(this);
}
if (!err) {
dput(connected);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index fcd97b783fa1..3b1bd469accd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
return inode;
}
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+ struct dentry *lower, struct dentry *index)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ /* No, if pure upper */
+ if (!lower)
+ return false;
+
+ /* Yes, if already indexed */
+ if (index)
+ return true;
+
+ /* Yes, if won't be copied up */
+ if (!ofs->upper_mnt)
+ return true;
+
+ /* No, if lower hardlink is or will be broken on copy up */
+ if ((upper || !ovl_indexdir(sb)) &&
+ !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+ return false;
+
+ /* No, if non-indexed upper with NFS export */
+ if (sb->s_export_op && upper)
+ return false;
+
+ /* Otherwise, hash by lower inode for fsnotify */
+ return true;
+}
+
struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
struct dentry *lowerdentry, struct dentry *index,
unsigned int numlower)
{
- struct ovl_fs *ofs = sb->s_fs_info;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
- /* Already indexed or could be indexed on copy up? */
- bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
- struct dentry *origin = indexed ? lowerdentry : NULL;
+ bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
bool is_dir;
- if (WARN_ON(upperdentry && indexed && !lowerdentry))
- return ERR_PTR(-EIO);
-
if (!realinode)
realinode = d_inode(lowerdentry);
/*
- * Copy up origin (lower) may exist for non-indexed non-dir upper, but
- * we must not use lower as hash key in that case.
- * Hash non-dir that is or could be indexed by origin inode.
- * Hash dir that is or could be merged by origin inode.
- * Hash pure upper and non-indexed non-dir by upper inode.
- * Hash non-indexed dir by upper inode for NFS export.
+ * Copy up origin (lower) may exist for non-indexed upper, but we must
+ * not use lower as hash key if this is a broken hardlink.
*/
is_dir = S_ISDIR(realinode->i_mode);
- if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
- origin = lowerdentry;
-
- if (upperdentry || origin) {
- struct inode *key = d_inode(origin ?: upperdentry);
+ if (upperdentry || bylower) {
+ struct inode *key = d_inode(bylower ? lowerdentry :
+ upperdentry);
unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
inode = iget5_locked(sb, (unsigned long) key,
@@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
} else {
+ /* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
if (!inode)
goto out_nomem;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de3e6da1d5a5..70fcfcc684cc 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
stack[ctr].layer = lower.layer;
ctr++;
- if (d.stop)
- break;
-
/*
* Following redirects can have security consequences: it's like
* a symlink into the lower layer without the permission checks.
@@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out_put;
}
+ if (d.stop)
+ break;
+
if (d.redirect && d.redirect[0] == '/' && poe != roe) {
poe = roe;
/* Find the current layer on the root dentry */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 0df25a9c94bd..225ff1171147 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -40,6 +40,7 @@ enum ovl_inode_flag {
enum ovl_entry_flag {
OVL_E_UPPER_ALIAS,
OVL_E_OPAQUE,
+ OVL_E_CONNECTED,
};
/*
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9ee37c76091d..7c24619ae7fc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
+ ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
ovl_dentry_lower(root_dentry));
diff --git a/fs/pipe.c b/fs/pipe.c
index 7b1954caf388..39d6f431da83 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -841,7 +841,7 @@ int do_pipe_flags(int *fd, int flags)
* sys_pipe() is the normal C calling standard for creating
* a pipe. It's not the way Unix traditionally does this, though.
*/
-SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
+static int do_pipe2(int __user *fildes, int flags)
{
struct file *files[2];
int fd[2];
@@ -863,9 +863,14 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
return error;
}
+SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
+{
+ return do_pipe2(fildes, flags);
+}
+
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
- return sys_pipe2(fildes, 0);
+ return do_pipe2(fildes, 0);
}
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9298324325ed..d53246863cfb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,9 +94,6 @@
#include <linux/sched/stat.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
-#ifdef CONFIG_HARDWALL
-#include <asm/hardwall.h>
-#endif
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -3002,9 +2999,6 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tgid_io_accounting),
#endif
-#ifdef CONFIG_HARDWALL
- ONE("hardwall", S_IRUGO, proc_pid_hardwall),
-#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
@@ -3393,9 +3387,6 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
-#ifdef CONFIG_HARDWALL
- ONE("hardwall", S_IRUGO, proc_pid_hardwall),
-#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
diff --git a/fs/quota/compat.c b/fs/quota/compat.c
index 779caed4f078..c30572857619 100644
--- a/fs/quota/compat.c
+++ b/fs/quota/compat.c
@@ -41,8 +41,9 @@ struct compat_fs_quota_stat {
__u16 qs_iwarnlimit;
};
-asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
- qid_t id, void __user *addr)
+COMPAT_SYSCALL_DEFINE4(quotactl32, unsigned int, cmd,
+ const char __user *, special, qid_t, id,
+ void __user *, addr)
{
unsigned int cmds;
struct if_dqblk __user *dqblk;
@@ -59,7 +60,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
case Q_GETQUOTA:
dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
compat_dqblk = addr;
- ret = sys_quotactl(cmd, special, id, dqblk);
+ ret = kernel_quotactl(cmd, special, id, dqblk);
if (ret)
break;
if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
@@ -75,12 +76,12 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
get_user(data, &compat_dqblk->dqb_valid) ||
put_user(data, &dqblk->dqb_valid))
break;
- ret = sys_quotactl(cmd, special, id, dqblk);
+ ret = kernel_quotactl(cmd, special, id, dqblk);
break;
case Q_XGETQSTAT:
fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
compat_fsqstat = addr;
- ret = sys_quotactl(cmd, special, id, fsqstat);
+ ret = kernel_quotactl(cmd, special, id, fsqstat);
if (ret)
break;
ret = -EFAULT;
@@ -113,7 +114,7 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
ret = 0;
break;
default:
- ret = sys_quotactl(cmd, special, id, addr);
+ ret = kernel_quotactl(cmd, special, id, addr);
}
return ret;
}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 43612e2a73af..860bfbe7a07a 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -833,8 +833,8 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
* calls. Maybe we need to add the process quotas etc. in the future,
* but we probably should use rlimits for that.
*/
-SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
- qid_t, id, void __user *, addr)
+int kernel_quotactl(unsigned int cmd, const char __user *special,
+ qid_t id, void __user *addr)
{
uint cmds, type;
struct super_block *sb = NULL;
@@ -885,3 +885,9 @@ out:
path_put(pathp);
return ret;
}
+
+SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
+ qid_t, id, void __user *, addr)
+{
+ return kernel_quotactl(cmd, special, id, addr);
+}
diff --git a/fs/read_write.c b/fs/read_write.c
index f8547b82dfb3..c4eabbfc90df 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
-SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
+off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
{
off_t retval;
struct fd f = fdget_pos(fd);
@@ -319,10 +319,15 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
return retval;
}
+SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
+{
+ return ksys_lseek(fd, offset, whence);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
- return sys_lseek(fd, offset, whence);
+ return ksys_lseek(fd, offset, whence);
}
#endif
@@ -563,7 +568,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
file->f_pos = pos;
}
-SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
+ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -578,8 +583,12 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
return ret;
}
-SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
- size_t, count)
+SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
+{
+ return ksys_read(fd, buf, count);
+}
+
+ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
@@ -595,8 +604,14 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
return ret;
}
-SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
- size_t, count, loff_t, pos)
+SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
+ size_t, count)
+{
+ return ksys_write(fd, buf, count);
+}
+
+ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
+ loff_t pos)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -615,8 +630,14 @@ SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
return ret;
}
-SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
- size_t, count, loff_t, pos)
+SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
+ size_t, count, loff_t, pos)
+{
+ return ksys_pread64(fd, buf, count, pos);
+}
+
+ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
+ size_t count, loff_t pos)
{
struct fd f;
ssize_t ret = -EBADF;
@@ -635,6 +656,12 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
return ret;
}
+SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
+ size_t, count, loff_t, pos)
+{
+ return ksys_pwrite64(fd, buf, count, pos);
+}
+
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
loff_t *ppos, int type, rwf_t flags)
{
diff --git a/fs/readdir.c b/fs/readdir.c
index 1b83b0ad183b..d97f548e6323 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -292,8 +292,8 @@ efault:
return -EFAULT;
}
-SYSCALL_DEFINE3(getdents64, unsigned int, fd,
- struct linux_dirent64 __user *, dirent, unsigned int, count)
+int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
+ unsigned int count)
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
@@ -326,6 +326,13 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
return error;
}
+
+SYSCALL_DEFINE3(getdents64, unsigned int, fd,
+ struct linux_dirent64 __user *, dirent, unsigned int, count)
+{
+ return ksys_getdents64(fd, dirent, count);
+}
+
#ifdef CONFIG_COMPAT
struct compat_old_linux_dirent {
compat_ulong_t d_ino;
diff --git a/fs/select.c b/fs/select.c
index b6c36254028a..ba879c51288f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -675,8 +675,8 @@ out_nofds:
return ret;
}
-SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
- fd_set __user *, exp, struct timeval __user *, tvp)
+static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
+ fd_set __user *exp, struct timeval __user *tvp)
{
struct timespec64 end_time, *to = NULL;
struct timeval tv;
@@ -699,6 +699,12 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
return ret;
}
+SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
+ fd_set __user *, exp, struct timeval __user *, tvp)
+{
+ return kern_select(n, inp, outp, exp, tvp);
+}
+
static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
fd_set __user *exp, struct timespec __user *tsp,
const sigset_t __user *sigmask, size_t sigsetsize)
@@ -784,7 +790,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
- return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+ return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
}
#endif
@@ -1259,9 +1265,9 @@ out_nofds:
return ret;
}
-COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
- compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
- struct compat_timeval __user *, tvp)
+static int do_compat_select(int n, compat_ulong_t __user *inp,
+ compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+ struct compat_timeval __user *tvp)
{
struct timespec64 end_time, *to = NULL;
struct compat_timeval tv;
@@ -1284,6 +1290,13 @@ COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
return ret;
}
+COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct compat_timeval __user *, tvp)
+{
+ return do_compat_select(n, inp, outp, exp, tvp);
+}
+
struct compat_sel_arg_struct {
compat_ulong_t n;
compat_uptr_t inp;
@@ -1298,8 +1311,8 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
- return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
- compat_ptr(a.exp), compat_ptr(a.tvp));
+ return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
+ compat_ptr(a.exp), compat_ptr(a.tvp));
}
static long do_compat_pselect(int n, compat_ulong_t __user *inp,
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9990957264e3..d2187a813376 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -118,13 +118,22 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
#endif
#ifdef BUS_MCEERR_AO
- /*
+ /*
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (kinfo->si_signo == SIGBUS &&
- (kinfo->si_code == BUS_MCEERR_AR ||
- kinfo->si_code == BUS_MCEERR_AO))
+ kinfo->si_code == BUS_MCEERR_AO)
+ err |= __put_user((short) kinfo->si_addr_lsb,
+ &uinfo->ssi_addr_lsb);
+#endif
+#ifdef BUS_MCEERR_AR
+ /*
+ * Other callers might not initialize the si_lsb field,
+ * so check explicitly for the right codes here.
+ */
+ if (kinfo->si_signo == SIGBUS &&
+ kinfo->si_code == BUS_MCEERR_AR)
err |= __put_user((short) kinfo->si_addr_lsb,
&uinfo->ssi_addr_lsb);
#endif
@@ -247,8 +256,8 @@ static const struct file_operations signalfd_fops = {
.llseek = noop_llseek,
};
-SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
- size_t, sizemask, int, flags)
+static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask,
+ int flags)
{
sigset_t sigmask;
struct signalfd_ctx *ctx;
@@ -301,17 +310,22 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
return ufd;
}
+SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
+ size_t, sizemask, int, flags)
+{
+ return do_signalfd4(ufd, user_mask, sizemask, flags);
+}
+
SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask,
size_t, sizemask)
{
- return sys_signalfd4(ufd, user_mask, sizemask, 0);
+ return do_signalfd4(ufd, user_mask, sizemask, 0);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
- const compat_sigset_t __user *,sigmask,
- compat_size_t, sigsetsize,
- int, flags)
+static long do_compat_signalfd4(int ufd,
+ const compat_sigset_t __user *sigmask,
+ compat_size_t sigsetsize, int flags)
{
sigset_t tmp;
sigset_t __user *ksigmask;
@@ -324,13 +338,21 @@ COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
return -EFAULT;
- return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
+ return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
+}
+
+COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd,
+ const compat_sigset_t __user *, sigmask,
+ compat_size_t, sigsetsize,
+ int, flags)
+{
+ return do_compat_signalfd4(ufd, sigmask, sigsetsize, flags);
}
COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd,
const compat_sigset_t __user *,sigmask,
compat_size_t, sigsetsize)
{
- return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
+ return do_compat_signalfd4(ufd, sigmask, sigsetsize, 0);
}
#endif
diff --git a/fs/splice.c b/fs/splice.c
index 39e2dc01ac12..005d09cf3fa8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1331,8 +1331,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
* Currently we punt and implement it as a normal copy, see pipe_to_user().
*
*/
-SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
- unsigned long, nr_segs, unsigned int, flags)
+static long do_vmsplice(int fd, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
{
struct fd f;
long error;
@@ -1358,6 +1358,12 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
return error;
}
+SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
+ unsigned long, nr_segs, unsigned int, flags)
+{
+ return do_vmsplice(fd, iov, nr_segs, flags);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
unsigned int, nr_segs, unsigned int, flags)
@@ -1375,7 +1381,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
put_user(v.iov_len, &iov[i].iov_len))
return -EFAULT;
}
- return sys_vmsplice(fd, iov, nr_segs, flags);
+ return do_vmsplice(fd, iov, nr_segs, flags);
}
#endif
diff --git a/fs/stat.c b/fs/stat.c
index 873785dae022..f8e6fb2c3657 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -379,8 +379,8 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
return error;
}
-SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
- char __user *, buf, int, bufsiz)
+static int do_readlinkat(int dfd, const char __user *pathname,
+ char __user *buf, int bufsiz)
{
struct path path;
int error;
@@ -415,10 +415,16 @@ retry:
return error;
}
+SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
+ char __user *, buf, int, bufsiz)
+{
+ return do_readlinkat(dfd, pathname, buf, bufsiz);
+}
+
SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
int, bufsiz)
{
- return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
+ return do_readlinkat(AT_FDCWD, path, buf, bufsiz);
}
diff --git a/fs/sync.c b/fs/sync.c
index 6e0a2cbaf6de..9908a114d506 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -105,7 +105,7 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
* just write metadata (such as inodes or bitmaps) to block device page cache
* and do not sync it on their own in ->sync_fs().
*/
-SYSCALL_DEFINE0(sync)
+void ksys_sync(void)
{
int nowait = 0, wait = 1;
@@ -117,6 +117,11 @@ SYSCALL_DEFINE0(sync)
iterate_bdevs(fdatawait_one_bdev, NULL);
if (unlikely(laptop_mode))
laptop_sync_completion();
+}
+
+SYSCALL_DEFINE0(sync)
+{
+ ksys_sync();
return 0;
}
@@ -280,8 +285,8 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
* already-instantiated disk blocks, there are no guarantees here that the data
* will be available after a crash.
*/
-SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
- unsigned int, flags)
+int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
+ unsigned int flags)
{
int ret;
struct fd f;
@@ -359,10 +364,16 @@ out:
return ret;
}
+SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
+ unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, offset, nbytes, flags);
+}
+
/* It would be nice if people remember that not all the world's an i386
when they introduce new system calls */
SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
loff_t, offset, loff_t, nbytes)
{
- return sys_sync_file_range(fd, offset, nbytes, flags);
+ return ksys_sync_file_range(fd, offset, nbytes, flags);
}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 8664db25a9a6..215c225b2ca1 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
{
return sysfs_do_create_link(kobj, target, name, 0);
}
+EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);
/**
* sysfs_delete_link - remove symlink in object's directory.
diff --git a/fs/utimes.c b/fs/utimes.c
index e4b3d7c2c9f5..69d4b6ba1bfb 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -184,8 +184,8 @@ SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
}
-SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
- struct timeval __user *, utimes)
+static long do_futimesat(int dfd, const char __user *filename,
+ struct timeval __user *utimes)
{
struct timeval times[2];
struct timespec64 tstimes[2];
@@ -212,10 +212,17 @@ SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0);
}
+
+SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename,
+ struct timeval __user *, utimes)
+{
+ return do_futimesat(dfd, filename, utimes);
+}
+
SYSCALL_DEFINE2(utimes, char __user *, filename,
struct timeval __user *, utimes)
{
- return sys_futimesat(AT_FDCWD, filename, utimes);
+ return do_futimesat(AT_FDCWD, filename, utimes);
}
#ifdef CONFIG_COMPAT
@@ -253,7 +260,8 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena
return do_utimes(dfd, filename, t ? tv : NULL, flags);
}
-COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t)
+static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
+ struct compat_timeval __user *t)
{
struct timespec64 tv[2];
@@ -272,8 +280,15 @@ COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filena
return do_utimes(dfd, filename, t ? tv : NULL, 0);
}
+COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd,
+ const char __user *, filename,
+ struct compat_timeval __user *, t)
+{
+ return do_compat_futimesat(dfd, filename, t);
+}
+
COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
{
- return compat_sys_futimesat(AT_FDCWD, filename, t);
+ return do_compat_futimesat(AT_FDCWD, filename, t);
}
#endif
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index fd975524f460..05c66e05ae20 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -767,7 +767,7 @@ int
xfs_scrub_agfl(
struct xfs_scrub_context *sc)
{
- struct xfs_scrub_agfl_info sai = { 0 };
+ struct xfs_scrub_agfl_info sai;
struct xfs_agf *agf;
xfs_agnumber_t agno;
unsigned int agflcount;
@@ -795,6 +795,7 @@ xfs_scrub_agfl(
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
goto out;
}
+ memset(&sai, 0, sizeof(sai));
sai.sz_entries = agflcount;
sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
if (!sai.entries) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 66e1edbfb2b2..046469fcc1b8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,15 +955,29 @@ static inline bool imap_needs_alloc(struct inode *inode,
(IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
}
+static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps)
+{
+ return nimaps &&
+ imap->br_startblock != HOLESTARTBLOCK &&
+ imap->br_state != XFS_EXT_UNWRITTEN;
+}
+
static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
{
/*
- * COW writes will allocate delalloc space, so we need to make sure
- * to take the lock exclusively here.
+ * COW writes may allocate delalloc space or convert unwritten COW
+ * extents, so we need to make sure to take the lock exclusively here.
*/
if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
return true;
- if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+
+ /*
+ * Extents not yet cached requires exclusive access, don't block.
+ * This is an opencoded xfs_ilock_data_map_shared() to cater for the
+ * non-blocking behaviour.
+ */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ !(ip->i_df.if_flags & XFS_IFEXTENTS))
return true;
return false;
}
@@ -993,16 +1007,18 @@ xfs_file_iomap_begin(
return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
}
- if (need_excl_ilock(ip, flags)) {
+ if (need_excl_ilock(ip, flags))
lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- } else {
- lockmode = xfs_ilock_data_map_shared(ip);
- }
+ else
+ lockmode = XFS_ILOCK_SHARED;
- if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
- error = -EAGAIN;
- goto out_unlock;
+ if (flags & IOMAP_NOWAIT) {
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+ return -EAGAIN;
+ if (!xfs_ilock_nowait(ip, lockmode))
+ return -EAGAIN;
+ } else {
+ xfs_ilock(ip, lockmode);
}
ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -1024,7 +1040,9 @@ xfs_file_iomap_begin(
goto out_unlock;
}
- if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+ if (xfs_is_reflink_inode(ip) &&
+ ((flags & IOMAP_WRITE) ||
+ ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) {
if (flags & IOMAP_DIRECT) {
/*
* A reflinked inode will result in CoW alloc.
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 3a55d6fc271b..7a39f40645f7 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -23,6 +23,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
@@ -456,10 +457,12 @@ xfs_cui_recover(
* transaction. Normally, any work that needs to be deferred
* gets attached to the same defer_ops that scheduled the
* refcount update. However, we're in log recovery here, so we
- * we create our own defer_ops and use that to finish up any
- * work that doesn't fit.
+ * we use the passed in defer_ops and to finish up any work that
+ * doesn't fit. We need to reserve enough blocks to handle a
+ * full btree split on either end of the refcount range.
*/
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
cudp = xfs_trans_get_cud(tp, cuip);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index f3b139c9aa16..49d3124863a8 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -23,6 +23,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
@@ -470,7 +471,8 @@ xfs_rui_recover(
}
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
rudp = xfs_trans_get_rud(tp, ruip);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7aba628dc527..93588ea3d3d2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -250,6 +250,7 @@ xfs_parseargs(
return -EINVAL;
break;
case Opt_logdev:
+ kfree(mp->m_logname);
mp->m_logname = match_strdup(args);
if (!mp->m_logname)
return -ENOMEM;
@@ -258,6 +259,7 @@ xfs_parseargs(
xfs_warn(mp, "%s option not allowed on this system", p);
return -EINVAL;
case Opt_rtdev:
+ kfree(mp->m_rtname);
mp->m_rtname = match_strdup(args);
if (!mp->m_rtname)
return -ENOMEM;