summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-22 04:19:09 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-22 04:19:09 +0300
commitd9a185f8b49678775ef56ecbdbc7b76970302897 (patch)
tree7ace1b26133e5d796af09e5d71d6531bcb69865c /fs
parentc22fc16d172fba4d19ffd8f2aa8fe67edba63895 (diff)
parent989974c804574d250ac92d44e220081959ac8ac1 (diff)
downloadlinux-d9a185f8b49678775ef56ecbdbc7b76970302897.tar.xz
Merge tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi: "This contains two new features: - Stack file operations: this allows removal of several hacks from the VFS, proper interaction of read-only open files with copy-up, possibility to implement fs modifying ioctls properly, and others. - Metadata only copy-up: when file is on lower layer and only metadata is modified (except size) then only copy up the metadata and continue to use the data from the lower file" * tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (66 commits) ovl: Enable metadata only feature ovl: Do not do metacopy only for ioctl modifying file attr ovl: Do not do metadata only copy-up for truncate operation ovl: add helper to force data copy-up ovl: Check redirect on index as well ovl: Set redirect on upper inode when it is linked ovl: Set redirect on metacopy files upon rename ovl: Do not set dentry type ORIGIN for broken hardlinks ovl: Add an inode flag OVL_CONST_INO ovl: Treat metacopy dentries as type OVL_PATH_MERGE ovl: Check redirects for metacopy files ovl: Move some dir related ovl_lookup_single() code in else block ovl: Do not expose metacopy only dentry from d_real() ovl: Open file with data except for the case of fsync ovl: Add helper ovl_inode_realdata() ovl: Store lower data inode in ovl_inode ovl: Fix ovl_getattr() to get number of blocks from lower ovl: Add helper ovl_dentry_lowerdata() to get lower data dentry ovl: Copy up meta inode data from lowest data inode ovl: Modify ovl_lookup() and friends to lookup metacopy dentry ...
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/ioctl.c11
-rw-r--r--fs/file_table.c69
-rw-r--r--fs/inode.c46
-rw-r--r--fs/internal.h11
-rw-r--r--fs/ioctl.c1
-rw-r--r--fs/locks.c20
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c69
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/open.c44
-rw-r--r--fs/overlayfs/Kconfig19
-rw-r--r--fs/overlayfs/Makefile4
-rw-r--r--fs/overlayfs/copy_up.c190
-rw-r--r--fs/overlayfs/dir.c105
-rw-r--r--fs/overlayfs/export.c3
-rw-r--r--fs/overlayfs/file.c511
-rw-r--r--fs/overlayfs/inode.c175
-rw-r--r--fs/overlayfs/namei.c195
-rw-r--r--fs/overlayfs/overlayfs.h47
-rw-r--r--fs/overlayfs/ovl_entry.h6
-rw-r--r--fs/overlayfs/readdir.c19
-rw-r--r--fs/overlayfs/super.c103
-rw-r--r--fs/overlayfs/util.c252
-rw-r--r--fs/read_write.c94
-rw-r--r--fs/xattr.c9
-rw-r--r--fs/xfs/xfs_file.c29
27 files changed, 1531 insertions, 525 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 318be7864072..53af9f5253f4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3217,8 +3217,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
- struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+ struct file *dst_file, loff_t dst_loff,
+ u64 olen);
/* file.c */
int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3a5d2a41e5f..63600dc2ac4c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3592,13 +3592,13 @@ out_unlock:
return ret;
}
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
- struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+ struct file *dst_file, loff_t dst_loff,
+ u64 olen)
{
struct inode *src = file_inode(src_file);
struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
- ssize_t res;
if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
@@ -3609,10 +3609,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
return -EINVAL;
}
- res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
- if (res)
- return res;
- return olen;
+ return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
}
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/file_table.c b/fs/file_table.c
index d6eccd04d703..e49af4caf15d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
security_file_free(f);
- percpu_counter_dec(&nr_files);
+ if (!(f->f_mode & FMODE_NOACCOUNT))
+ percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write,
}
#endif
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+ struct file *f;
+ int error;
+
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ f->f_cred = get_cred(cred);
+ error = security_file_alloc(f);
+ if (unlikely(error)) {
+ file_free_rcu(&f->f_u.fu_rcuhead);
+ return ERR_PTR(error);
+ }
+
+ atomic_long_set(&f->f_count, 1);
+ rwlock_init(&f->f_owner.lock);
+ spin_lock_init(&f->f_lock);
+ mutex_init(&f->f_pos_lock);
+ eventpoll_init_file(f);
+ f->f_flags = flags;
+ f->f_mode = OPEN_FMODE(flags);
+ /* f->f_version: 0 */
+
+ return f;
+}
+
/* Find an unused file structure and return a pointer to it.
* Returns an error pointer if some error happend e.g. we over file
* structures limit, run out of memory or operation is not permitted.
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
{
static long old_max;
struct file *f;
- int error;
/*
* Privileged users can go above max_files
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
goto over;
}
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
- if (unlikely(!f))
- return ERR_PTR(-ENOMEM);
-
- f->f_cred = get_cred(cred);
- error = security_file_alloc(f);
- if (unlikely(error)) {
- file_free_rcu(&f->f_u.fu_rcuhead);
- return ERR_PTR(error);
- }
+ f = __alloc_file(flags, cred);
+ if (!IS_ERR(f))
+ percpu_counter_inc(&nr_files);
- atomic_long_set(&f->f_count, 1);
- rwlock_init(&f->f_owner.lock);
- spin_lock_init(&f->f_lock);
- mutex_init(&f->f_pos_lock);
- eventpoll_init_file(f);
- f->f_flags = flags;
- f->f_mode = OPEN_FMODE(flags);
- /* f->f_version: 0 */
- percpu_counter_inc(&nr_files);
return f;
over:
@@ -150,6 +162,21 @@ over:
return ERR_PTR(-ENFILE);
}
+/*
+ * Variant of alloc_empty_file() that doesn't check and modify nr_files.
+ *
+ * Should not be used unless there's a very good reason to do so.
+ */
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+{
+ struct file *f = __alloc_file(flags, cred);
+
+ if (!IS_ERR(f))
+ f->f_mode |= FMODE_NOACCOUNT;
+
+ return f;
+}
+
/**
* alloc_file - allocate and initialize a 'struct file'
*
diff --git a/fs/inode.c b/fs/inode.c
index a06de4454232..42f6d25f32a5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1596,49 +1596,16 @@ sector_t bmap(struct inode *inode, sector_t block)
EXPORT_SYMBOL(bmap);
/*
- * Update times in overlayed inode from underlying real inode
- */
-static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
- bool rcu)
-{
- struct dentry *upperdentry;
-
- /*
- * Nothing to do if in rcu or if non-overlayfs
- */
- if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
- return;
-
- upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
-
- /*
- * If file is on lower then we can't update atime, so no worries about
- * stale mtime/ctime.
- */
- if (upperdentry) {
- struct inode *realinode = d_inode(upperdentry);
-
- if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
- !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
- inode->i_mtime = realinode->i_mtime;
- inode->i_ctime = realinode->i_ctime;
- }
- }
-}
-
-/*
* With relative atime, only update atime if the previous atime is
* earlier than either the ctime or mtime or if at least a day has
* passed since the last atime update.
*/
-static int relatime_need_update(const struct path *path, struct inode *inode,
- struct timespec now, bool rcu)
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+ struct timespec now)
{
- if (!(path->mnt->mnt_flags & MNT_RELATIME))
+ if (!(mnt->mnt_flags & MNT_RELATIME))
return 1;
-
- update_ovl_inode_times(path->dentry, inode, rcu);
/*
* Is mtime younger than atime? If yes, update atime:
*/
@@ -1709,8 +1676,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
* This function automatically handles read only file systems and media,
* as well as the "noatime" flag and inode specific "noatime" markers.
*/
-bool __atime_needs_update(const struct path *path, struct inode *inode,
- bool rcu)
+bool atime_needs_update(const struct path *path, struct inode *inode)
{
struct vfsmount *mnt = path->mnt;
struct timespec64 now;
@@ -1736,7 +1702,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
now = current_time(inode);
- if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu))
+ if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
return false;
if (timespec64_equal(&inode->i_atime, &now))
@@ -1751,7 +1717,7 @@ void touch_atime(const struct path *path)
struct inode *inode = d_inode(path->dentry);
struct timespec64 now;
- if (!__atime_needs_update(path, inode, false))
+ if (!atime_needs_update(path, inode))
return;
if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index 50a28fc71300..d410186bc369 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,10 +82,8 @@ extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
-extern int mnt_want_write_file_path(struct file *);
extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *);
-extern void mnt_drop_write_file_path(struct file *);
/*
* fs_struct.c
@@ -96,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
* file_table.c
*/
extern struct file *alloc_empty_file(int, const struct cred *);
+extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/*
* super.c
@@ -136,13 +135,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);
extern int dentry_needs_remove_privs(struct dentry *dentry);
-extern bool __atime_needs_update(const struct path *, struct inode *, bool);
-static inline bool atime_needs_update_rcu(const struct path *path,
- struct inode *inode)
-{
- return __atime_needs_update(path, inode, true);
-}
-
/*
* fs-writeback.c
*/
@@ -185,7 +177,6 @@ extern const struct dentry_operations ns_dentry_operations;
*/
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/*
* iomap support:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index b445b13fc59b..3212c29235ce 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
out:
return error;
}
+EXPORT_SYMBOL(vfs_ioctl);
static int ioctl_fibmap(struct file *filp, int __user *p)
{
diff --git a/fs/locks.c b/fs/locks.c
index 5086bde5a18e..2ecb4db8c840 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,11 +139,6 @@
#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
-static inline bool is_remote_lock(struct file *filp)
-{
- return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
-}
-
static bool lease_breaking(struct file_lock *fl)
{
return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -1651,8 +1646,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) &&
- (atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
+ if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
return -EAGAIN;
if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@ -1873,7 +1867,7 @@ EXPORT_SYMBOL(generic_setlease);
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
- if (filp->f_op->setlease && is_remote_lock(filp))
+ if (filp->f_op->setlease)
return filp->f_op->setlease(filp, arg, lease, priv);
else
return generic_setlease(filp, arg, lease, priv);
@@ -2020,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
if (error)
goto out_free;
- if (f.file->f_op->flock && is_remote_lock(f.file))
+ if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
@@ -2046,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
return 0;
@@ -2196,7 +2190,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
return posix_lock_file(filp, fl, conf);
@@ -2518,7 +2512,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
if (list_empty(&flctx->flc_flock))
return;
- if (filp->f_op->flock && is_remote_lock(filp))
+ if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl);
else
flock_lock_inode(inode, &fl);
@@ -2605,7 +2599,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index 3cd396277cd3..ae6aa9ae757c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update_rcu(&last->link, inode)) {
+ } else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
diff --git a/fs/namespace.c b/fs/namespace.c
index bd2f4c68506a..725d6935fab9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -431,74 +431,20 @@ int __mnt_want_write_file(struct file *file)
}
/**
- * mnt_want_write_file_path - get write access to a file's mount
- * @file: the file who's mount on which to take a write
- *
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
- *
- * Called by the vfs for cases when we have an open file at hand, but will do an
- * inode operation on it (important distinction for files opened on overlayfs,
- * since the file operations will come from the real underlying file, while
- * inode operations come from the overlay).
- */
-int mnt_want_write_file_path(struct file *file)
-{
- int ret;
-
- sb_start_write(file->f_path.mnt->mnt_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file->f_path.mnt->mnt_sb);
- return ret;
-}
-
-static inline int may_write_real(struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct dentry *upperdentry;
-
- /* Writable file? */
- if (file->f_mode & FMODE_WRITER)
- return 0;
-
- /* Not overlayfs? */
- if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
- return 0;
-
- /* File refers to upper, writable layer? */
- upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
- if (upperdentry &&
- (file_inode(file) == d_inode(upperdentry) ||
- file_inode(file) == d_inode(dentry)))
- return 0;
-
- /* Lower layer: can't write to real file, sorry... */
- return -EPERM;
-}
-
-/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
- *
- * Mostly called by filesystems from their ioctl operation before performing
- * modification. On overlayfs this needs to check if the file is on a read-only
- * lower layer and deny access in that case.
*/
int mnt_want_write_file(struct file *file)
{
int ret;
- ret = may_write_real(file);
- if (!ret) {
- sb_start_write(file_inode(file)->i_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file_inode(file)->i_sb);
- }
+ sb_start_write(file_inode(file)->i_sb);
+ ret = __mnt_want_write_file(file);
+ if (ret)
+ sb_end_write(file_inode(file)->i_sb);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
@@ -538,14 +484,9 @@ void __mnt_drop_write_file(struct file *file)
__mnt_drop_write(file->f_path.mnt);
}
-void mnt_drop_write_file_path(struct file *file)
-{
- mnt_drop_write(file->f_path.mnt);
-}
-
void mnt_drop_write_file(struct file *file)
{
- __mnt_drop_write(file->f_path.mnt);
+ __mnt_drop_write_file(file);
sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 255f758af03a..9fa35cb6f6e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
len, false);
}
-static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
- u64 loff,
- u64 len,
- struct file *dst_file,
- u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
{
- int error;
-
- error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, true);
- if (error)
- return error;
- return len;
}
const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/open.c b/fs/open.c
index d98e19239bb7..0285ce7dbd51 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
long vfs_truncate(const struct path *path, loff_t length)
{
struct inode *inode;
- struct dentry *upperdentry;
long error;
inode = path->dentry->d_inode;
@@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length)
if (IS_APPEND(inode))
goto mnt_drop_write_and_out;
- /*
- * If this is an overlayfs then do as if opening the file so we get
- * write access on the upper inode, not on the overlay inode. For
- * non-overlay filesystems d_real() is an identity function.
- */
- upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
- error = PTR_ERR(upperdentry);
- if (IS_ERR(upperdentry))
- goto mnt_drop_write_and_out;
-
- error = get_write_access(upperdentry->d_inode);
+ error = get_write_access(inode);
if (error)
goto mnt_drop_write_and_out;
@@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
error = do_truncate(path->dentry, length, 0, NULL);
put_write_and_out:
- put_write_access(upperdentry->d_inode);
+ put_write_access(inode);
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
out:
@@ -707,12 +696,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
if (!f.file)
goto out;
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (error)
goto out_fput;
audit_file(f.file);
error = chown_common(&f.file->f_path, user, group);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
out_fput:
fdput(f);
out:
@@ -887,13 +876,8 @@ EXPORT_SYMBOL(file_path);
*/
int vfs_open(const struct path *path, struct file *file)
{
- struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
-
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
file->f_path = *path;
- return do_dentry_open(file, d_backing_inode(dentry), NULL);
+ return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
}
struct file *dentry_open(const struct path *path, int flags,
@@ -919,6 +903,24 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
+struct file *open_with_fake_path(const struct path *path, int flags,
+ struct inode *inode, const struct cred *cred)
+{
+ struct file *f = alloc_empty_file_noaccount(flags, cred);
+ if (!IS_ERR(f)) {
+ int error;
+
+ f->f_path = *path;
+ error = do_dentry_open(f, inode, NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ }
+ return f;
+}
+EXPORT_SYMBOL(open_with_fake_path);
+
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 9384164253ac..2ef91be2a04e 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT
bool "Overlayfs: turn on NFS export feature by default"
depends on OVERLAY_FS
depends on OVERLAY_FS_INDEX
+ depends on !OVERLAY_FS_METACOPY
help
If this config option is enabled then overlay filesystems will use
the index directory to decode overlay NFS file handles by default.
@@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO
For more information, see Documentation/filesystems/overlayfs.txt
If unsure, say N.
+
+config OVERLAY_FS_METACOPY
+ bool "Overlayfs: turn on metadata only copy up feature by default"
+ depends on OVERLAY_FS
+ select OVERLAY_FS_REDIRECT_DIR
+ help
+ If this config option is enabled then overlay filesystems will
+ copy up only metadata where appropriate and data copy up will
+ happen when a file is opened for WRITE operation. It is still
+ possible to turn off this feature globally with the "metacopy=off"
+ module option or on a filesystem instance basis with the
+ "metacopy=off" mount option.
+
+ Note, that this feature is not backward compatible. That is,
+ mounting an overlay which has metacopy only inodes on a kernel
+ that doesn't support this feature will have unexpected results.
+
+ If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 30802347a020..46e1ff8ac056 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,5 +4,5 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
- export.o
+overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
+ copy_up.o export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index ddaddb4ce4c3..296037afecdb 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -25,35 +25,20 @@
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
-static bool __read_mostly ovl_check_copy_up;
-module_param_named(check_copy_up, ovl_check_copy_up, bool,
- S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(ovl_check_copy_up,
- "Warn on copy-up when causing process also has a R/O fd open");
-
-static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
+static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
- const struct dentry *dentry = data;
-
- if (file_inode(f) == d_inode(dentry))
- pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
- f, fd, current->pid, current->comm);
+ pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
return 0;
}
-/*
- * Check the fds open by this process and warn if something like the following
- * scenario is about to occur:
- *
- * fd1 = open("foo", O_RDONLY);
- * fd2 = open("foo", O_RDWR);
- */
-static void ovl_do_check_copy_up(struct dentry *dentry)
+static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
- if (ovl_check_copy_up)
- iterate_fd(current->files, 0, ovl_check_fd, dentry);
+ return sprintf(buf, "N\n");
}
+module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
+MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
+
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
@@ -195,6 +180,16 @@ out_fput:
return error;
}
+static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+{
+ struct iattr attr = {
+ .ia_valid = ATTR_SIZE,
+ .ia_size = stat->size,
+ };
+
+ return notify_change(upperdentry, &attr, NULL);
+}
+
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
@@ -403,6 +398,7 @@ struct ovl_copy_up_ctx {
bool tmpfile;
bool origin;
bool indexed;
+ bool metacopy;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -505,28 +501,10 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
int err;
- if (S_ISREG(c->stat.mode)) {
- struct path upperpath;
-
- ovl_path_upper(c->dentry, &upperpath);
- BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = temp;
-
- err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
- if (err)
- return err;
- }
-
err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
return err;
- inode_lock(temp->d_inode);
- err = ovl_set_attr(temp, &c->stat);
- inode_unlock(temp->d_inode);
- if (err)
- return err;
-
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
@@ -540,7 +518,34 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
return err;
}
- return 0;
+ if (S_ISREG(c->stat.mode) && !c->metacopy) {
+ struct path upperpath, datapath;
+
+ ovl_path_upper(c->dentry, &upperpath);
+ BUG_ON(upperpath.dentry != NULL);
+ upperpath.dentry = temp;
+
+ ovl_path_lowerdata(c->dentry, &datapath);
+ err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ if (err)
+ return err;
+ }
+
+ if (c->metacopy) {
+ err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
+ NULL, 0, -EOPNOTSUPP);
+ if (err)
+ return err;
+ }
+
+ inode_lock(temp->d_inode);
+ if (c->metacopy)
+ err = ovl_set_size(temp, &c->stat);
+ if (!err)
+ err = ovl_set_attr(temp, &c->stat);
+ inode_unlock(temp->d_inode);
+
+ return err;
}
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
@@ -575,6 +580,8 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
if (err)
goto out;
+ if (!c->metacopy)
+ ovl_set_upperdata(d_inode(c->dentry));
inode = d_inode(c->dentry);
ovl_inode_update(inode, newdentry);
if (S_ISDIR(inode->i_mode))
@@ -677,6 +684,49 @@ out:
return err;
}
+static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
+ int flags)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (!ofs->config.metacopy)
+ return false;
+
+ if (!S_ISREG(mode))
+ return false;
+
+ if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
+ return false;
+
+ return true;
+}
+
+/* Copy up data of an inode which was copied up metadata only in the past. */
+static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
+{
+ struct path upperpath, datapath;
+ int err;
+
+ ovl_path_upper(c->dentry, &upperpath);
+ if (WARN_ON(upperpath.dentry == NULL))
+ return -EIO;
+
+ ovl_path_lowerdata(c->dentry, &datapath);
+ if (WARN_ON(datapath.dentry == NULL))
+ return -EIO;
+
+ err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ if (err)
+ return err;
+
+ err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+ if (err)
+ return err;
+
+ ovl_set_upperdata(d_inode(c->dentry));
+ return err;
+}
+
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
int flags)
{
@@ -698,6 +748,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
+ ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+
if (parent) {
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
@@ -719,9 +771,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (IS_ERR(ctx.link))
return PTR_ERR(ctx.link);
}
- ovl_do_check_copy_up(ctx.lowerpath.dentry);
- err = ovl_copy_up_start(dentry);
+ err = ovl_copy_up_start(dentry, flags);
/* err < 0: interrupted, err > 0: raced with another copy-up */
if (unlikely(err)) {
if (err > 0)
@@ -731,6 +782,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
err = ovl_do_copy_up(&ctx);
if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
+ if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+ err = ovl_copy_up_meta_inode_data(&ctx);
ovl_copy_up_end(dentry);
}
do_delayed_call(&done);
@@ -756,21 +809,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
struct dentry *next;
struct dentry *parent = NULL;
- /*
- * Check if copy-up has happened as well as for upper alias (in
- * case of hard links) is there.
- *
- * Both checks are lockless:
- * - false negatives: will recheck under oi->lock
- * - false positives:
- * + ovl_dentry_upper() uses memory barriers to ensure the
- * upper dentry is up-to-date
- * + ovl_dentry_has_upper_alias() relies on locking of
- * upper parent i_rwsem to prevent reordering copy-up
- * with rename.
- */
- if (ovl_dentry_upper(dentry) &&
- (ovl_dentry_has_upper_alias(dentry) || disconnected))
+ if (ovl_already_copied_up(dentry, flags))
break;
next = dget(dentry);
@@ -795,6 +834,41 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
return err;
}
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
+{
+ /* Copy up of disconnected dentry does not set upper alias */
+ if (ovl_already_copied_up(dentry, flags))
+ return false;
+
+ if (special_file(d_inode(dentry)->i_mode))
+ return false;
+
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return true;
+}
+
+int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
+{
+ int err = 0;
+
+ if (ovl_open_need_copy_up(dentry, file_flags)) {
+ err = ovl_want_write(dentry);
+ if (!err) {
+ err = ovl_copy_up_flags(dentry, file_flags);
+ ovl_drop_write(dentry);
+ }
+ }
+
+ return err;
+}
+
+int ovl_copy_up_with_data(struct dentry *dentry)
+{
+ return ovl_copy_up_flags(dentry, O_WRONLY);
+}
+
int ovl_copy_up(struct dentry *dentry)
{
return ovl_copy_up_flags(dentry, 0);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f480b1a2cd2e..ec350d4d921c 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,6 +24,8 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
+static int ovl_set_redirect(struct dentry *dentry, bool samedir);
+
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
@@ -242,7 +244,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
.newinode = inode,
};
- ovl_dentry_version_inc(dentry->d_parent, false);
+ ovl_dir_modified(dentry->d_parent, false);
ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
/*
@@ -657,6 +659,12 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ if (ovl_is_metacopy_dentry(old)) {
+ err = ovl_set_redirect(old, false);
+ if (err)
+ goto out_drop_write;
+ }
+
err = ovl_nlink_start(old, &locked);
if (err)
goto out_drop_write;
@@ -722,7 +730,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
if (err)
goto out_d_drop;
- ovl_dentry_version_inc(dentry->d_parent, true);
+ ovl_dir_modified(dentry->d_parent, true);
out_d_drop:
d_drop(dentry);
out_dput_upper:
@@ -767,7 +775,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
err = vfs_rmdir(dir, upper);
else
err = vfs_unlink(dir, upper, NULL);
- ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
+ ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
/*
* Keeping this dentry hashed would mean having to release
@@ -797,6 +805,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
int err;
bool locked = false;
const struct cred *old_cred;
+ struct dentry *upperdentry;
bool lower_positive = ovl_lower_positive(dentry);
LIST_HEAD(list);
@@ -832,6 +841,17 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
drop_nlink(dentry->d_inode);
}
ovl_nlink_end(dentry, locked);
+
+ /*
+ * Copy ctime
+ *
+ * Note: we fail to update ctime if there was no copy-up, only a
+ * whiteout
+ */
+ upperdentry = ovl_dentry_upper(dentry);
+ if (upperdentry)
+ ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -862,13 +882,13 @@ static bool ovl_can_move(struct dentry *dentry)
!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
}
-static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
+static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
{
char *buf, *ret;
struct dentry *d, *tmp;
int buflen = ovl_redirect_max + 1;
- if (samedir) {
+ if (!abs_redirect) {
ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
GFP_KERNEL);
goto out;
@@ -922,15 +942,43 @@ out:
return ret ? ret : ERR_PTR(-ENOMEM);
}
+static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
+{
+ struct dentry *lowerdentry;
+
+ if (!samedir)
+ return true;
+
+ if (d_is_dir(dentry))
+ return false;
+
+ /*
+ * For non-dir hardlinked files, we need absolute redirects
+ * in general as two upper hardlinks could be in different
+ * dirs. We could put a relative redirect now and convert
+ * it to absolute redirect later. But when nlink > 1 and
+ * indexing is on, that means relative redirect needs to be
+ * converted to absolute during copy up of another lower
+ * hardllink as well.
+ *
+ * So without optimizing too much, just check if lower is
+ * a hard link or not. If lower is hard link, put absolute
+ * redirect.
+ */
+ lowerdentry = ovl_dentry_lower(dentry);
+ return (d_inode(lowerdentry)->i_nlink > 1);
+}
+
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
{
int err;
const char *redirect = ovl_dentry_get_redirect(dentry);
+ bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
- if (redirect && (samedir || redirect[0] == '/'))
+ if (redirect && (!absolute_redirect || redirect[0] == '/'))
return 0;
- redirect = ovl_get_redirect(dentry, samedir);
+ redirect = ovl_get_redirect(dentry, absolute_redirect);
if (IS_ERR(redirect))
return PTR_ERR(redirect);
@@ -1106,22 +1154,20 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_dput;
err = 0;
- if (is_dir) {
- if (ovl_type_merge_or_lower(old))
- err = ovl_set_redirect(old, samedir);
- else if (!old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
- if (err)
- goto out_dput;
- }
- if (!overwrite && new_is_dir) {
- if (ovl_type_merge_or_lower(new))
- err = ovl_set_redirect(new, samedir);
- else if (!new_opaque && ovl_type_merge(old->d_parent))
- err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
- if (err)
- goto out_dput;
- }
+ if (ovl_type_merge_or_lower(old))
+ err = ovl_set_redirect(old, samedir);
+ else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
+ err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+ if (err)
+ goto out_dput;
+
+ if (!overwrite && ovl_type_merge_or_lower(new))
+ err = ovl_set_redirect(new, samedir);
+ else if (!overwrite && new_is_dir && !new_opaque &&
+ ovl_type_merge(old->d_parent))
+ err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+ if (err)
+ goto out_dput;
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
new_upperdir->d_inode, newdentry, flags);
@@ -1138,10 +1184,15 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
drop_nlink(d_inode(new));
}
- ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
- (!overwrite && ovl_type_origin(new)));
- ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
- (d_inode(new) && ovl_type_origin(new)));
+ ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
+ (!overwrite && ovl_type_origin(new)));
+ ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
+ (d_inode(new) && ovl_type_origin(new)));
+
+ /* copy ctime: */
+ ovl_copyattr(d_inode(olddentry), d_inode(old));
+ if (d_inode(new) && ovl_dentry_upper(new))
+ ovl_copyattr(d_inode(newdentry), d_inode(new));
out_dput:
dput(newdentry);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 9941ece61a14..8fa37cd7818a 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -317,6 +317,9 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
return ERR_CAST(inode);
}
+ if (upper)
+ ovl_set_flag(OVL_UPPERDATA, inode);
+
dentry = d_find_any_alias(inode);
if (!dentry) {
dentry = d_alloc_anon(inode->i_sb);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000000..32e9282893c9
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/uio.h>
+#include "overlayfs.h"
+
+static char ovl_whatisit(struct inode *inode, struct inode *realinode)
+{
+ if (realinode != ovl_inode_upper(inode))
+ return 'l';
+ if (ovl_has_upperdata(inode))
+ return 'u';
+ else
+ return 'm';
+}
+
+static struct file *ovl_open_realfile(const struct file *file,
+ struct inode *realinode)
+{
+ struct inode *inode = file_inode(file);
+ struct file *realfile;
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
+ realinode, current_cred());
+ revert_creds(old_cred);
+
+ pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+ file, file, ovl_whatisit(inode, realinode), file->f_flags,
+ realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+ return realfile;
+}
+
+#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
+
+static int ovl_change_flags(struct file *file, unsigned int flags)
+{
+ struct inode *inode = file_inode(file);
+ int err;
+
+ /* No atime modificaton on underlying */
+ flags |= O_NOATIME;
+
+ /* If some flag changed that cannot be changed then something's amiss */
+ if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
+ return -EIO;
+
+ flags &= OVL_SETFL_MASK;
+
+ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
+ return -EPERM;
+
+ if (flags & O_DIRECT) {
+ if (!file->f_mapping->a_ops ||
+ !file->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
+
+ if (file->f_op->check_flags) {
+ err = file->f_op->check_flags(flags);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&file->f_lock);
+ file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
+ spin_unlock(&file->f_lock);
+
+ return 0;
+}
+
+static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+ bool allow_meta)
+{
+ struct inode *inode = file_inode(file);
+ struct inode *realinode;
+
+ real->flags = 0;
+ real->file = file->private_data;
+
+ if (allow_meta)
+ realinode = ovl_inode_real(inode);
+ else
+ realinode = ovl_inode_realdata(inode);
+
+ /* Has it been copied up since we'd opened it? */
+ if (unlikely(file_inode(real->file) != realinode)) {
+ real->flags = FDPUT_FPUT;
+ real->file = ovl_open_realfile(file, realinode);
+
+ return PTR_ERR_OR_ZERO(real->file);
+ }
+
+ /* Did the flags change since open? */
+ if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
+ return ovl_change_flags(real->file, file->f_flags);
+
+ return 0;
+}
+
+static int ovl_real_fdget(const struct file *file, struct fd *real)
+{
+ return ovl_real_fdget_meta(file, real, false);
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+ struct dentry *dentry = file_dentry(file);
+ struct file *realfile;
+ int err;
+
+ err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+ if (err)
+ return err;
+
+ /* No longer need these flags, so don't pass them on to underlying fs */
+ file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+ realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
+
+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
+ file->f_mapping = realfile->f_mapping;
+
+ file->private_data = realfile;
+
+ return 0;
+}
+
+static int ovl_release(struct inode *inode, struct file *file)
+{
+ fput(file->private_data);
+
+ return 0;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *realinode = ovl_inode_real(file_inode(file));
+
+ return generic_file_llseek_size(file, offset, whence,
+ realinode->i_sb->s_maxbytes,
+ i_size_read(realinode));
+}
+
+static void ovl_file_accessed(struct file *file)
+{
+ struct inode *inode, *upperinode;
+
+ if (file->f_flags & O_NOATIME)
+ return;
+
+ inode = file_inode(file);
+ upperinode = ovl_inode_upper(inode);
+
+ if (!upperinode)
+ return;
+
+ if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
+ !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+ inode->i_mtime = upperinode->i_mtime;
+ inode->i_ctime = upperinode->i_ctime;
+ }
+
+ touch_atime(&file->f_path);
+}
+
+static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+{
+ int ifl = iocb->ki_flags;
+ rwf_t flags = 0;
+
+ if (ifl & IOCB_NOWAIT)
+ flags |= RWF_NOWAIT;
+ if (ifl & IOCB_HIPRI)
+ flags |= RWF_HIPRI;
+ if (ifl & IOCB_DSYNC)
+ flags |= RWF_DSYNC;
+ if (ifl & IOCB_SYNC)
+ flags |= RWF_SYNC;
+
+ return flags;
+}
+
+static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ revert_creds(old_cred);
+
+ ovl_file_accessed(file);
+
+ fdput(real);
+
+ return ret;
+}
+
+static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ goto out_unlock;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+
+ fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ int ret;
+
+ ret = ovl_real_fdget_meta(file, &real, !datasync);
+ if (ret)
+ return ret;
+
+ /* Don't sync lower file for fear of receiving EROFS error */
+ if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_fsync_range(real.file, start, end, datasync);
+ revert_creds(old_cred);
+ }
+
+ fdput(real);
+
+ return ret;
+}
+
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *realfile = file->private_data;
+ const struct cred *old_cred;
+ int ret;
+
+ if (!realfile->f_op->mmap)
+ return -ENODEV;
+
+ if (WARN_ON(file != vma->vm_file))
+ return -EIO;
+
+ vma->vm_file = get_file(realfile);
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = call_mmap(vma->vm_file, vma);
+ revert_creds(old_cred);
+
+ if (ret) {
+ /* Drop reference count from new vm_file value */
+ fput(realfile);
+ } else {
+ /* Drop reference count from previous vm_file value */
+ fput(file);
+ }
+
+ ovl_file_accessed(file);
+
+ return ret;
+}
+
+static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ int ret;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_fallocate(real.file, mode, offset, len);
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+
+ fdput(real);
+
+ return ret;
+}
+
+static long ovl_real_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ long ret;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_ioctl(real.file, cmd, arg);
+ revert_creds(old_cred);
+
+ fdput(real);
+
+ return ret;
+}
+
+static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ struct inode *inode = file_inode(file);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ ret = ovl_real_ioctl(file, cmd, arg);
+ break;
+
+ case FS_IOC_SETFLAGS:
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ ret = ovl_copy_up_with_data(file_dentry(file));
+ if (!ret) {
+ ret = ovl_real_ioctl(file, cmd, arg);
+
+ inode_lock(inode);
+ ovl_copyflags(ovl_inode_real(inode), inode);
+ inode_unlock(inode);
+ }
+
+ mnt_drop_write_file(file);
+ break;
+
+ default:
+ ret = -ENOTTY;
+ }
+
+ return ret;
+}
+
+static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return ovl_ioctl(file, cmd, arg);
+}
+
+enum ovl_copyop {
+ OVL_COPY,
+ OVL_CLONE,
+ OVL_DEDUPE,
+};
+
+static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ u64 len, unsigned int flags, enum ovl_copyop op)
+{
+ struct inode *inode_out = file_inode(file_out);
+ struct fd real_in, real_out;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ ret = ovl_real_fdget(file_out, &real_out);
+ if (ret)
+ return ret;
+
+ ret = ovl_real_fdget(file_in, &real_in);
+ if (ret) {
+ fdput(real_out);
+ return ret;
+ }
+
+ old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
+ switch (op) {
+ case OVL_COPY:
+ ret = vfs_copy_file_range(real_in.file, pos_in,
+ real_out.file, pos_out, len, flags);
+ break;
+
+ case OVL_CLONE:
+ ret = vfs_clone_file_range(real_in.file, pos_in,
+ real_out.file, pos_out, len);
+ break;
+
+ case OVL_DEDUPE:
+ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
+ real_out.file, pos_out, len);
+ break;
+ }
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode_out), inode_out);
+
+ fdput(real_in);
+ fdput(real_out);
+
+ return ret;
+}
+
+static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
+ OVL_COPY);
+}
+
+static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
+{
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+ OVL_CLONE);
+}
+
+static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
+{
+ /*
+ * Don't copy up because of a dedupe request, this wouldn't make sense
+ * most of the time (data would be duplicated instead of deduplicated).
+ */
+ if (!ovl_inode_upper(file_inode(file_in)) ||
+ !ovl_inode_upper(file_inode(file_out)))
+ return -EPERM;
+
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+ OVL_DEDUPE);
+}
+
+const struct file_operations ovl_file_operations = {
+ .open = ovl_open,
+ .release = ovl_release,
+ .llseek = ovl_llseek,
+ .read_iter = ovl_read_iter,
+ .write_iter = ovl_write_iter,
+ .fsync = ovl_fsync,
+ .mmap = ovl_mmap,
+ .fallocate = ovl_fallocate,
+ .unlocked_ioctl = ovl_ioctl,
+ .compat_ioctl = ovl_compat_ioctl,
+
+ .copy_file_range = ovl_copy_file_range,
+ .clone_file_range = ovl_clone_file_range,
+ .dedupe_file_range = ovl_dedupe_file_range,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ed16a898caeb..e0bb217c01e2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,18 +19,10 @@
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
+ bool full_copy_up = false;
struct dentry *upperdentry;
const struct cred *old_cred;
- /*
- * Check for permissions before trying to copy-up. This is redundant
- * since it will be rechecked later by ->setattr() on upper dentry. But
- * without this, copy-up can be triggered by just about anybody.
- *
- * We don't initialize inode->size, which just means that
- * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
- * check for a swapfile (which this won't be anyway).
- */
err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -39,10 +31,33 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
goto out;
- err = ovl_copy_up(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+ err = -ETXTBSY;
+ if (atomic_read(&realinode->i_writecount) < 0)
+ goto out_drop_write;
+
+ /* Truncate should trigger data copy up as well */
+ full_copy_up = true;
+ }
+
+ if (!full_copy_up)
+ err = ovl_copy_up(dentry);
+ else
+ err = ovl_copy_up_with_data(dentry);
if (!err) {
+ struct inode *winode = NULL;
+
upperdentry = ovl_dentry_upper(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ winode = d_inode(upperdentry);
+ err = get_write_access(winode);
+ if (err)
+ goto out_drop_write;
+ }
+
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
@@ -53,7 +68,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+
+ if (winode)
+ put_write_access(winode);
}
+out_drop_write:
ovl_drop_write(dentry);
out:
return err;
@@ -133,6 +152,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
bool samefs = ovl_same_sb(dentry->d_sb);
struct ovl_layer *lower_layer = NULL;
int err;
+ bool metacopy_blocks = false;
+
+ metacopy_blocks = ovl_is_metacopy_dentry(dentry);
type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
@@ -154,7 +176,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
lower_layer = ovl_layer_lower(dentry);
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
- u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
+ u32 lowermask = STATX_INO | STATX_BLOCKS |
+ (!is_dir ? STATX_NLINK : 0);
ovl_path_lower(dentry, &realpath);
err = vfs_getattr(&realpath, &lowerstat,
@@ -183,6 +206,35 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
stat->ino = lowerstat.ino;
lower_layer = ovl_layer_lower(dentry);
}
+
+ /*
+ * If we are querying a metacopy dentry and lower
+ * dentry is data dentry, then use the blocks we
+ * queried just now. We don't have to do additional
+ * vfs_getattr(). If lower itself is metacopy, then
+ * additional vfs_getattr() is unavoidable.
+ */
+ if (metacopy_blocks &&
+ realpath.dentry == ovl_dentry_lowerdata(dentry)) {
+ stat->blocks = lowerstat.blocks;
+ metacopy_blocks = false;
+ }
+ }
+
+ if (metacopy_blocks) {
+ /*
+ * If lower is not same as lowerdata or if there was
+ * no origin on upper, we can end up here.
+ */
+ struct kstat lowerdatastat;
+ u32 lowermask = STATX_BLOCKS;
+
+ ovl_path_lowerdata(dentry, &realpath);
+ err = vfs_getattr(&realpath, &lowerdatastat,
+ lowermask, flags);
+ if (err)
+ goto out;
+ stat->blocks = lowerdatastat.blocks;
}
}
@@ -304,6 +356,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
}
revert_creds(old_cred);
+ /* copy c/mtime */
+ ovl_copyattr(d_inode(realdentry), inode);
+
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -384,38 +439,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
-static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
-{
- /* Copy up of disconnected dentry does not set upper alias */
- if (ovl_dentry_upper(dentry) &&
- (ovl_dentry_has_upper_alias(dentry) ||
- (dentry->d_flags & DCACHE_DISCONNECTED)))
- return false;
-
- if (special_file(d_inode(dentry)->i_mode))
- return false;
-
- if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
- return false;
-
- return true;
-}
-
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
-{
- int err = 0;
-
- if (ovl_open_need_copy_up(dentry, file_flags)) {
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up_flags(dentry, file_flags);
- ovl_drop_write(dentry);
- }
- }
-
- return err;
-}
-
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
if (flags & S_ATIME) {
@@ -433,6 +456,23 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
return 0;
}
+static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ int err;
+ struct inode *realinode = ovl_inode_real(inode);
+ const struct cred *old_cred;
+
+ if (!realinode->i_op->fiemap)
+ return -EOPNOTSUPP;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+ revert_creds(old_cred);
+
+ return err;
+}
+
static const struct inode_operations ovl_file_inode_operations = {
.setattr = ovl_setattr,
.permission = ovl_permission,
@@ -440,6 +480,7 @@ static const struct inode_operations ovl_file_inode_operations = {
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
+ .fiemap = ovl_fiemap,
};
static const struct inode_operations ovl_symlink_inode_operations = {
@@ -450,6 +491,15 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.update_time = ovl_update_time,
};
+static const struct inode_operations ovl_special_inode_operations = {
+ .setattr = ovl_setattr,
+ .permission = ovl_permission,
+ .getattr = ovl_getattr,
+ .listxattr = ovl_listxattr,
+ .get_acl = ovl_get_acl,
+ .update_time = ovl_update_time,
+};
+
/*
* It is possible to stack overlayfs instance on top of another
* overlayfs instance as lower layer. We need to annonate the
@@ -520,6 +570,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
+ inode->i_fop = &ovl_file_operations;
break;
case S_IFDIR:
@@ -532,7 +583,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
break;
default:
- inode->i_op = &ovl_file_inode_operations;
+ inode->i_op = &ovl_special_inode_operations;
init_special_inode(inode, mode, rdev);
break;
}
@@ -769,8 +820,9 @@ struct inode *ovl_get_inode(struct super_block *sb,
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
- bool is_dir;
+ bool is_dir, metacopy = false;
unsigned long ino = 0;
+ int err = -ENOMEM;
if (!realinode)
realinode = d_inode(lowerdentry);
@@ -787,7 +839,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
inode = ovl_iget5(sb, oip->newinode, key);
if (!inode)
- goto out_nomem;
+ goto out_err;
if (!(inode->i_state & I_NEW)) {
/*
* Verify that the underlying files stored in the inode
@@ -796,11 +848,12 @@ struct inode *ovl_get_inode(struct super_block *sb,
if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
true)) {
iput(inode);
- inode = ERR_PTR(-ESTALE);
- goto out;
+ err = -ESTALE;
+ goto out_err;
}
dput(upperdentry);
+ kfree(oip->redirect);
goto out;
}
@@ -812,11 +865,13 @@ struct inode *ovl_get_inode(struct super_block *sb,
} else {
/* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
- if (!inode)
- goto out_nomem;
+ if (!inode) {
+ err = -ENOMEM;
+ goto out_err;
+ }
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
- ovl_inode_init(inode, upperdentry, lowerdentry);
+ ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
@@ -824,6 +879,20 @@ struct inode *ovl_get_inode(struct super_block *sb,
if (oip->index)
ovl_set_flag(OVL_INDEX, inode);
+ if (upperdentry) {
+ err = ovl_check_metacopy_xattr(upperdentry);
+ if (err < 0)
+ goto out_err;
+ metacopy = err;
+ if (!metacopy)
+ ovl_set_flag(OVL_UPPERDATA, inode);
+ }
+
+ OVL_I(inode)->redirect = oip->redirect;
+
+ if (bylower)
+ ovl_set_flag(OVL_CONST_INO, inode);
+
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
@@ -837,7 +906,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
out:
return inode;
-out_nomem:
- inode = ERR_PTR(-ENOMEM);
+out_err:
+ inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c993dd8db739..f28711846dd6 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -24,38 +24,20 @@ struct ovl_lookup_data {
bool stop;
bool last;
char *redirect;
+ bool metacopy;
};
static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
- char *s, *next, *buf = NULL;
+ char *buf;
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return 0;
- goto fail;
- }
- buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+ if (IS_ERR_OR_NULL(buf))
+ return PTR_ERR(buf);
- if (res == 0)
- goto invalid;
-
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
- if (res < 0)
- goto fail;
- if (res == 0)
- goto invalid;
if (buf[0] == '/') {
- for (s = buf; *s++ == '/'; s = next) {
- next = strchrnul(s, '/');
- if (s == next)
- goto invalid;
- }
/*
* One of the ancestor path elements in an absolute path
* lookup in ovl_lookup_layer() could have been opaque and
@@ -66,9 +48,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
*/
d->stop = false;
} else {
- if (strchr(buf, '/') != NULL)
- goto invalid;
-
+ res = strlen(buf) + 1;
memmove(buf + prelen, buf, res);
memcpy(buf, d->name.name, prelen);
}
@@ -80,16 +60,6 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
d->name.len = strlen(d->redirect);
return 0;
-
-err_free:
- kfree(buf);
- return 0;
-fail:
- pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
- goto err_free;
-invalid:
- pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
- goto err_free;
}
static int ovl_acceptable(void *ctx, struct dentry *dentry)
@@ -252,28 +222,39 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = d->opaque = true;
goto put_and_out;
}
- if (!d_can_lookup(this)) {
+ /*
+ * This dentry should be a regular file if previous layer lookup
+ * found a metacopy dentry.
+ */
+ if (last_element && d->metacopy && !d_is_reg(this)) {
d->stop = true;
- if (d->is_dir)
- goto put_and_out;
-
- /*
- * NB: handle failure to lookup non-last element when non-dir
- * redirects become possible
- */
- WARN_ON(!last_element);
- goto out;
+ goto put_and_out;
}
- if (last_element)
- d->is_dir = true;
- if (d->last)
- goto out;
+ if (!d_can_lookup(this)) {
+ if (d->is_dir || !last_element) {
+ d->stop = true;
+ goto put_and_out;
+ }
+ err = ovl_check_metacopy_xattr(this);
+ if (err < 0)
+ goto out_err;
- if (ovl_is_opaquedir(this)) {
- d->stop = true;
+ d->metacopy = err;
+ d->stop = !d->metacopy;
+ if (!d->metacopy || d->last)
+ goto out;
+ } else {
if (last_element)
- d->opaque = true;
- goto out;
+ d->is_dir = true;
+ if (d->last)
+ goto out;
+
+ if (ovl_is_opaquedir(this)) {
+ d->stop = true;
+ if (last_element)
+ d->opaque = true;
+ goto out;
+ }
}
err = ovl_check_redirect(this, d, prelen, post);
if (err)
@@ -823,7 +804,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct ovl_path *stack = NULL;
+ struct ovl_path *stack = NULL, *origin_path = NULL;
struct dentry *upperdir, *upperdentry = NULL;
struct dentry *origin = NULL;
struct dentry *index = NULL;
@@ -834,6 +815,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct dentry *this;
unsigned int i;
int err;
+ bool metacopy = false;
struct ovl_lookup_data d = {
.name = dentry->d_name,
.is_dir = false,
@@ -841,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
.stop = false,
.last = ofs->config.redirect_follow ? false : !poe->numlower,
.redirect = NULL,
+ .metacopy = false,
};
if (dentry->d_name.len > ofs->namelen)
@@ -859,7 +842,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
if (upperdentry && !d.is_dir) {
- BUG_ON(!d.stop || d.redirect);
+ unsigned int origin_ctr = 0;
+
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -870,9 +854,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
+ err = ovl_check_origin(ofs, upperdentry, &origin_path,
+ &origin_ctr);
if (err)
goto out_put_upper;
+
+ if (d.metacopy)
+ metacopy = true;
}
if (d.redirect) {
@@ -913,7 +901,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* If no origin fh is stored in upper of a merge dir, store fh
* of lower dir and set upper parent "impure".
*/
- if (upperdentry && !ctr && !ofs->noxattr) {
+ if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
err = ovl_fix_origin(dentry, this, upperdentry);
if (err) {
dput(this);
@@ -925,18 +913,35 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* When "verify_lower" feature is enabled, do not merge with a
* lower dir that does not match a stored origin xattr. In any
* case, only verified origin is used for index lookup.
+ *
+ * For non-dir dentry, if index=on, then ensure origin
+ * matches the dentry found using path based lookup,
+ * otherwise error out.
*/
- if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
+ if (upperdentry && !ctr &&
+ ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
+ (!d.is_dir && ofs->config.index && origin_path))) {
err = ovl_verify_origin(upperdentry, this, false);
if (err) {
dput(this);
- break;
+ if (d.is_dir)
+ break;
+ goto out_put;
}
-
- /* Bless lower dir as verified origin */
origin = this;
}
+ if (d.metacopy)
+ metacopy = true;
+ /*
+ * Do not store intermediate metacopy dentries in chain,
+ * except top most lower metacopy dentry
+ */
+ if (d.metacopy && ctr) {
+ dput(this);
+ continue;
+ }
+
stack[ctr].dentry = this;
stack[ctr].layer = lower.layer;
ctr++;
@@ -968,13 +973,48 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
+ if (metacopy) {
+ /*
+ * Found a metacopy dentry but did not find corresponding
+ * data dentry
+ */
+ if (d.metacopy) {
+ err = -EIO;
+ goto out_put;
+ }
+
+ err = -EPERM;
+ if (!ofs->config.metacopy) {
+ pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+ dentry);
+ goto out_put;
+ }
+ } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
+ if (WARN_ON(stack != NULL)) {
+ err = -EIO;
+ goto out_put;
+ }
+ stack = origin_path;
+ ctr = 1;
+ origin_path = NULL;
+ }
+
/*
* Lookup index by lower inode and verify it matches upper inode.
* We only trust dir index if we verified that lower dir matches
* origin, otherwise dir index entries may be inconsistent and we
- * ignore them. Always lookup index of non-dir and non-upper.
+ * ignore them.
+ *
+ * For non-dir upper metacopy dentry, we already set "origin" if we
+ * verified that lower matched upper origin. If upper origin was
+ * not present (because lower layer did not support fh encode/decode),
+ * or indexing is not enabled, do not set "origin" and skip looking up
+ * index. This case should be handled in same way as a non-dir upper
+ * without ORIGIN is handled.
+ *
+ * Always lookup index of non-dir non-metacopy and non-upper.
*/
- if (ctr && (!upperdentry || !d.is_dir))
+ if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
origin = stack[0].dentry;
if (origin && ovl_indexdir(dentry->d_sb) &&
@@ -1000,8 +1040,15 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
- else if (index)
+ else if (index) {
upperdentry = dget(index);
+ upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
+ if (IS_ERR(upperredirect)) {
+ err = PTR_ERR(upperredirect);
+ upperredirect = NULL;
+ goto out_free_oe;
+ }
+ }
if (upperdentry || ctr) {
struct ovl_inode_params oip = {
@@ -1009,22 +1056,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
.lowerpath = stack,
.index = index,
.numlower = ctr,
+ .redirect = upperredirect,
+ .lowerdata = (ctr > 1 && !d.is_dir) ?
+ stack[ctr - 1].dentry : NULL,
};
inode = ovl_get_inode(dentry->d_sb, &oip);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
-
- /*
- * NB: handle redirected hard links when non-dir redirects
- * become possible
- */
- WARN_ON(OVL_I(inode)->redirect);
- OVL_I(inode)->redirect = upperredirect;
}
revert_creds(old_cred);
+ if (origin_path) {
+ dput(origin_path->dentry);
+ kfree(origin_path);
+ }
dput(index);
kfree(stack);
kfree(d.redirect);
@@ -1039,6 +1086,10 @@ out_put:
dput(stack[i].dentry);
kfree(stack);
out_put_upper:
+ if (origin_path) {
+ dput(origin_path->dentry);
+ kfree(origin_path);
+ }
dput(upperdentry);
kfree(upperredirect);
out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7538b9b56237..f61839e1054c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include <linux/fs.h>
#include "ovl_entry.h"
enum ovl_path_type {
@@ -28,6 +29,7 @@ enum ovl_path_type {
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
+#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
@@ -35,6 +37,9 @@ enum ovl_inode_flag {
/* Non-merge dir that may contain whiteout entries */
OVL_WHITEOUTS,
OVL_INDEX,
+ OVL_UPPERDATA,
+ /* Inode number will remain constant over copy up. */
+ OVL_CONST_INO,
};
enum ovl_entry_flag {
@@ -190,6 +195,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
return ret;
}
+static inline bool ovl_open_flags_need_copy_up(int flags)
+{
+ if (!flags)
+ return false;
+
+ return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
+}
+
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
@@ -206,15 +219,19 @@ bool ovl_dentry_weird(struct dentry *dentry);
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_lowerdata(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
+struct inode *ovl_inode_realdata(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
@@ -225,18 +242,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
+bool ovl_has_upperdata(struct inode *inode);
+void ovl_set_upperdata(struct inode *inode);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry);
+ struct dentry *lowerdentry, struct dentry *lowerdata);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
-void ovl_dentry_version_inc(struct dentry *dentry, bool impurity);
+void ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
-int ovl_copy_up_start(struct dentry *dentry);
+int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_already_copied_up(struct dentry *dentry, int flags);
bool ovl_check_origin_xattr(struct dentry *dentry);
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
@@ -252,6 +274,9 @@ bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
+int ovl_check_metacopy_xattr(struct dentry *dentry);
+bool ovl_is_metacopy_dentry(struct dentry *dentry);
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
@@ -324,7 +349,6 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
bool ovl_is_private_xattr(const char *name);
@@ -334,6 +358,8 @@ struct ovl_inode_params {
struct ovl_path *lowerpath;
struct dentry *index;
unsigned int numlower;
+ char *redirect;
+ struct dentry *lowerdata;
};
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
@@ -348,6 +374,14 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
to->i_atime = from->i_atime;
to->i_mtime = from->i_mtime;
to->i_ctime = from->i_ctime;
+ i_size_write(to, i_size_read(from));
+}
+
+static inline void ovl_copyflags(struct inode *from, struct inode *to)
+{
+ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+
+ inode_set_flags(to, from->i_flags & mask, mask);
}
/* dir.c */
@@ -368,9 +402,14 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
int ovl_cleanup(struct inode *dir, struct dentry *dentry);
struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
+int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 41655a7d6894..ec237035333a 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -19,6 +19,7 @@ struct ovl_config {
bool index;
bool nfs_export;
int xino;
+ bool metacopy;
};
struct ovl_sb {
@@ -88,7 +89,10 @@ static inline struct ovl_entry *OVL_E(struct dentry *dentry)
}
struct ovl_inode {
- struct ovl_dir_cache *cache;
+ union {
+ struct ovl_dir_cache *cache; /* directory */
+ struct inode *lowerdata; /* regular file */
+ };
const char *redirect;
u64 version;
unsigned long flags;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index ef1fe42ff7bb..cc8303a806b4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -668,6 +668,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
}
+static bool ovl_is_impure_dir(struct file *file)
+{
+ struct ovl_dir_file *od = file->private_data;
+ struct inode *dir = d_inode(file->f_path.dentry);
+
+ /*
+ * Only upper dir can be impure, but if we are in the middle of
+ * iterating a lower real dir, dir could be copied up and marked
+ * impure. We only want the impure cache if we started iterating
+ * a real upper dir to begin with.
+ */
+ return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
+
+}
+
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
int err;
@@ -696,7 +711,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
rdt.parent_ino = stat.ino;
}
- if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) {
+ if (ovl_is_impure_dir(file)) {
rdt.cache = ovl_cache_get_impure(&file->f_path);
if (IS_ERR(rdt.cache))
return PTR_ERR(rdt.cache);
@@ -727,7 +742,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
*/
if (ovl_xino_bits(dentry->d_sb) ||
(ovl_same_sb(dentry->d_sb) &&
- (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
+ (ovl_is_impure_dir(file) ||
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
return ovl_iterate_real(file, ctx);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 704b37311467..2e0fc93c2c06 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe)
dput(oe->lowerstack[i].dentry);
}
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(ovl_metacopy_def,
+ "Default to on or off for the metadata only copy up feature");
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -74,31 +79,14 @@ static void ovl_dentry_release(struct dentry *dentry)
}
}
-static int ovl_check_append_only(struct inode *inode, int flag)
-{
- /*
- * This test was moot in vfs may_open() because overlay inode does
- * not have the S_APPEND flag, so re-check on real upper inode
- */
- if (IS_APPEND(inode)) {
- if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
- return -EPERM;
- if (flag & O_TRUNC)
- return -EPERM;
- }
-
- return 0;
-}
-
static struct dentry *ovl_d_real(struct dentry *dentry,
- const struct inode *inode,
- unsigned int open_flags, unsigned int flags)
+ const struct inode *inode)
{
struct dentry *real;
- int err;
- if (flags & D_REAL_UPPER)
- return ovl_dentry_upper(dentry);
+ /* It's an overlay file */
+ if (inode && d_inode(dentry) == inode)
+ return dentry;
if (!d_is_reg(dentry)) {
if (!inode || inode == d_inode(dentry))
@@ -106,28 +94,19 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
goto bug;
}
- if (open_flags) {
- err = ovl_open_maybe_copy_up(dentry, open_flags);
- if (err)
- return ERR_PTR(err);
- }
-
real = ovl_dentry_upper(dentry);
- if (real && (!inode || inode == d_inode(real))) {
- if (!inode) {
- err = ovl_check_append_only(d_inode(real), open_flags);
- if (err)
- return ERR_PTR(err);
- }
+ if (real && (inode == d_inode(real)))
+ return real;
+
+ if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
return real;
- }
- real = ovl_dentry_lower(dentry);
+ real = ovl_dentry_lowerdata(dentry);
if (!real)
goto bug;
/* Handle recursion */
- real = d_real(real, inode, open_flags, 0);
+ real = d_real(real, inode);
if (!inode || inode == d_inode(real))
return real;
@@ -205,6 +184,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
oi->flags = 0;
oi->__upperdentry = NULL;
oi->lower = NULL;
+ oi->lowerdata = NULL;
mutex_init(&oi->lock);
return &oi->vfs_inode;
@@ -223,8 +203,11 @@ static void ovl_destroy_inode(struct inode *inode)
dput(oi->__upperdentry);
iput(oi->lower);
+ if (S_ISDIR(inode->i_mode))
+ ovl_dir_cache_free(inode);
+ else
+ iput(oi->lowerdata);
kfree(oi->redirect);
- ovl_dir_cache_free(inode);
mutex_destroy(&oi->lock);
call_rcu(&inode->i_rcu, ovl_i_callback);
@@ -376,6 +359,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
"on" : "off");
if (ofs->config.xino != ovl_xino_def())
seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
+ if (ofs->config.metacopy != ovl_metacopy_def)
+ seq_printf(m, ",metacopy=%s",
+ ofs->config.metacopy ? "on" : "off");
return 0;
}
@@ -413,6 +399,8 @@ enum {
OPT_XINO_ON,
OPT_XINO_OFF,
OPT_XINO_AUTO,
+ OPT_METACOPY_ON,
+ OPT_METACOPY_OFF,
OPT_ERR,
};
@@ -429,6 +417,8 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_ON, "xino=on"},
{OPT_XINO_OFF, "xino=off"},
{OPT_XINO_AUTO, "xino=auto"},
+ {OPT_METACOPY_ON, "metacopy=on"},
+ {OPT_METACOPY_OFF, "metacopy=off"},
{OPT_ERR, NULL}
};
@@ -481,6 +471,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
+ int err;
config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
if (!config->redirect_mode)
@@ -555,6 +546,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->xino = OVL_XINO_AUTO;
break;
+ case OPT_METACOPY_ON:
+ config->metacopy = true;
+ break;
+
+ case OPT_METACOPY_OFF:
+ config->metacopy = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -569,7 +568,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->workdir = NULL;
}
- return ovl_parse_redirect_mode(config, config->redirect_mode);
+ err = ovl_parse_redirect_mode(config, config->redirect_mode);
+ if (err)
+ return err;
+
+ /* metacopy feature with upper requires redirect_dir=on */
+ if (config->upperdir && config->metacopy && !config->redirect_dir) {
+ pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
+ config->metacopy = false;
+ } else if (config->metacopy && !config->redirect_follow) {
+ pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
+ config->metacopy = false;
+ }
+
+ return 0;
}
#define OVL_WORKDIR_NAME "work"
@@ -1042,7 +1054,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
if (err) {
ofs->noxattr = true;
ofs->config.index = false;
- pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
+ ofs->config.metacopy = false;
+ pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
err = 0;
} else {
vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
@@ -1064,7 +1077,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
-
out:
mnt_drop_write(mnt);
return err;
@@ -1375,6 +1387,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ofs->config.index = ovl_index_def;
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
+ ofs->config.metacopy = ovl_metacopy_def;
err = ovl_parse_opt((char *) data, &ofs->config);
if (err)
goto out_err;
@@ -1445,6 +1458,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
}
}
+ if (ofs->config.metacopy && ofs->config.nfs_export) {
+ pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+ ofs->config.nfs_export = false;
+ }
+
if (ofs->config.nfs_export)
sb->s_export_op = &ovl_export_operations;
@@ -1455,7 +1473,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ovl_super_operations;
sb->s_xattr = ovl_xattr_handlers;
sb->s_fs_info = ofs;
- sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
+ sb->s_flags |= SB_POSIXACL;
err = -ENOMEM;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
@@ -1474,8 +1492,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
+ ovl_set_upperdata(d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
- ovl_dentry_lower(root_dentry));
+ ovl_dentry_lower(root_dentry), NULL);
sb->s_root = root_dentry;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6f1078028c66..8cfb62cc8672 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -133,8 +133,10 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
* Non-dir dentry can hold lower dentry of its copy up origin.
*/
if (oe->numlower) {
- type |= __OVL_PATH_ORIGIN;
- if (d_is_dir(dentry))
+ if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
+ type |= __OVL_PATH_ORIGIN;
+ if (d_is_dir(dentry) ||
+ !ovl_has_upperdata(d_inode(dentry)))
type |= __OVL_PATH_MERGE;
}
} else {
@@ -164,6 +166,18 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
}
}
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (oe->numlower) {
+ path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
+ path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
+ } else {
+ *path = (struct path) { };
+ }
+}
+
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
{
enum ovl_path_type type = ovl_path_type(dentry);
@@ -195,6 +209,19 @@ struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
return oe->numlower ? oe->lowerstack[0].layer : NULL;
}
+/*
+ * ovl_dentry_lower() could return either a data dentry or metacopy dentry
+ * dependig on what is stored in lowerstack[0]. At times we need to find
+ * lower dentry which has data (and not metacopy dentry). This helper
+ * returns the lower data dentry.
+ */
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
+}
+
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
@@ -222,6 +249,26 @@ struct inode *ovl_inode_real(struct inode *inode)
return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
+/* Return inode which contains lower data. Do not return metacopy */
+struct inode *ovl_inode_lowerdata(struct inode *inode)
+{
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ return NULL;
+
+ return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
+}
+
+/* Return real inode which contains data. Does not return metacopy inode */
+struct inode *ovl_inode_realdata(struct inode *inode)
+{
+ struct inode *upperinode;
+
+ upperinode = ovl_inode_upper(inode);
+ if (upperinode && ovl_has_upperdata(inode))
+ return upperinode;
+
+ return ovl_inode_lowerdata(inode);
+}
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
{
@@ -279,6 +326,62 @@ void ovl_dentry_set_upper_alias(struct dentry *dentry)
ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
}
+static bool ovl_should_check_upperdata(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+
+ if (!ovl_inode_lower(inode))
+ return false;
+
+ return true;
+}
+
+bool ovl_has_upperdata(struct inode *inode)
+{
+ if (!ovl_should_check_upperdata(inode))
+ return true;
+
+ if (!ovl_test_flag(OVL_UPPERDATA, inode))
+ return false;
+ /*
+ * Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
+ * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
+ * if setting of OVL_UPPERDATA is visible, then effects of writes
+ * before that are visible too.
+ */
+ smp_rmb();
+ return true;
+}
+
+void ovl_set_upperdata(struct inode *inode)
+{
+ /*
+ * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
+ * if OVL_UPPERDATA flag is visible, then effects of write operations
+ * before it are visible as well.
+ */
+ smp_wmb();
+ ovl_set_flag(OVL_UPPERDATA, inode);
+}
+
+/* Caller should hold ovl_inode->lock */
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
+{
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
+}
+
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
+{
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return !ovl_has_upperdata(d_inode(dentry));
+}
+
bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
@@ -300,7 +403,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
}
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry)
+ struct dentry *lowerdentry, struct dentry *lowerdata)
{
struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
@@ -308,8 +411,11 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
OVL_I(inode)->__upperdentry = upperdentry;
if (lowerdentry)
OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
+ if (lowerdata)
+ OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
ovl_copyattr(realinode, inode);
+ ovl_copyflags(realinode, inode);
if (!inode->i_ino)
inode->i_ino = realinode->i_ino;
}
@@ -333,7 +439,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
}
}
-void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
+static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
@@ -348,6 +454,14 @@ void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
OVL_I(inode)->version++;
}
+void ovl_dir_modified(struct dentry *dentry, bool impurity)
+{
+ /* Copy mtime/ctime */
+ ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+
+ ovl_dentry_version_inc(dentry, impurity);
+}
+
u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
@@ -368,13 +482,51 @@ struct file *ovl_path_open(struct path *path, int flags)
return dentry_open(path, flags | O_NOATIME, current_cred());
}
-int ovl_copy_up_start(struct dentry *dentry)
+/* Caller should hold ovl_inode->lock */
+static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
+{
+ bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+ if (ovl_dentry_upper(dentry) &&
+ (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+ !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+ return true;
+
+ return false;
+}
+
+bool ovl_already_copied_up(struct dentry *dentry, int flags)
+{
+ bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+ /*
+ * Check if copy-up has happened as well as for upper alias (in
+ * case of hard links) is there.
+ *
+ * Both checks are lockless:
+ * - false negatives: will recheck under oi->lock
+ * - false positives:
+ * + ovl_dentry_upper() uses memory barriers to ensure the
+ * upper dentry is up-to-date
+ * + ovl_dentry_has_upper_alias() relies on locking of
+ * upper parent i_rwsem to prevent reordering copy-up
+ * with rename.
+ */
+ if (ovl_dentry_upper(dentry) &&
+ (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+ !ovl_dentry_needs_data_copy_up(dentry, flags))
+ return true;
+
+ return false;
+}
+
+int ovl_copy_up_start(struct dentry *dentry, int flags)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
int err;
err = mutex_lock_interruptible(&oi->lock);
- if (!err && ovl_dentry_has_upper_alias(dentry)) {
+ if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
mutex_unlock(&oi->lock);
}
@@ -675,3 +827,91 @@ err:
pr_err("overlayfs: failed to lock workdir+upperdir\n");
return -EIO;
}
+
+/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
+int ovl_check_metacopy_xattr(struct dentry *dentry)
+{
+ int res;
+
+ /* Only regular files can have metacopy xattr */
+ if (!S_ISREG(d_inode(dentry)->i_mode))
+ return 0;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return 0;
+ goto out;
+ }
+
+ return 1;
+out:
+ pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+ return res;
+}
+
+bool ovl_is_metacopy_dentry(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (!d_is_reg(dentry))
+ return false;
+
+ if (ovl_dentry_upper(dentry)) {
+ if (!ovl_has_upperdata(d_inode(dentry)))
+ return true;
+ return false;
+ }
+
+ return (oe->numlower > 1);
+}
+
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+{
+ int res;
+ char *s, *next, *buf = NULL;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return NULL;
+ goto fail;
+ }
+
+ buf = kzalloc(res + padding + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ if (res == 0)
+ goto invalid;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
+ if (res < 0)
+ goto fail;
+ if (res == 0)
+ goto invalid;
+
+ if (buf[0] == '/') {
+ for (s = buf; *s++ == '/'; s = next) {
+ next = strchrnul(s, '/');
+ if (s == next)
+ goto invalid;
+ }
+ } else {
+ if (strchr(buf, '/') != NULL)
+ goto invalid;
+ }
+
+ return buf;
+
+err_free:
+ kfree(buf);
+ return ERR_PTR(res);
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
+ goto err_free;
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+ res = -EINVAL;
+ goto err_free;
+}
diff --git a/fs/read_write.c b/fs/read_write.c
index 153f8f690490..39b4a21dd933 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,6 +1964,44 @@ out_error:
}
EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
+int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos, u64 len)
+{
+ s64 ret;
+
+ ret = mnt_want_write_file(dst_file);
+ if (ret)
+ return ret;
+
+ ret = clone_verify_area(dst_file, dst_pos, len, true);
+ if (ret < 0)
+ goto out_drop_write;
+
+ ret = -EINVAL;
+ if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+ goto out_drop_write;
+
+ ret = -EXDEV;
+ if (src_file->f_path.mnt != dst_file->f_path.mnt)
+ goto out_drop_write;
+
+ ret = -EISDIR;
+ if (S_ISDIR(file_inode(dst_file)->i_mode))
+ goto out_drop_write;
+
+ ret = -EINVAL;
+ if (!dst_file->f_op->dedupe_file_range)
+ goto out_drop_write;
+
+ ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
+ dst_file, dst_pos, len);
+out_drop_write:
+ mnt_drop_write_file(dst_file);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range_one);
+
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
@@ -1972,11 +2010,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
u64 len;
int i;
int ret;
- bool is_admin = capable(CAP_SYS_ADMIN);
u16 count = same->dest_count;
- struct file *dst_file;
- loff_t dst_off;
- ssize_t deduped;
+ int deduped;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
@@ -2003,6 +2038,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
if (off + len > i_size_read(src))
return -EINVAL;
+ /* Arbitrary 1G limit on a single dedupe request, can be raised. */
+ len = min_t(u64, len, 1 << 30);
+
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
@@ -2010,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
}
for (i = 0, info = same->info; i < count; i++, info++) {
- struct inode *dst;
struct fd dst_fd = fdget(info->dest_fd);
+ struct file *dst_file = dst_fd.file;
- dst_file = dst_fd.file;
if (!dst_file) {
info->status = -EBADF;
goto next_loop;
}
- dst = file_inode(dst_file);
-
- ret = mnt_want_write_file(dst_file);
- if (ret) {
- info->status = ret;
- goto next_fdput;
- }
-
- dst_off = info->dest_offset;
- ret = clone_verify_area(dst_file, dst_off, len, true);
- if (ret < 0) {
- info->status = ret;
- goto next_file;
- }
- ret = 0;
if (info->reserved) {
info->status = -EINVAL;
- } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
- info->status = -EINVAL;
- } else if (file->f_path.mnt != dst_file->f_path.mnt) {
- info->status = -EXDEV;
- } else if (S_ISDIR(dst->i_mode)) {
- info->status = -EISDIR;
- } else if (dst_file->f_op->dedupe_file_range == NULL) {
- info->status = -EINVAL;
- } else {
- deduped = dst_file->f_op->dedupe_file_range(file, off,
- len, dst_file,
- info->dest_offset);
- if (deduped == -EBADE)
- info->status = FILE_DEDUPE_RANGE_DIFFERS;
- else if (deduped < 0)
- info->status = deduped;
- else
- info->bytes_deduped += deduped;
+ goto next_fdput;
}
-next_file:
- mnt_drop_write_file(dst_file);
+ deduped = vfs_dedupe_file_range_one(file, off, dst_file,
+ info->dest_offset, len);
+ if (deduped == -EBADE)
+ info->status = FILE_DEDUPE_RANGE_DIFFERS;
+ else if (deduped < 0)
+ info->status = deduped;
+ else
+ info->bytes_deduped = len;
+
next_fdput:
fdput(dst_fd);
next_loop:
diff --git a/fs/xattr.c b/fs/xattr.c
index f9cb1db187b7..3a24027c062d 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -23,7 +23,6 @@
#include <linux/posix_acl_xattr.h>
#include <linux/uaccess.h>
-#include "internal.h"
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
@@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
if (!f.file)
return error;
audit_file(f.file);
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = setxattr(f.file->f_path.dentry, name, value, size, flags);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
}
fdput(f);
return error;
@@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
if (!f.file)
return error;
audit_file(f.file);
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = removexattr(f.file->f_path.dentry, name);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
}
fdput(f);
return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5eaef2c17293..61a5ad2600e8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -931,31 +931,16 @@ xfs_file_clone_range(
len, false);
}
-STATIC ssize_t
+STATIC int
xfs_file_dedupe_range(
- struct file *src_file,
- u64 loff,
- u64 len,
- struct file *dst_file,
- u64 dst_loff)
+ struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
{
- struct inode *srci = file_inode(src_file);
- u64 max_dedupe;
- int error;
-
- /*
- * Since we have to read all these pages in to compare them, cut
- * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
- * That means we won't do more than MAX_RW_COUNT IO per request.
- */
- max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
- if (len > max_dedupe)
- len = max_dedupe;
- error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, true);
- if (error)
- return error;
- return len;
}
STATIC int