summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-12 19:28:55 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-12 19:28:55 +0300
commit6b1c776d3efbda31085b6a9f3bc7f774511fafd9 (patch)
treed306d6c2841b88b0a78ccdaf3532bb1e1f8260a0
parent58c7ffc0747a3a9145629d4966291f0586703767 (diff)
parentf4439de118283159ff165e52036134a278ebf990 (diff)
downloadlinux-6b1c776d3efbda31085b6a9f3bc7f774511fafd9.tar.xz
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi: "This work from Amir introduces the inodes index feature, which provides: - hardlinks are not broken on copy up - infrastructure for overlayfs NFS export This also fixes constant st_ino for samefs case for lower hardlinks" * 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (33 commits) ovl: mark parent impure and restore timestamp on ovl_link_up() ovl: document copying layers restrictions with inodes index ovl: cleanup orphan index entries ovl: persistent overlay inode nlink for indexed inodes ovl: implement index dir copy up ovl: move copy up lock out ovl: rearrange copy up ovl: add flag for upper in ovl_entry ovl: use struct copy_up_ctx as function argument ovl: base tmpfile in workdir too ovl: factor out ovl_copy_up_inode() helper ovl: extract helper to get temp file in copy up ovl: defer upper dir lock to tempfile link ovl: hash overlay non-dir inodes by copy up origin ovl: cleanup bad and stale index entries on mount ovl: lookup index entry for copy up origin ovl: verify index dir matches upper dir ovl: verify upper root dir matches lower root dir ovl: introduce the inodes index dir feature ovl: generalize ovl_create_workdir() ...
-rw-r--r--Documentation/filesystems/overlayfs.txt34
-rw-r--r--fs/overlayfs/Kconfig20
-rw-r--r--fs/overlayfs/copy_up.c410
-rw-r--r--fs/overlayfs/dir.c52
-rw-r--r--fs/overlayfs/inode.c215
-rw-r--r--fs/overlayfs/namei.c368
-rw-r--r--fs/overlayfs/overlayfs.h58
-rw-r--r--fs/overlayfs/ovl_entry.h36
-rw-r--r--fs/overlayfs/readdir.c50
-rw-r--r--fs/overlayfs/super.c247
-rw-r--r--fs/overlayfs/util.c345
-rw-r--r--include/linux/fs.h4
12 files changed, 1456 insertions, 383 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index c9e884b52698..36f528a7fdd6 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -201,6 +201,40 @@ rightmost one and going left. In the above example lower1 will be the
top, lower2 the middle and lower3 the bottom layer.
+Sharing and copying layers
+--------------------------
+
+Lower layers may be shared among several overlay mounts and that is indeed
+a very common practice. An overlay mount may use the same lower layer
+path as another overlay mount and it may use a lower layer path that is
+beneath or above the path of another overlay lower layer path.
+
+Using an upper layer path and/or a workdir path that are already used by
+another overlay mount is not allowed and will fail with EBUSY. Using
+partially overlapping paths is not allowed but will not fail with EBUSY.
+
+Mounting an overlay using an upper layer path, where the upper layer path
+was previously used by another mounted overlay in combination with a
+different lower layer path, is allowed, unless the "inodes index" feature
+is enabled.
+
+With the "inodes index" feature, on the first mount, an NFS file
+handle of the lower layer root directory, along with the UUID of the lower
+filesystem, are encoded and stored in the "trusted.overlay.origin" extended
+attribute on the upper layer root directory. On subsequent mount attempts,
+the lower root directory file handle and lower filesystem UUID are compared
+to the stored origin in upper root directory. On failure to verify the
+lower root origin, mount will fail with ESTALE. An overlayfs mount with
+"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
+does not support NFS export, lower filesystem does not have a valid UUID or
+if the upper filesystem does not support extended attributes.
+
+It is quite a common practice to copy overlay layers to a different
+directory tree on the same or different underlying filesystem, and even
+to a different machine. With the "inodes index" feature, trying to mount
+the copied layers will fail the verification of the lower root file handle.
+
+
Non-standard behavior
---------------------
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index c0c9683934b7..cbfc196e5dc5 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -23,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR
Note, that redirects are not backward compatible. That is, mounting
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
+
+config OVERLAY_FS_INDEX
+ bool "Overlayfs: turn on inodes index feature by default"
+ depends on OVERLAY_FS
+ help
+ If this config option is enabled then overlay filesystems will use
+ the inodes index dir to map lower inodes to upper inodes by default.
+ In this case it is still possible to turn off index globally with the
+ "index=off" module option or on a filesystem instance basis with the
+ "index=off" mount option.
+
+ The inodes index feature prevents breaking of lower hardlinks on copy
+ up.
+
+ Note, that the inodes index feature is read-only backward compatible.
+ That is, mounting an overlay which has an index dir on a kernel that
+ doesn't support this feature read-only, will not have any negative
+ outcomes. However, mounting the same overlay with an old kernel
+ read-write and then mounting it again with a new kernel, will have
+ unexpected results.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index e5869f91b3ab..acb6f97deb97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
void *buf;
int buflen = MAX_HANDLE_SZ;
+ uuid_t *uuid = &lower->d_sb->s_uuid;
buf = kmalloc(buflen, GFP_TEMPORARY);
if (!buf)
@@ -271,6 +272,14 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
fh->magic = OVL_FH_MAGIC;
fh->type = fh_type;
fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ /*
+ * When we will want to decode an overlay dentry from this handle
+ * and all layers are on the same fs, if we get a disconnected real
+ * dentry when we decode fid, the only way to tell if we should assign
+ * it to upperdentry or to lowerstack is by checking this flag.
+ */
+ if (is_upper)
+ fh->flags |= OVL_FH_FLAG_PATH_UPPER;
fh->len = fh_len;
fh->uuid = *uuid;
memcpy(fh->fid, buf, buflen);
@@ -283,7 +292,6 @@ out:
static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
- struct super_block *sb = lower->d_sb;
const struct ovl_fh *fh = NULL;
int err;
@@ -292,9 +300,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
- if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
- !uuid_is_null(&sb->s_uuid)) {
- fh = ovl_encode_fh(lower, &sb->s_uuid);
+ if (ovl_can_decode_fh(lower->d_sb)) {
+ fh = ovl_encode_fh(lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
@@ -309,84 +316,156 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
return err;
}
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
- struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, const char *link,
- struct kstat *pstat, bool tmpfile)
+struct ovl_copy_up_ctx {
+ struct dentry *parent;
+ struct dentry *dentry;
+ struct path lowerpath;
+ struct kstat stat;
+ struct kstat pstat;
+ const char *link;
+ struct dentry *destdir;
+ struct qstr destname;
+ struct dentry *workdir;
+ bool tmpfile;
+ bool origin;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct dentry *upper;
+ struct dentry *upperdir = ovl_dentry_upper(c->parent);
+ struct inode *udir = d_inode(upperdir);
+
+ /* Mark parent "impure" because it may now contain non-pure upper */
+ err = ovl_set_impure(c->parent, upperdir);
+ if (err)
+ return err;
+
+ err = ovl_set_nlink_lower(c->dentry);
+ if (err)
+ return err;
+
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+ c->dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (!IS_ERR(upper)) {
+ err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
+ true);
+ dput(upper);
+
+ if (!err) {
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+ }
+ inode_unlock(udir);
+ ovl_set_nlink_upper(c->dentry);
+
+ return err;
+}
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+ struct dentry **newdentry)
{
- struct inode *wdir = workdir->d_inode;
- struct inode *udir = upperdir->d_inode;
- struct dentry *newdentry = NULL;
- struct dentry *upper = NULL;
- struct dentry *temp = NULL;
int err;
+ struct dentry *upper;
+ struct inode *udir = d_inode(c->destdir);
+
+ upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ if (IS_ERR(upper))
+ return PTR_ERR(upper);
+
+ if (c->tmpfile)
+ err = ovl_do_link(temp, udir, upper, true);
+ else
+ err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+
+ if (!err)
+ *newdentry = dget(c->tmpfile ? upper : temp);
+ dput(upper);
+
+ return err;
+}
+
+static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
+{
+ int err;
+ struct dentry *temp;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
struct cattr cattr = {
/* Can't properly set mode on creation because of the umask */
- .mode = stat->mode & S_IFMT,
- .rdev = stat->rdev,
- .link = link
+ .mode = c->stat.mode & S_IFMT,
+ .rdev = c->stat.rdev,
+ .link = c->link
};
- err = security_inode_copy_up(dentry, &new_creds);
+ err = security_inode_copy_up(c->dentry, &new_creds);
if (err < 0)
goto out;
if (new_creds)
old_creds = override_creds(new_creds);
- if (tmpfile)
- temp = ovl_do_tmpfile(upperdir, stat->mode);
- else
- temp = ovl_lookup_temp(workdir);
- err = 0;
- if (IS_ERR(temp)) {
- err = PTR_ERR(temp);
- temp = NULL;
+ if (c->tmpfile) {
+ temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+ if (IS_ERR(temp))
+ goto temp_err;
+ } else {
+ temp = ovl_lookup_temp(c->workdir);
+ if (IS_ERR(temp))
+ goto temp_err;
+
+ err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
+ NULL, true);
+ if (err) {
+ dput(temp);
+ goto out;
+ }
}
-
- if (!err && !tmpfile)
- err = ovl_create_real(wdir, temp, &cattr, NULL, true);
-
+ err = 0;
+ *tempp = temp;
+out:
if (new_creds) {
revert_creds(old_creds);
put_cred(new_creds);
}
- if (err)
- goto out;
+ return err;
- if (S_ISREG(stat->mode)) {
+temp_err:
+ err = PTR_ERR(temp);
+ goto out;
+}
+
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ int err;
+
+ if (S_ISREG(c->stat.mode)) {
struct path upperpath;
- ovl_path_upper(dentry, &upperpath);
+ ovl_path_upper(c->dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
upperpath.dentry = temp;
- if (tmpfile) {
- inode_unlock(udir);
- err = ovl_copy_up_data(lowerpath, &upperpath,
- stat->size);
- inode_lock_nested(udir, I_MUTEX_PARENT);
- } else {
- err = ovl_copy_up_data(lowerpath, &upperpath,
- stat->size);
- }
-
+ err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
if (err)
- goto out_cleanup;
+ return err;
}
- err = ovl_copy_xattr(lowerpath->dentry, temp);
+ err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
- goto out_cleanup;
+ return err;
inode_lock(temp->d_inode);
- err = ovl_set_attr(temp, stat);
+ err = ovl_set_attr(temp, &c->stat);
inode_unlock(temp->d_inode);
if (err)
- goto out_cleanup;
+ return err;
/*
* Store identifier of lower inode in upper inode xattr to
@@ -395,41 +474,48 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
* Don't set origin when we are breaking the association with a lower
* hard link.
*/
- if (S_ISDIR(stat->mode) || stat->nlink == 1) {
- err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (c->origin) {
+ err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
if (err)
- goto out_cleanup;
+ return err;
}
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- if (IS_ERR(upper)) {
- err = PTR_ERR(upper);
- upper = NULL;
- goto out_cleanup;
- }
+ return 0;
+}
- if (tmpfile)
- err = ovl_do_link(temp, udir, upper, true);
- else
- err = ovl_do_rename(wdir, temp, udir, upper, 0);
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+{
+ struct inode *udir = c->destdir->d_inode;
+ struct dentry *newdentry = NULL;
+ struct dentry *temp = NULL;
+ int err;
+
+ err = ovl_get_tmpfile(c, &temp);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up_inode(c, temp);
if (err)
goto out_cleanup;
- newdentry = dget(tmpfile ? upper : temp);
- ovl_dentry_update(dentry, newdentry);
- ovl_inode_update(d_inode(dentry), d_inode(newdentry));
+ if (c->tmpfile) {
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ err = ovl_install_temp(c, temp, &newdentry);
+ inode_unlock(udir);
+ } else {
+ err = ovl_install_temp(c, temp, &newdentry);
+ }
+ if (err)
+ goto out_cleanup;
- /* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, pstat);
+ ovl_inode_update(d_inode(c->dentry), newdentry);
out:
dput(temp);
- dput(upper);
return err;
out_cleanup:
- if (!tmpfile)
- ovl_cleanup(wdir, temp);
+ if (!c->tmpfile)
+ ovl_cleanup(d_inode(c->workdir), temp);
goto out;
}
@@ -442,78 +528,119 @@ out_cleanup:
* is possible that the copy up will lock the old parent. At that point
* the file will have already been copied up anyway.
*/
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+ bool indexed = false;
+
+ if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
+ c->stat.nlink > 1)
+ indexed = true;
+
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+ c->origin = true;
+
+ if (indexed) {
+ c->destdir = ovl_indexdir(c->dentry->d_sb);
+ err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+ if (err)
+ return err;
+ } else {
+ /*
+ * Mark parent "impure" because it may now contain non-pure
+ * upper
+ */
+ err = ovl_set_impure(c->parent, c->destdir);
+ if (err)
+ return err;
+ }
+
+ /* Should we copyup with O_TMPFILE or with workdir? */
+ if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+ c->tmpfile = true;
+ err = ovl_copy_up_locked(c);
+ } else {
+ err = -EIO;
+ if (lock_rename(c->workdir, c->destdir) != NULL) {
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ } else {
+ err = ovl_copy_up_locked(c);
+ unlock_rename(c->workdir, c->destdir);
+ }
+ }
+
+ if (indexed) {
+ if (!err)
+ ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+ kfree(c->destname.name);
+ } else if (!err) {
+ struct inode *udir = d_inode(c->destdir);
+
+ /* Restore timestamps on parent (best effort) */
+ inode_lock(udir);
+ ovl_set_timestamps(c->destdir, &c->pstat);
+ inode_unlock(udir);
+
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+
+ return err;
+}
+
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
- struct path *lowerpath, struct kstat *stat)
+ int flags)
{
- DEFINE_DELAYED_CALL(done);
- struct dentry *workdir = ovl_workdir(dentry);
int err;
- struct kstat pstat;
+ DEFINE_DELAYED_CALL(done);
struct path parentpath;
- struct dentry *lowerdentry = lowerpath->dentry;
- struct dentry *upperdir;
- const char *link = NULL;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_copy_up_ctx ctx = {
+ .parent = parent,
+ .dentry = dentry,
+ .workdir = ovl_workdir(dentry),
+ };
- if (WARN_ON(!workdir))
+ if (WARN_ON(!ctx.workdir))
return -EROFS;
- ovl_do_check_copy_up(lowerdentry);
-
- ovl_path_upper(parent, &parentpath);
- upperdir = parentpath.dentry;
-
- /* Mark parent "impure" because it may now contain non-pure upper */
- err = ovl_set_impure(parent, upperdir);
+ ovl_path_lower(dentry, &ctx.lowerpath);
+ err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
- err = vfs_getattr(&parentpath, &pstat,
+ ovl_path_upper(parent, &parentpath);
+ ctx.destdir = parentpath.dentry;
+ ctx.destname = dentry->d_name;
+
+ err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
- if (S_ISLNK(stat->mode)) {
- link = vfs_get_link(lowerdentry, &done);
- if (IS_ERR(link))
- return PTR_ERR(link);
- }
-
- /* Should we copyup with O_TMPFILE or with workdir? */
- if (S_ISREG(stat->mode) && ofs->tmpfile) {
- err = ovl_copy_up_start(dentry);
- /* err < 0: interrupted, err > 0: raced with another copy-up */
- if (unlikely(err)) {
- pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
- if (err > 0)
- err = 0;
- goto out_done;
- }
-
- inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
- err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link, &pstat, true);
- inode_unlock(upperdir->d_inode);
- ovl_copy_up_end(dentry);
- goto out_done;
- }
+ /* maybe truncate regular file. this has no effect on dirs */
+ if (flags & O_TRUNC)
+ ctx.stat.size = 0;
- err = -EIO;
- if (lock_rename(workdir, upperdir) != NULL) {
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- goto out_unlock;
+ if (S_ISLNK(ctx.stat.mode)) {
+ ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+ if (IS_ERR(ctx.link))
+ return PTR_ERR(ctx.link);
}
- if (ovl_dentry_upper(dentry)) {
- /* Raced with another copy-up? Nothing to do, then... */
- err = 0;
- goto out_unlock;
+ ovl_do_check_copy_up(ctx.lowerpath.dentry);
+
+ err = ovl_copy_up_start(dentry);
+ /* err < 0: interrupted, err > 0: raced with another copy-up */
+ if (unlikely(err)) {
+ if (err > 0)
+ err = 0;
+ } else {
+ if (!ovl_dentry_upper(dentry))
+ err = ovl_do_copy_up(&ctx);
+ if (!err && !ovl_dentry_has_upper_alias(dentry))
+ err = ovl_link_up(&ctx);
+ ovl_copy_up_end(dentry);
}
-
- err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link, &pstat, false);
-out_unlock:
- unlock_rename(workdir, upperdir);
-out_done:
do_delayed_call(&done);
return err;
@@ -527,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
while (!err) {
struct dentry *next;
struct dentry *parent;
- struct path lowerpath;
- struct kstat stat;
- enum ovl_path_type type = ovl_path_type(dentry);
- if (OVL_TYPE_UPPER(type))
+ /*
+ * Check whether copy-up has happened and whether the upper alias
+ * (in case of hard links) is there.
+ *
+ * Both checks are lockless:
+ * - false negatives: will recheck under oi->lock
+ * - false positives:
+ * + ovl_dentry_upper() uses memory barriers to ensure the
+ * upper dentry is up-to-date
+ * + ovl_dentry_has_upper_alias() relies on locking of
+ * upper parent i_rwsem to prevent reordering copy-up
+ * with rename.
+ */
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
break;
next = dget(dentry);
@@ -539,22 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
for (;;) {
parent = dget_parent(next);
- type = ovl_path_type(parent);
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(parent))
break;
dput(next);
next = parent;
}
- ovl_path_lower(next, &lowerpath);
- err = vfs_getattr(&lowerpath, &stat,
- STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
- /* maybe truncate regular file. this has no effect on dirs */
- if (flags & O_TRUNC)
- stat.size = 0;
- if (!err)
- err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
+ err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a63a71656e9b..641d9ee97f91 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
-void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
@@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
+
+ return err;
}
struct dentry *ovl_lookup_temp(struct dentry *workdir)
@@ -154,12 +156,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
{
ovl_dentry_version_inc(dentry->d_parent);
- ovl_dentry_update(dentry, newdentry);
+ ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
- ovl_inode_update(inode, d_inode(newdentry));
+ ovl_inode_update(inode, newdentry);
ovl_copyattr(newdentry->d_inode, inode);
} else {
- WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
+ WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+ dput(newdentry);
inc_nlink(inode);
}
d_instantiate(dentry, inode);
@@ -588,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
int err;
+ bool locked = false;
struct inode *inode;
err = ovl_want_write(old);
@@ -598,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ err = ovl_nlink_start(old, &locked);
+ if (err)
+ goto out_drop_write;
+
inode = d_inode(old);
ihold(inode);
@@ -605,12 +613,18 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
+ ovl_nlink_end(old, locked);
out_drop_write:
ovl_drop_write(old);
out:
return err;
}
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+ return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
struct dentry *workdir = ovl_workdir(dentry);
@@ -646,7 +660,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && ovl_dentry_upper(dentry) &&
- upper != ovl_dentry_upper(dentry))) {
+ !ovl_matches_upper(dentry, upper))) {
goto out_dput_upper;
}
@@ -707,7 +721,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
- (!opaquedir && upper != ovl_dentry_upper(dentry)))
+ (!opaquedir && !ovl_matches_upper(dentry, upper)))
goto out_dput_upper;
if (is_dir)
@@ -735,8 +749,8 @@ out:
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
- enum ovl_path_type type;
int err;
+ bool locked = false;
const struct cred *old_cred;
err = ovl_want_write(dentry);
@@ -747,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (err)
goto out_drop_write;
- type = ovl_path_type(dentry);
+ err = ovl_nlink_start(dentry, &locked);
+ if (err)
+ goto out_drop_write;
old_cred = ovl_override_creds(dentry->d_sb);
if (!ovl_lower_positive(dentry))
@@ -761,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
else
drop_nlink(dentry->d_inode);
}
+ ovl_nlink_end(dentry, locked);
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -883,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unsigned int flags)
{
int err;
+ bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
@@ -926,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
+ } else {
+ err = ovl_nlink_start(new, &locked);
+ if (err)
+ goto out_drop_write;
}
old_cred = ovl_override_creds(old->d_sb);
@@ -985,7 +1007,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_unlock;
err = -ESTALE;
- if (olddentry != ovl_dentry_upper(old))
+ if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
@@ -998,12 +1020,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
- if (ovl_dentry_upper(new)) {
+ if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
if (newdentry != opaquedir)
goto out_dput;
} else {
- if (newdentry != ovl_dentry_upper(new))
+ if (!ovl_matches_upper(new, newdentry))
goto out_dput;
}
} else {
@@ -1046,6 +1068,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (cleanup_whiteout)
ovl_cleanup(old_upperdir->d_inode, newdentry);
+ if (overwrite && d_inode(new)) {
+ if (new_is_dir)
+ clear_nlink(d_inode(new));
+ else
+ drop_nlink(d_inode(new));
+ }
+
ovl_dentry_version_inc(old->d_parent);
ovl_dentry_version_inc(new->d_parent);
@@ -1057,6 +1086,7 @@ out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
revert_creds(old_cred);
+ ovl_nlink_end(new, locked);
out_drop_write:
ovl_drop_write(old);
out:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index d613e2c41242..69f4fc26ee39 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -12,6 +12,7 @@
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
@@ -96,11 +97,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
- * Lower hardlinks are broken on copy up to different
+ * Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
+ * With inodes index enabled, it is safe to use st_ino
+ * of an indexed hardlinked origin. The index validates
+ * that the upper hardlink is not broken.
*/
- if (is_dir || lowerstat.nlink == 1)
+ if (is_dir || lowerstat.nlink == 1 ||
+ ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->ino = lowerstat.ino;
}
stat->dev = dentry->d_sb->s_dev;
@@ -126,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (is_dir && OVL_TYPE_MERGE(type))
stat->nlink = 1;
+ /*
+ * Return the overlay inode nlinks for indexed upper inodes.
+ * Overlay inode nlink counts the union of the upper hardlinks
+ * and non-covered lower hardlinks. It does not include the upper
+ * index hardlink.
+ */
+ if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ stat->nlink = dentry->d_inode->i_nlink;
+
out:
revert_creds(old_cred);
@@ -134,8 +148,8 @@ out:
int ovl_permission(struct inode *inode, int mask)
{
- bool is_upper;
- struct inode *realinode = ovl_inode_real(inode, &is_upper);
+ struct inode *upperinode = ovl_inode_upper(inode);
+ struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
const struct cred *old_cred;
int err;
@@ -154,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask)
return err;
old_cred = ovl_override_creds(inode->i_sb);
- if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+ if (!upperinode &&
+ !special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
@@ -286,7 +301,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
- struct inode *realinode = ovl_inode_real(inode, NULL);
+ struct inode *realinode = ovl_inode_real(inode);
const struct cred *old_cred;
struct posix_acl *acl;
@@ -300,13 +315,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
-static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
- struct dentry *realdentry)
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
return false;
- if (special_file(realdentry->d_inode->i_mode))
+ if (special_file(d_inode(dentry)->i_mode))
return false;
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
@@ -318,11 +333,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
{
int err = 0;
- struct path realpath;
- enum ovl_path_type type;
- type = ovl_path_real(dentry, &realpath);
- if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
+ if (ovl_open_need_copy_up(dentry, file_flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, file_flags);
@@ -440,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
}
}
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlinked.
+ */
+#define OVL_NLINK_ADD_UPPER (1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+ struct dentry *realdentry, const char *format)
+{
+ struct inode *inode = d_inode(dentry);
+ struct inode *realinode = d_inode(realdentry);
+ char buf[13];
+ int len;
+
+ len = snprintf(buf, sizeof(buf), format,
+ (int) (inode->i_nlink - realinode->i_nlink));
+
+ return ovl_do_setxattr(ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len, 0);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+ struct dentry *upperdentry,
+ unsigned int fallback)
+{
+ int nlink_diff;
+ int nlink;
+ char buf[13];
+ int err;
+
+ if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+ return fallback;
+
+ err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+ if (err < 0)
+ goto fail;
+
+ buf[err] = '\0';
+ if ((buf[0] != 'L' && buf[0] != 'U') ||
+ (buf[1] != '+' && buf[1] != '-'))
+ goto fail;
+
+ err = kstrtoint(buf + 1, 10, &nlink_diff);
+ if (err < 0)
+ goto fail;
+
+ nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+ nlink += nlink_diff;
+
+ if (nlink <= 0)
+ goto fail;
+
+ return nlink;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+ upperdentry, err);
+ return fallback;
+}
+
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -453,27 +562,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
static int ovl_inode_test(struct inode *inode, void *data)
{
- return ovl_inode_real(inode, NULL) == data;
+ return inode->i_private == data;
}
static int ovl_inode_set(struct inode *inode, void *data)
{
- inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
+ inode->i_private = data;
return 0;
}
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
+/*
+ * Check that the real inodes recorded in the overlay @inode agree with the
+ * given lower and upper dentries.
+ */
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+			     struct dentry *upperdentry)
+{
+	struct inode *lowerinode = NULL;
+
+	if (lowerdentry)
+		lowerinode = d_inode(lowerdentry);
+
+	/* Lower (origin) inode must match, even if NULL */
+	if (ovl_inode_lower(inode) != lowerinode)
+		return false;
+
+	/*
+	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+	 * This happens when finding a lower alias for a copied up hard link.
+	 */
+	return !upperdentry ||
+	       ovl_inode_upper(inode) == d_inode(upperdentry);
+}
+
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
{
+ struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+ struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
- inode = iget5_locked(sb, (unsigned long) realinode,
- ovl_inode_test, ovl_inode_set, realinode);
- if (inode && inode->i_state & I_NEW) {
- ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
- set_nlink(inode, realinode->i_nlink);
- unlock_new_inode(inode);
+ if (!realinode)
+ realinode = d_inode(lowerdentry);
+
+ if (!S_ISDIR(realinode->i_mode) &&
+ (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
+ struct inode *key = d_inode(lowerdentry ?: upperdentry);
+ unsigned int nlink;
+
+ inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+ ovl_inode_test, ovl_inode_set, key);
+ if (!inode)
+ goto out_nomem;
+ if (!(inode->i_state & I_NEW)) {
+ /*
+ * Verify that the underlying files stored in the inode
+ * match those in the dentry.
+ */
+ if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
+ goto out;
+ }
+
+ dput(upperdentry);
+ goto out;
+ }
+
+ nlink = ovl_get_nlink(lowerdentry, upperdentry,
+ realinode->i_nlink);
+ set_nlink(inode, nlink);
+ } else {
+ inode = new_inode(dentry->d_sb);
+ if (!inode)
+ goto out_nomem;
}
+ ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+ ovl_inode_init(inode, upperdentry, lowerdentry);
+
+ if (upperdentry && ovl_is_impuredir(upperdentry))
+ ovl_set_flag(OVL_IMPURE, inode);
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
+out:
return inode;
+
+out_nomem:
+ inode = ERR_PTR(-ENOMEM);
+ goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de0d4f742f36..9bc0e580a5b3 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -88,13 +88,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
return 1;
}
-static struct dentry *ovl_get_origin(struct dentry *dentry,
- struct vfsmount *mnt)
+static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
{
int res;
struct ovl_fh *fh = NULL;
- struct dentry *origin = NULL;
- int bytes;
res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
if (res < 0) {
@@ -106,7 +103,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_TEMPORARY);
+ fh = kzalloc(res, GFP_TEMPORARY);
if (!fh)
return ERR_PTR(-ENOMEM);
@@ -129,7 +126,29 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
(fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
goto out;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
+ return fh;
+
+out:
+ kfree(fh);
+ return NULL;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ goto out;
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ goto out;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ struct dentry *origin = NULL;
+ struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+ int bytes;
+
+ if (IS_ERR_OR_NULL(fh))
+ return (struct dentry *)fh;
/*
* Make sure that the stored uuid matches the uuid of the lower
@@ -138,6 +157,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
goto out;
+ bytes = (fh->len - offsetof(struct ovl_fh, fid));
origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
bytes >> 2, (int)fh->type,
ovl_acceptable, NULL);
@@ -149,21 +169,17 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
}
if (ovl_dentry_weird(origin) ||
- ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
- dput(origin);
- origin = NULL;
+ ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
goto invalid;
- }
out:
kfree(fh);
return origin;
-fail:
- pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
- goto out;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
+ dput(origin);
+ origin = NULL;
goto out;
}
@@ -269,34 +285,31 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
}
-static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+static int ovl_check_origin(struct dentry *upperdentry,
+ struct path *lowerstack, unsigned int numlower,
struct path **stackp, unsigned int *ctrp)
{
- struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
- struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct vfsmount *mnt;
- struct dentry *origin;
+ struct dentry *origin = NULL;
+ int i;
+
+
+ for (i = 0; i < numlower; i++) {
+ mnt = lowerstack[i].mnt;
+ origin = ovl_get_origin(upperdentry, mnt);
+ if (IS_ERR(origin))
+ return PTR_ERR(origin);
- if (!same_sb || !roe->numlower)
+ if (origin)
+ break;
+ }
+
+ if (!origin)
return 0;
- /*
- * Since all layers are on the same fs, we use the first layer for
- * decoding the file handle. We may get a disconnected dentry,
- * which is fine, because we only need to hold the origin inode in
- * cache and use its inode number. We may even get a connected dentry,
- * that is not under the first layer's root. That is also fine for
- * using it's inode number - it's the same as if we held a reference
- * to a dentry in first layer that was moved under us.
- */
- mnt = roe->lowerstack[0].mnt;
-
- origin = ovl_get_origin(upperdentry, mnt);
- if (IS_ERR_OR_NULL(origin))
- return PTR_ERR(origin);
-
- BUG_ON(*stackp || *ctrp);
- *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+ BUG_ON(*ctrp);
+ if (!*stackp)
+ *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
if (!*stackp) {
dput(origin);
return -ENOMEM;
@@ -308,6 +321,215 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
}
/*
+ * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+{
+	struct ovl_fh *stored = ovl_get_origin_fh(dentry);
+	int ret;
+
+	/* No origin file handle is stored on @dentry */
+	if (!stored)
+		return -ENODATA;
+	if (IS_ERR(stored))
+		return PTR_ERR(stored);
+
+	/* Handles match only if both the length and every byte agree */
+	if (fh->len == stored->len && !memcmp(fh, stored, fh->len))
+		ret = 0;
+	else
+		ret = -ESTALE;
+
+	kfree(stored);
+	return ret;
+}
+
+/*
+ * Verify that an inode matches the origin file handle stored in upper inode.
+ *
+ * If @set is true and there is no stored file handle, encode and store origin
+ * file handle in OVL_XATTR_ORIGIN.
+ *
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+		      struct dentry *origin, bool is_upper, bool set)
+{
+	struct inode *inode;
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_fh(origin, is_upper);
+	if (IS_ERR(fh)) {
+		err = PTR_ERR(fh);
+		/*
+		 * The fail path ends in kfree(fh); an ERR_PTR must never be
+		 * handed to kfree(), so drop it here.
+		 */
+		fh = NULL;
+		goto fail;
+	}
+
+	err = ovl_verify_origin_fh(dentry, fh);
+	/* Nothing stored yet: record the origin if the caller asked for it */
+	if (set && err == -ENODATA)
+		err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+	if (err)
+		goto fail;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	inode = d_inode(origin);
+	pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
+			    origin, inode ? inode->i_ino : 0, err);
+	goto out;
+}
+
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+		     unsigned int numlower)
+{
+	struct ovl_fh *fh = NULL;
+	size_t len;
+	struct path origin = { };
+	/* Let ovl_check_origin() fill the on-stack path instead of allocating */
+	struct path *stack = &origin;
+	unsigned int ctr = 0;
+	int err;
+
+	/* A negative dentry has nothing to verify */
+	if (!d_inode(index))
+		return 0;
+
+	err = -EISDIR;
+	if (d_is_dir(index))
+		goto fail;
+
+	/* The name is a hex encoded file handle: two characters per byte */
+	err = -EINVAL;
+	if (index->d_name.len < sizeof(struct ovl_fh)*2)
+		goto fail;
+
+	err = -ENOMEM;
+	len = index->d_name.len / 2;
+	fh = kzalloc(len, GFP_TEMPORARY);
+	if (!fh)
+		goto fail;
+
+	err = -EINVAL;
+	if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+		goto fail;
+
+	err = ovl_verify_origin_fh(index, fh);
+	if (err)
+		goto fail;
+
+	err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+	if (!err && !ctr)
+		err = -ESTALE;
+	if (err)
+		goto fail;
+
+	/*
+	 * Check if index is orphan and don't warn before cleaning it.
+	 * ovl_get_nlink() takes the lower dentry first and reads the nlink
+	 * xattr from its second (upper) argument, so the decoded origin is
+	 * the lower dentry and the index entry is the upper one.
+	 */
+	if (d_inode(index)->i_nlink == 1 &&
+	    ovl_get_nlink(origin.dentry, index, 0) == 0)
+		err = -ENOENT;
+
+	dput(origin.dentry);
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n",
+			    index, err);
+	goto out;
+}
+
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry is negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to identify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+{
+	struct ovl_fh *fh = ovl_encode_fh(origin, false);
+	char *hex, *end;
+	int err = -ENOMEM;
+
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	/* Two hex characters per file handle byte; no terminator is stored */
+	hex = kzalloc(fh->len * 2, GFP_TEMPORARY);
+	if (!hex)
+		goto out;
+
+	end = bin2hex(hex, fh, fh->len);
+	*name = (struct qstr) QSTR_INIT(hex, end - hex);
+	err = 0;
+out:
+	kfree(fh);
+	return err;
+}
+
+/*
+ * Look up the index entry named after the file handle of @origin.
+ *
+ * Returns the positive index dentry, NULL when the index entry is negative
+ * and that is acceptable, the lookup error when the lookup itself fails, or
+ * ERR_PTR(-EIO) when @upper is a hard link with an origin but no index, or
+ * when the index found points to a different inode than @upper.
+ */
+static struct dentry *ovl_lookup_index(struct dentry *dentry,
+				       struct dentry *upper,
+				       struct dentry *origin)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *index;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	if (IS_ERR(index)) {
+		/* Report the lookup error, not the stale 0 from above */
+		err = PTR_ERR(index);
+		/*
+		 * The name buffer is not NUL terminated, so bound the print
+		 * with a precision ("%.*s") rather than a field width.
+		 */
+		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+				    d_inode(origin)->i_ino, (int)name.len,
+				    name.name, err);
+		goto out;
+	}
+
+	if (d_is_negative(index)) {
+		if (upper && d_inode(origin)->i_nlink > 1) {
+			pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
+					    d_inode(origin)->i_ino);
+			goto fail;
+		}
+
+		dput(index);
+		index = NULL;
+	} else if (upper && d_inode(index) != d_inode(upper)) {
+		pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n",
+				    d_inode(index)->i_ino,
+				    d_inode(upper)->i_ino);
+		goto fail;
+	}
+
+out:
+	kfree(name.name);
+	return index;
+
+fail:
+	dput(index);
+	index = ERR_PTR(-EIO);
+	goto out;
+}
+
+/*
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
*/
@@ -338,10 +560,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
+ struct dentry *index = NULL;
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
- bool upperimpure = false;
char *upperredirect = NULL;
struct dentry *this;
unsigned int i;
@@ -359,7 +581,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
old_cred = ovl_override_creds(dentry->d_sb);
- upperdir = ovl_upperdentry_dereference(poe);
+ upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
err = ovl_lookup_layer(upperdir, &d, &upperdentry);
if (err)
@@ -372,8 +594,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
if (upperdentry && !d.is_dir) {
BUG_ON(!d.stop || d.redirect);
- err = ovl_check_origin(dentry, upperdentry,
- &stack, &ctr);
+ /*
+ * Lookup copy up origin by decoding origin file handle.
+ * We may get a disconnected dentry, which is fine,
+ * because we only need to hold the origin inode in
+ * cache and use its inode number. We may even get a
+ * connected dentry, that is not under any of the lower
+ * layers root. That is also fine for using its inode
+ * number - it's the same as if we held a reference
+ * to a dentry in lower layer that was moved under us.
+ */
+ err = ovl_check_origin(upperdentry, roe->lowerstack,
+ roe->numlower, &stack, &ctr);
if (err)
goto out;
}
@@ -386,8 +618,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
}
upperopaque = d.opaque;
- if (upperdentry && d.is_dir)
- upperimpure = ovl_is_impuredir(upperdentry);
}
if (!d.stop && poe->numlower) {
@@ -428,48 +658,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
+ /* Lookup index by lower inode and verify it matches upper inode */
+ if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
+ struct dentry *origin = stack[0].dentry;
+
+ index = ovl_lookup_index(dentry, upperdentry, origin);
+ if (IS_ERR(index)) {
+ err = PTR_ERR(index);
+ index = NULL;
+ goto out_put;
+ }
+ }
+
oe = ovl_alloc_entry(ctr);
err = -ENOMEM;
if (!oe)
goto out_put;
- if (upperdentry || ctr) {
- struct dentry *realdentry;
- struct inode *realinode;
+ oe->opaque = upperopaque;
+ memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ dentry->d_fsdata = oe;
- realdentry = upperdentry ? upperdentry : stack[0].dentry;
- realinode = d_inode(realdentry);
+ if (upperdentry)
+ ovl_dentry_set_upper_alias(dentry);
+ else if (index)
+ upperdentry = dget(index);
- err = -ENOMEM;
- if (upperdentry && !d_is_dir(upperdentry)) {
- inode = ovl_get_inode(dentry->d_sb, realinode);
- } else {
- inode = ovl_new_inode(dentry->d_sb, realinode->i_mode,
- realinode->i_rdev);
- if (inode)
- ovl_inode_init(inode, realinode, !!upperdentry);
- }
- if (!inode)
+ if (upperdentry || ctr) {
+ inode = ovl_get_inode(dentry, upperdentry);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_free_oe;
- ovl_copyattr(realdentry->d_inode, inode);
+
+ OVL_I(inode)->redirect = upperredirect;
+ if (index)
+ ovl_set_flag(OVL_INDEX, inode);
}
revert_creds(old_cred);
- oe->opaque = upperopaque;
- oe->impure = upperimpure;
- oe->redirect = upperredirect;
- oe->__upperdentry = upperdentry;
- memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ dput(index);
kfree(stack);
kfree(d.redirect);
- dentry->d_fsdata = oe;
d_add(dentry, inode);
return NULL;
out_free_oe:
+ dentry->d_fsdata = NULL;
kfree(oe);
out_put:
+ dput(index);
for (i = 0; i < ctr; i++)
dput(stack[i].dentry);
kfree(stack);
@@ -499,7 +737,7 @@ bool ovl_lower_positive(struct dentry *dentry)
return oe->opaque;
/* Negative upper -> positive lower */
- if (!oe->__upperdentry)
+ if (!ovl_dentry_upper(dentry))
return true;
/* Positive upper -> have to look up lower to see whether it exists */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 10863b4105fa..60d26605e039 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -25,6 +25,12 @@ enum ovl_path_type {
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
+
+enum ovl_flag {
+ OVL_IMPURE,
+ OVL_INDEX,
+};
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
@@ -38,6 +44,8 @@ enum ovl_path_type {
/* CPU byte order required for fid decoding: */
#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER (1 << 2)
#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
@@ -60,8 +68,6 @@ struct ovl_fh {
u8 fid[0]; /* file identifier */
} __packed;
-#define OVL_ISUPPER_MASK 1UL
-
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
@@ -175,22 +181,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
return ret;
}
-static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
-{
- unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
-
- if (is_upper)
- *is_upper = x & OVL_ISUPPER_MASK;
-
- return (struct inode *) (x & ~OVL_ISUPPER_MASK);
-}
-
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
struct super_block *ovl_same_sb(struct super_block *sb);
+bool ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
@@ -201,19 +199,22 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
-bool ovl_dentry_is_impure(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_inode_init(struct inode *inode, struct inode *realinode,
- bool is_upper);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode);
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+ struct dentry *lowerdentry);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dentry_version_inc(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
@@ -225,6 +226,12 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_set_flag(unsigned long flag, struct inode *inode);
+bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry, bool *locked);
+void ovl_nlink_end(struct dentry *dentry, bool locked);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
@@ -233,6 +240,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
/* namei.c */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+ struct dentry *origin, bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+ unsigned int numlower);
+int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
@@ -245,8 +257,15 @@ void ovl_cache_free(struct list_head *list);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+ struct path *lowerstack, unsigned int numlower);
/* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+ struct dentry *upperdentry,
+ unsigned int fallback);
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
@@ -262,7 +281,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
@@ -284,10 +303,11 @@ struct cattr {
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct cattr *attr,
struct dentry *hardlink, bool debug);
-void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+int ovl_cleanup(struct inode *dir, struct dentry *dentry);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 34bc4a9f5c61..878a750986dd 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,7 @@ struct ovl_config {
char *workdir;
bool default_permissions;
bool redirect_dir;
+ bool index;
};
/* private information held for overlayfs's superblock */
@@ -21,7 +22,12 @@ struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned numlower;
struct vfsmount **lower_mnt;
+ /* workbasedir is the path at workdir= mount option */
+ struct dentry *workbasedir;
+ /* workdir is the 'work' directory under workbasedir */
struct dentry *workdir;
+ /* index directory listing overlay inodes by origin file handle */
+ struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
@@ -29,22 +35,16 @@ struct ovl_fs {
const struct cred *creator_cred;
bool tmpfile;
bool noxattr;
- wait_queue_head_t copyup_wq;
/* sb common to all layers */
struct super_block *same_sb;
};
/* private information held for every overlayfs dentry */
struct ovl_entry {
- struct dentry *__upperdentry;
- struct ovl_dir_cache *cache;
union {
struct {
- u64 version;
- const char *redirect;
+ unsigned long has_upper;
bool opaque;
- bool impure;
- bool copying;
};
struct rcu_head rcu;
};
@@ -54,7 +54,25 @@ struct ovl_entry {
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
-static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
+/* private information held for every overlayfs inode */
+struct ovl_inode {
+ /* NOTE(review): presumably the readdir cache accessed via ovl_dir_cache() - confirm */
+ struct ovl_dir_cache *cache;
+ /* redirect string set by ovl_lookup(); kfree()d in ovl_destroy_inode() */
+ const char *redirect;
+ /* NOTE(review): presumably bumped by ovl_dentry_version_inc() - confirm */
+ u64 version;
+ /* NOTE(review): presumably holds the enum ovl_flag bits set by ovl_set_flag() - confirm */
+ unsigned long flags;
+ /* embedded VFS inode; OVL_I() maps it back to this structure */
+ struct inode vfs_inode;
+ /* upper dentry, read with ovl_upperdentry_dereference(); dput() on destroy */
+ struct dentry *__upperdentry;
+ /* NOTE(review): presumably the lower (origin) real inode - confirm */
+ struct inode *lower;
+
+ /* synchronize copy up and more */
+ struct mutex lock;
+};
+
+/* Map an embedded VFS inode back to its containing overlay inode. */
+static inline struct ovl_inode *OVL_I(struct inode *inode)
+{
+	struct ovl_inode *oi = container_of(inode, struct ovl_inode, vfs_inode);
+
+	return oi;
+}
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
{
- return lockless_dereference(oe->__upperdentry);
+ return lockless_dereference(oi->__upperdentry);
}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index f241b4ee3d8a..0298463cf9c3 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -667,3 +667,53 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
ovl_cleanup(dir, dentry);
}
}
+
+/*
+ * Read the index directory @dentry, verify every entry against the lower
+ * layers with ovl_verify_index() and remove the entries that fail
+ * verification.
+ *
+ * Returns 0 on success or the first error from reading the directory,
+ * looking up an entry or removing an entry.
+ */
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+			 struct path *lowerstack, unsigned int numlower)
+{
+	int err;
+	struct inode *dir = dentry->d_inode;
+	struct path path = { .mnt = mnt, .dentry = dentry };
+	LIST_HEAD(list);
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = RB_ROOT,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(&path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *index;
+
+		/* Skip the "." and ".." entries */
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		index = lookup_one_len(p->name, dentry, p->len);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			break;
+		}
+		if (ovl_verify_index(index, lowerstack, numlower))
+			err = ovl_cleanup(dir, index);
+		/*
+		 * Drop the lookup reference before bailing out, so that a
+		 * failed cleanup does not leak the index dentry.
+		 */
+		dput(index);
+		if (err)
+			break;
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	if (err)
+		pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+	return err;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4882ffb37bae..44dc2d6ffe0f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -34,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(ovl_redirect_dir_def,
"Default to on or off for the redirect_dir feature");
+/*
+ * Default for the "index" mount option.  The description is keyed by the
+ * user visible parameter name given to module_param_named(), not by the
+ * name of the backing variable.
+ */
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(index,
+		 "Default to on or off for the inodes index feature");
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -41,8 +46,6 @@ static void ovl_dentry_release(struct dentry *dentry)
if (oe) {
unsigned int i;
- dput(oe->__upperdentry);
- kfree(oe->redirect);
for (i = 0; i < oe->numlower; i++)
dput(oe->lowerstack[i].dentry);
kfree_rcu(oe, rcu);
@@ -165,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = {
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
+static struct kmem_cache *ovl_inode_cachep;
+
+/*
+ * Allocate an overlay inode from ovl_inode_cachep and reset its private
+ * fields.  Returns the embedded VFS inode, or NULL if the allocation fails.
+ */
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+
+	/* The slab allocation can fail; do not touch the fields then */
+	if (!oi)
+		return NULL;
+
+	oi->cache = NULL;
+	oi->redirect = NULL;
+	oi->version = 0;
+	oi->flags = 0;
+	oi->__upperdentry = NULL;
+	oi->lower = NULL;
+	mutex_init(&oi->lock);
+
+	return &oi->vfs_inode;
+}
+
+/* RCU callback: return the overlay inode to its slab cache. */
+static void ovl_i_callback(struct rcu_head *head)
+{
+	struct inode *vfs_inode = container_of(head, struct inode, i_rcu);
+	struct ovl_inode *oi = OVL_I(vfs_inode);
+
+	kmem_cache_free(ovl_inode_cachep, oi);
+}
+
+/*
+ * Release what the overlay inode owns (upper dentry reference and redirect
+ * string), then free the inode itself from an RCU callback.
+ */
+static void ovl_destroy_inode(struct inode *inode)
+{
+	struct ovl_inode *ovl = OVL_I(inode);
+
+	dput(ovl->__upperdentry);
+	kfree(ovl->redirect);
+	mutex_destroy(&ovl->lock);
+
+	/* The memory itself is freed in ovl_i_callback() */
+	call_rcu(&inode->i_rcu, ovl_i_callback);
+}
+
static void ovl_put_super(struct super_block *sb)
{
struct ovl_fs *ufs = sb->s_fs_info;
unsigned i;
+ dput(ufs->indexdir);
dput(ufs->workdir);
+ ovl_inuse_unlock(ufs->workbasedir);
+ dput(ufs->workbasedir);
+ if (ufs->upper_mnt)
+ ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
@@ -228,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
+/*
+ * Will this overlay be forced to mount/remount ro?
+ * That is the case unless it has both an upper mount and a work directory.
+ */
+static bool ovl_force_readonly(struct ovl_fs *ufs)
+{
+	return !(ufs->upper_mnt && ufs->workdir);
+}
+
/**
* ovl_show_options
*
@@ -249,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ufs->config.redirect_dir != ovl_redirect_dir_def)
seq_printf(m, ",redirect_dir=%s",
ufs->config.redirect_dir ? "on" : "off");
+ if (ufs->config.index != ovl_index_def)
+ seq_printf(m, ",index=%s",
+ ufs->config.index ? "on" : "off");
return 0;
}
@@ -256,19 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
+ if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
return -EROFS;
return 0;
}
static const struct super_operations ovl_super_operations = {
+ .alloc_inode = ovl_alloc_inode,
+ .destroy_inode = ovl_destroy_inode,
+ .drop_inode = generic_delete_inode,
.put_super = ovl_put_super,
.sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
.remount_fs = ovl_remount,
- .drop_inode = generic_delete_inode,
};
enum {
@@ -278,6 +332,8 @@ enum {
OPT_DEFAULT_PERMISSIONS,
OPT_REDIRECT_DIR_ON,
OPT_REDIRECT_DIR_OFF,
+ OPT_INDEX_ON,
+ OPT_INDEX_OFF,
OPT_ERR,
};
@@ -288,6 +344,8 @@ static const match_table_t ovl_tokens = {
{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_REDIRECT_DIR_ON, "redirect_dir=on"},
{OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
+ {OPT_INDEX_ON, "index=on"},
+ {OPT_INDEX_OFF, "index=off"},
{OPT_ERR, NULL}
};
@@ -360,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->redirect_dir = false;
break;
+ case OPT_INDEX_ON:
+ config->index = true;
+ break;
+
+ case OPT_INDEX_OFF:
+ config->index = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -378,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
#define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
-static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
- struct dentry *dentry)
+static struct dentry *ovl_workdir_create(struct super_block *sb,
+ struct ovl_fs *ufs,
+ struct dentry *dentry,
+ const char *name, bool persist)
{
struct inode *dir = dentry->d_inode;
+ struct vfsmount *mnt = ufs->upper_mnt;
struct dentry *work;
int err;
bool retried = false;
+ bool locked = false;
err = mnt_want_write(mnt);
if (err)
- return ERR_PTR(err);
+ goto out_err;
inode_lock_nested(dir, I_MUTEX_PARENT);
+ locked = true;
+
retry:
- work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
- strlen(OVL_WORKDIR_NAME));
+ work = lookup_one_len(name, dentry, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -407,6 +479,9 @@ retry:
if (retried)
goto out_dput;
+ if (persist)
+ goto out_unlock;
+
retried = true;
ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
@@ -446,16 +521,24 @@ retry:
inode_unlock(work->d_inode);
if (err)
goto out_dput;
+ } else {
+ err = PTR_ERR(work);
+ goto out_err;
}
out_unlock:
- inode_unlock(dir);
mnt_drop_write(mnt);
+ if (locked)
+ inode_unlock(dir);
return work;
out_dput:
dput(work);
- work = ERR_PTR(err);
+out_err:
+ pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ ufs->config.workdir, name, -err);
+ sb->s_flags |= MS_RDONLY;
+ work = NULL;
goto out_unlock;
}
@@ -555,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path,
if (ovl_dentry_remote(path->dentry))
*remote = true;
+ /*
+ * The inodes index feature needs to encode and decode file
+ * handles, so it requires that all layers support them.
+ */
+ if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+ ofs->config.index = false;
+ pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+ }
+
return 0;
out_put:
@@ -610,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *realinode = ovl_inode_real(inode, NULL);
+ struct inode *realinode = ovl_inode_real(inode);
struct posix_acl *acl = NULL;
int err;
@@ -652,7 +744,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
err = ovl_xattr_set(dentry, handler->name, value, size, flags);
if (!err)
- ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+ ovl_copyattr(ovl_inode_real(inode), inode);
return err;
@@ -734,7 +826,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
struct path upperpath = { };
struct path workpath = { };
struct dentry *root_dentry;
- struct inode *realinode;
struct ovl_entry *oe;
struct ovl_fs *ufs;
struct path *stack = NULL;
@@ -752,8 +843,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ufs)
goto out;
- init_waitqueue_head(&ufs->copyup_wq);
ufs->config.redirect_dir = ovl_redirect_dir_def;
+ ufs->config.index = ovl_index_def;
err = ovl_parse_opt((char *) data, &ufs->config);
if (err)
goto out_free_config;
@@ -788,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_put_upperpath;
+ err = -EBUSY;
+ if (!ovl_inuse_trylock(upperpath.dentry)) {
+ pr_err("overlayfs: upperdir is in-use by another mount\n");
+ goto out_put_upperpath;
+ }
+
err = ovl_mount_dir(ufs->config.workdir, &workpath);
if (err)
- goto out_put_upperpath;
+ goto out_unlock_upperdentry;
err = -EINVAL;
if (upperpath.mnt != workpath.mnt) {
@@ -801,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
goto out_put_workpath;
}
+
+ err = -EBUSY;
+ if (!ovl_inuse_trylock(workpath.dentry)) {
+ pr_err("overlayfs: workdir is in-use by another mount\n");
+ goto out_put_workpath;
+ }
+
+ ufs->workbasedir = workpath.dentry;
sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
}
err = -ENOMEM;
lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
if (!lowertmp)
- goto out_put_workpath;
+ goto out_unlock_workdentry;
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
@@ -849,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: failed to clone upperpath\n");
goto out_put_lowerpath;
}
+
/* Don't inherit atime flags */
ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
- err = PTR_ERR(ufs->workdir);
- if (IS_ERR(ufs->workdir)) {
- pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
- ufs->config.workdir, OVL_WORKDIR_NAME, -err);
- sb->s_flags |= MS_RDONLY;
- ufs->workdir = NULL;
- }
-
+ ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+ OVL_WORKDIR_NAME, false);
/*
* Upper should support d_type, else whiteouts are visible.
* Given workdir and upper are on same fs, we can do
@@ -904,6 +1003,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
} else {
vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
}
+
+ /* Check if upper/work fs supports file handles */
+ if (ufs->config.index &&
+ !ovl_can_decode_fh(ufs->workdir->d_sb)) {
+ ufs->config.index = false;
+ pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ }
}
}
@@ -941,6 +1047,44 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
ufs->same_sb = NULL;
+ if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
+ /* Verify lower root is upper root origin */
+ err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
+ stack[0].dentry, false, true);
+ if (err) {
+ pr_err("overlayfs: failed to verify upper root origin\n");
+ goto out_put_lower_mnt;
+ }
+
+ ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+ OVL_INDEXDIR_NAME, true);
+ err = PTR_ERR(ufs->indexdir);
+ if (IS_ERR(ufs->indexdir))
+ goto out_put_lower_mnt;
+
+ if (ufs->indexdir) {
+ /* Verify upper root is index dir origin */
+ err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
+ upperpath.dentry, true, true);
+ if (err)
+ pr_err("overlayfs: failed to verify index dir origin\n");
+
+ /* Cleanup bad/stale/orphan index entries */
+ if (!err)
+ err = ovl_indexdir_cleanup(ufs->indexdir,
+ ufs->upper_mnt,
+ stack, numlower);
+ }
+ if (err || !ufs->indexdir)
+ pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ if (err)
+ goto out_put_indexdir;
+ }
+
+ /* Show index=off/on in /proc/mounts for any of the reasons above */
+ if (!ufs->indexdir)
+ ufs->config.index = false;
+
if (remote)
sb->s_d_op = &ovl_reval_dentry_operations;
else
@@ -948,7 +1092,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->creator_cred = cred = prepare_creds();
if (!cred)
- goto out_put_lower_mnt;
+ goto out_put_indexdir;
/* Never override disk quota limits or use reserved space */
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
@@ -971,12 +1115,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
mntput(upperpath.mnt);
for (i = 0; i < numlower; i++)
mntput(stack[i].mnt);
- path_put(&workpath);
+ mntput(workpath.mnt);
kfree(lowertmp);
if (upperpath.dentry) {
- oe->__upperdentry = upperpath.dentry;
- oe->impure = ovl_is_impuredir(upperpath.dentry);
+ oe->has_upper = true;
+ if (ovl_is_impuredir(upperpath.dentry))
+ ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
}
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = stack[i].dentry;
@@ -986,9 +1131,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
root_dentry->d_fsdata = oe;
- realinode = d_inode(ovl_dentry_real(root_dentry));
- ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
- ovl_copyattr(realinode, d_inode(root_dentry));
+ ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
+ ovl_dentry_lower(root_dentry));
sb->s_root = root_dentry;
@@ -998,6 +1142,8 @@ out_free_oe:
kfree(oe);
out_put_cred:
put_cred(ufs->creator_cred);
+out_put_indexdir:
+ dput(ufs->indexdir);
out_put_lower_mnt:
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
@@ -1011,8 +1157,12 @@ out_put_lowerpath:
kfree(stack);
out_free_lowertmp:
kfree(lowertmp);
+out_unlock_workdentry:
+ ovl_inuse_unlock(workpath.dentry);
out_put_workpath:
path_put(&workpath);
+out_unlock_upperdentry:
+ ovl_inuse_unlock(upperpath.dentry);
out_put_upperpath:
path_put(&upperpath);
out_free_config:
@@ -1038,14 +1188,43 @@ static struct file_system_type ovl_fs_type = {
};
MODULE_ALIAS_FS("overlay");
+static void ovl_inode_init_once(void *foo)
+{
+ struct ovl_inode *oi = foo;
+
+ inode_init_once(&oi->vfs_inode);
+}
+
static int __init ovl_init(void)
{
- return register_filesystem(&ovl_fs_type);
+ int err;
+
+ ovl_inode_cachep = kmem_cache_create("ovl_inode",
+ sizeof(struct ovl_inode), 0,
+ (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ ovl_inode_init_once);
+ if (ovl_inode_cachep == NULL)
+ return -ENOMEM;
+
+ err = register_filesystem(&ovl_fs_type);
+ if (err)
+ kmem_cache_destroy(ovl_inode_cachep);
+
+ return err;
}
static void __exit ovl_exit(void)
{
unregister_filesystem(&ovl_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(ovl_inode_cachep);
+
}
module_init(ovl_init);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 809048913889..c492ba75c659 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -12,6 +12,10 @@
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -47,6 +51,19 @@ struct super_block *ovl_same_sb(struct super_block *sb)
return ofs->same_sb;
}
+bool ovl_can_decode_fh(struct super_block *sb)
+{
+ return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+ !uuid_is_null(&sb->s_uuid));
+}
+
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->indexdir;
+}
+
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -78,7 +95,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
struct ovl_entry *oe = dentry->d_fsdata;
enum ovl_path_type type = 0;
- if (oe->__upperdentry) {
+ if (ovl_dentry_upper(dentry)) {
type = __OVL_PATH_UPPER;
/*
@@ -99,10 +116,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *oe = dentry->d_fsdata;
path->mnt = ofs->upper_mnt;
- path->dentry = ovl_upperdentry_dereference(oe);
+ path->dentry = ovl_dentry_upper(dentry);
}
void ovl_path_lower(struct dentry *dentry, struct path *path)
@@ -126,47 +142,47 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
+ return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+}
+
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
+{
struct ovl_entry *oe = dentry->d_fsdata;
- return ovl_upperdentry_dereference(oe);
+ return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
-static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
+struct dentry *ovl_dentry_real(struct dentry *dentry)
{
- return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+ return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
}
-struct dentry *ovl_dentry_lower(struct dentry *dentry)
+struct inode *ovl_inode_upper(struct inode *inode)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode));
- return __ovl_dentry_lower(oe);
+ return upperdentry ? d_inode(upperdentry) : NULL;
}
-struct dentry *ovl_dentry_real(struct dentry *dentry)
+struct inode *ovl_inode_lower(struct inode *inode)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- struct dentry *realdentry;
-
- realdentry = ovl_upperdentry_dereference(oe);
- if (!realdentry)
- realdentry = __ovl_dentry_lower(oe);
+ return OVL_I(inode)->lower;
+}
- return realdentry;
+struct inode *ovl_inode_real(struct inode *inode)
+{
+ return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
+
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->cache;
+ return OVL_I(d_inode(dentry))->cache;
}
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- oe->cache = cache;
+ OVL_I(d_inode(dentry))->cache = cache;
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
@@ -175,23 +191,35 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
return oe->opaque;
}
-bool ovl_dentry_is_impure(struct dentry *dentry)
+bool ovl_dentry_is_whiteout(struct dentry *dentry)
+{
+ return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
+}
+
+void ovl_dentry_set_opaque(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- return oe->impure;
+ oe->opaque = true;
}
-bool ovl_dentry_is_whiteout(struct dentry *dentry)
+/*
+ * For hard links it's possible for ovl_dentry_upper() to return positive, while
+ * there's no actual upper alias for the inode. Copy up code needs to know
+ * about the existence of the upper alias, so it can't use ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
{
- return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->has_upper;
}
-void ovl_dentry_set_opaque(struct dentry *dentry)
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- oe->opaque = true;
+ oe->has_upper = true;
}
bool ovl_redirect_dir(struct super_block *sb)
@@ -203,63 +231,59 @@ bool ovl_redirect_dir(struct super_block *sb)
const char *ovl_dentry_get_redirect(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->redirect;
+ return OVL_I(d_inode(dentry))->redirect;
}
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
- kfree(oe->redirect);
- oe->redirect = redirect;
+ kfree(oi->redirect);
+ oi->redirect = redirect;
}
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+ struct dentry *lowerdentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ if (upperdentry)
+ OVL_I(inode)->__upperdentry = upperdentry;
+ if (lowerdentry)
+ OVL_I(inode)->lower = d_inode(lowerdentry);
- WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
- WARN_ON(oe->__upperdentry);
- /*
- * Make sure upperdentry is consistent before making it visible to
- * ovl_upperdentry_dereference().
- */
- smp_wmb();
- oe->__upperdentry = upperdentry;
+ ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
}
-void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper)
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
- WRITE_ONCE(inode->i_private, (unsigned long) realinode |
- (is_upper ? OVL_ISUPPER_MASK : 0));
-}
+ struct inode *upperinode = d_inode(upperdentry);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode)
-{
- WARN_ON(!upperinode);
- WARN_ON(!inode_unhashed(inode));
- WRITE_ONCE(inode->i_private,
- (unsigned long) upperinode | OVL_ISUPPER_MASK);
- if (!S_ISDIR(upperinode->i_mode))
+ WARN_ON(OVL_I(inode)->__upperdentry);
+
+ /*
+ * Make sure upperdentry is consistent before making it visible
+ */
+ smp_wmb();
+ OVL_I(inode)->__upperdentry = upperdentry;
+ if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
+ inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
+ }
}
void ovl_dentry_version_inc(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct inode *inode = d_inode(dentry);
- WARN_ON(!inode_is_locked(dentry->d_inode));
- oe->version++;
+ WARN_ON(!inode_is_locked(inode));
+ OVL_I(inode)->version++;
}
u64 ovl_dentry_version_get(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct inode *inode = d_inode(dentry);
- WARN_ON(!inode_is_locked(dentry->d_inode));
- return oe->version;
+ WARN_ON(!inode_is_locked(inode));
+ return OVL_I(inode)->version;
}
bool ovl_is_whiteout(struct dentry *dentry)
@@ -276,32 +300,21 @@ struct file *ovl_path_open(struct path *path, int flags)
int ovl_copy_up_start(struct dentry *dentry)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
int err;
- spin_lock(&ofs->copyup_wq.lock);
- err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
- if (!err) {
- if (oe->__upperdentry)
- err = 1; /* Already copied up */
- else
- oe->copying = true;
+ err = mutex_lock_interruptible(&oi->lock);
+ if (!err && ovl_dentry_has_upper_alias(dentry)) {
+ err = 1; /* Already copied up */
+ mutex_unlock(&oi->lock);
}
- spin_unlock(&ofs->copyup_wq.lock);
return err;
}
void ovl_copy_up_end(struct dentry *dentry)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *oe = dentry->d_fsdata;
-
- spin_lock(&ofs->copyup_wq.lock);
- oe->copying = false;
- wake_up_locked(&ofs->copyup_wq);
- spin_unlock(&ofs->copyup_wq.lock);
+ mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
@@ -343,9 +356,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
int err;
- struct ovl_entry *oe = dentry->d_fsdata;
- if (oe->impure)
+ if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
return 0;
/*
@@ -355,7 +367,176 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
"y", 1, 0);
if (!err)
- oe->impure = true;
+ ovl_set_flag(OVL_IMPURE, d_inode(dentry));
return err;
}
+
+void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+ set_bit(flag, &OVL_I(inode)->flags);
+}
+
+bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+ return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ bool locked = false;
+
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_state & I_OVL_INUSE)) {
+ inode->i_state |= I_OVL_INUSE;
+ locked = true;
+ }
+ spin_unlock(&inode->i_lock);
+
+ return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+ if (dentry) {
+ struct inode *inode = d_inode(dentry);
+
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_OVL_INUSE));
+ inode->i_state &= ~I_OVL_INUSE;
+ spin_unlock(&inode->i_lock);
+ }
+}
+
+/* Caller must hold OVL_I(inode)->lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+ struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+ struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+ struct dentry *upperdentry = ovl_dentry_upper(dentry);
+ struct dentry *index = NULL;
+ struct inode *inode;
+ struct qstr name;
+ int err;
+
+ err = ovl_get_index_name(lowerdentry, &name);
+ if (err)
+ goto fail;
+
+ inode = d_inode(upperdentry);
+ if (inode->i_nlink != 1) {
+ pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+ upperdentry, inode->i_ino, inode->i_nlink);
+ /*
+ * We either have a bug with persistent union nlink or a lower
+ * hardlink was added while overlay is mounted. Adding a lower
+ * hardlink and then unlinking all overlay hardlinks would drop
+ * overlay nlink to zero before all upper inodes are unlinked.
+ * As a safety measure, when that situation is detected, set
+ * the overlay nlink to the index inode nlink minus one for the
+ * index entry itself.
+ */
+ set_nlink(d_inode(dentry), inode->i_nlink - 1);
+ ovl_set_nlink_upper(dentry);
+ goto out;
+ }
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ /* TODO: whiteout instead of cleanup to block future open by handle */
+ index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+ err = PTR_ERR(index);
+ if (!IS_ERR(index))
+ err = ovl_cleanup(dir, index);
+ inode_unlock(dir);
+ if (err)
+ goto fail;
+
+out:
+ dput(index);
+ return;
+
+fail:
+ pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+ goto out;
+}
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry, bool *locked)
+{
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
+ const struct cred *old_cred;
+ int err;
+
+ if (!d_inode(dentry) || d_is_dir(dentry))
+ return 0;
+
+ /*
+ * With inodes index enabled, we store the union overlay nlink
+ * in an xattr on the index inode. When whiting out lower hardlinks
+ * we need to decrement the overlay persistent nlink, but before the
+ * first copy up, we have no upper index inode to store the xattr.
+ *
+ * As a workaround, before whiteout of or rename over a lower hardlink,
+ * copy up to create the upper index. Creating the upper index will
+ * initialize the overlay nlink, so it could be dropped if unlink
+ * or rename succeeds.
+ *
+ * TODO: implement metadata only index copy up when called with
+ * ovl_copy_up_flags(dentry, O_PATH).
+ */
+ if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
+ d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+ err = ovl_copy_up(dentry);
+ if (err)
+ return err;
+ }
+
+ err = mutex_lock_interruptible(&oi->lock);
+ if (err)
+ return err;
+
+ if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ goto out;
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ /*
+ * The overlay inode nlink should be incremented/decremented IFF the
+ * upper operation succeeds, along with nlink change of upper inode.
+ * Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in an upper inode xattr.
+ */
+ err = ovl_set_nlink_upper(dentry);
+ revert_creds(old_cred);
+
+out:
+ if (err)
+ mutex_unlock(&oi->lock);
+ else
+ *locked = true;
+
+ return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry, bool locked)
+{
+ if (locked) {
+ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+ d_inode(dentry)->i_nlink == 0) {
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ ovl_cleanup_index(dentry);
+ revert_creds(old_cred);
+ }
+
+ mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+ }
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78e1dbbe4cfd..976aaa1af82a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1955,6 +1955,9 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* wb stat updates to grab mapping->tree_lock. See
* inode_switch_wb_work_fn() for details.
*
+ * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
+ * and work dirs among overlayfs mounts.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
@@ -1975,6 +1978,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
#define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_WB_SWITCH (1 << 13)
+#define I_OVL_INUSE (1 << 14)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)