diff options
-rw-r--r-- | Documentation/filesystems/overlayfs.txt | 106 | ||||
-rw-r--r-- | fs/dcache.c | 88 | ||||
-rw-r--r-- | fs/overlayfs/Kconfig | 31 | ||||
-rw-r--r-- | fs/overlayfs/Makefile | 3 | ||||
-rw-r--r-- | fs/overlayfs/copy_up.c | 188 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 175 | ||||
-rw-r--r-- | fs/overlayfs/export.c | 715 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 106 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 533 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 66 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 11 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 57 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 125 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 108 | ||||
-rw-r--r-- | include/linux/dcache.h | 2 |
15 files changed, 1905 insertions, 409 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index e6a5f4912b6d..6ea1e64d1464 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -190,6 +190,20 @@ Mount options: Redirects are not created and not followed (equivalent to "redirect_dir=off" if "redirect_always_follow" feature is not enabled). +When the NFS export feature is enabled, every copied up directory is +indexed by the file handle of the lower inode and a file handle of the +upper directory is stored in a "trusted.overlay.upper" extended attribute +on the index entry. On lookup of a merged directory, if the upper +directory does not match the file handle stores in the index, that is an +indication that multiple upper directories may be redirected to the same +lower directory. In that case, lookup returns an error and warns about +a possible inconsistency. + +Because lower layer redirects cannot be verified with the index, enabling +NFS export support on an overlay filesystem with no upper layer requires +turning off redirect follow (e.g. "redirect_dir=nofollow"). + + Non-directories --------------- @@ -281,9 +295,9 @@ filesystem, so both st_dev and st_ino of the file may change. Any open files referring to this inode will access the old data. -If a file with multiple hard links is copied up, then this will -"break" the link. Changes will not be propagated to other names -referring to the same inode. +Unless "inode index" feature is enabled, if a file with multiple hard +links is copied up, then this will "break" the link. Changes will not be +propagated to other names referring to the same inode. Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged directory will fail with EXDEV. @@ -299,6 +313,92 @@ filesystem are not allowed. If the underlying filesystem is changed, the behavior of the overlay is undefined, though it will not result in a crash or deadlock. +When the overlay NFS export feature is enabled, overlay filesystems +behavior on offline changes of the underlying lower layer is different +than the behavior when NFS export is disabled. + +On every copy_up, an NFS file handle of the lower inode, along with the +UUID of the lower filesystem, are encoded and stored in an extended +attribute "trusted.overlay.origin" on the upper inode. + +When the NFS export feature is enabled, a lookup of a merged directory, +that found a lower directory at the lookup path or at the path pointed +to by the "trusted.overlay.redirect" extended attribute, will verify +that the found lower directory file handle and lower filesystem UUID +match the origin file handle that was stored at copy_up time. If a +found lower directory does not match the stored origin, that directory +will not be merged with the upper directory. + + + +NFS export +---------- + +When the underlying filesystems supports NFS export and the "nfs_export" +feature is enabled, an overlay filesystem may be exported to NFS. + +With the "nfs_export" feature, on copy_up of any lower object, an index +entry is created under the index directory. The index entry name is the +hexadecimal representation of the copy up origin file handle. For a +non-directory object, the index entry is a hard link to the upper inode. +For a directory object, the index entry has an extended attribute +"trusted.overlay.upper" with an encoded file handle of the upper +directory inode. + +When encoding a file handle from an overlay filesystem object, the +following rules apply: + +1. For a non-upper object, encode a lower file handle from lower inode +2. For an indexed object, encode a lower file handle from copy_up origin +3. For a pure-upper object and for an existing non-indexed upper object, + encode an upper file handle from upper inode + +The encoded overlay file handle includes: + - Header including path type information (e.g. lower/upper) + - UUID of the underlying filesystem + - Underlying filesystem encoding of underlying inode + +This encoding format is identical to the encoding format file handles that +are stored in extended attribute "trusted.overlay.origin". + +When decoding an overlay file handle, the following steps are followed: + +1. Find underlying layer by UUID and path type information. +2. Decode the underlying filesystem file handle to underlying dentry. +3. For a lower file handle, lookup the handle in index directory by name. +4. If a whiteout is found in index, return ESTALE. This represents an + overlay object that was deleted after its file handle was encoded. +5. For a non-directory, instantiate a disconnected overlay dentry from the + decoded underlying dentry, the path type and index inode, if found. +6. For a directory, use the connected underlying decoded dentry, path type + and index, to lookup a connected overlay dentry. + +Decoding a non-directory file handle may return a disconnected dentry. +copy_up of that disconnected dentry will create an upper index entry with +no upper alias. + +When overlay filesystem has multiple lower layers, a middle layer +directory may have a "redirect" to lower directory. Because middle layer +"redirects" are not indexed, a lower file handle that was encoded from the +"redirect" origin directory, cannot be used to find the middle or upper +layer directory. Similarly, a lower file handle that was encoded from a +descendant of the "redirect" origin directory, cannot be used to +reconstruct a connected overlay path. To mitigate the cases of +directories that cannot be decoded from a lower file handle, these +directories are copied up on encode and encoded as an upper file handle. +On an overlay filesystem with no upper layer this mitigation cannot be +used NFS export in this setup requires turning off redirect follow (e.g. +"redirect_dir=nofollow"). + +The overlay filesystem does not support non-directory connectable file +handles, so exporting with the 'subtree_check' exportfs configuration will +cause failures to lookup files over NFS. + +When the NFS export feature is enabled, all directory index entries are +verified on mount time to check that upper file handles are not stale. +This verification may cause significant overhead in some cases. + + Testsuite --------- diff --git a/fs/dcache.c b/fs/dcache.c index cca2b377ff0a..7c38f39958bc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1698,9 +1698,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_anon(struct super_block *sb) +{ + return __d_alloc(sb, NULL); +} +EXPORT_SYMBOL(d_alloc_anon); + struct dentry *d_alloc_cursor(struct dentry * parent) { - struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); @@ -1886,7 +1892,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - res = __d_alloc(root_inode->i_sb, NULL); + res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else @@ -1925,33 +1931,19 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) +static struct dentry *__d_instantiate_anon(struct dentry *dentry, + struct inode *inode, + bool disconnected) { - struct dentry *tmp; struct dentry *res; unsigned add_flags; - if (!inode) - return ERR_PTR(-ESTALE); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - res = d_find_any_alias(inode); - if (res) - goto out_iput; - - tmp = __d_alloc(inode->i_sb, NULL); - if (!tmp) { - res = ERR_PTR(-ENOMEM); - goto out_iput; - } - - security_d_instantiate(tmp, inode); + security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); if (res) { spin_unlock(&inode->i_lock); - dput(tmp); + dput(dentry); goto out_iput; } @@ -1961,24 +1953,57 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) if (disconnected) add_flags |= DCACHE_DISCONNECTED; - spin_lock(&tmp->d_lock); - __d_set_inode_and_type(tmp, inode, add_flags); - hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); + spin_lock(&dentry->d_lock); + __d_set_inode_and_type(dentry, inode, add_flags); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); if (!disconnected) { - hlist_bl_lock(&tmp->d_sb->s_roots); - hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots); - hlist_bl_unlock(&tmp->d_sb->s_roots); + hlist_bl_lock(&dentry->d_sb->s_roots); + hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots); + hlist_bl_unlock(&dentry->d_sb->s_roots); } - spin_unlock(&tmp->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); - return tmp; + return dentry; out_iput: iput(inode); return res; } +struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode) +{ + return __d_instantiate_anon(dentry, inode, true); +} +EXPORT_SYMBOL(d_instantiate_anon); + +static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) +{ + struct dentry *tmp; + struct dentry *res; + + if (!inode) + return ERR_PTR(-ESTALE); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + res = d_find_any_alias(inode); + if (res) + goto out_iput; + + tmp = d_alloc_anon(inode->i_sb); + if (!tmp) { + res = ERR_PTR(-ENOMEM); + goto out_iput; + } + + return __d_instantiate_anon(tmp, inode, disconnected); + +out_iput: + iput(inode); + return res; +} + /** * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode * @inode: inode to allocate the dentry for @@ -1999,7 +2024,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) */ struct dentry *d_obtain_alias(struct inode *inode) { - return __d_obtain_alias(inode, 1); + return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); @@ -2020,7 +2045,7 @@ EXPORT_SYMBOL(d_obtain_alias); */ struct dentry *d_obtain_root(struct inode *inode) { - return __d_obtain_alias(inode, 0); + return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); @@ -3527,6 +3552,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) return result; } +EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 5ac415466861..406e72de88f6 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -47,9 +47,28 @@ config OVERLAY_FS_INDEX The inodes index feature prevents breaking of lower hardlinks on copy up. - Note, that the inodes index feature is read-only backward compatible. - That is, mounting an overlay which has an index dir on a kernel that - doesn't support this feature read-only, will not have any negative - outcomes. However, mounting the same overlay with an old kernel - read-write and then mounting it again with a new kernel, will have - unexpected results. + Note, that the inodes index feature is not backward compatible. + That is, mounting an overlay which has an inodes index on a kernel + that doesn't support this feature will have unexpected results. + +config OVERLAY_FS_NFS_EXPORT + bool "Overlayfs: turn on NFS export feature by default" + depends on OVERLAY_FS + depends on OVERLAY_FS_INDEX + help + If this config option is enabled then overlay filesystems will use + the inodes index dir to decode overlay NFS file handles by default. + In this case, it is still possible to turn off NFS export support + globally with the "nfs_export=off" module option or on a filesystem + instance basis with the "nfs_export=off" mount option. + + The NFS export feature creates an index on copy up of every file and + directory. This full index is used to detect overlay filesystems + inconsistencies on lookup, like redirect from multiple upper dirs to + the same lower dir. The full index may incur some overhead on mount + time, especially when verifying that directory file handles are not + stale. + + Note, that the NFS export feature is not backward compatible. + That is, mounting an overlay which has a full index on a kernel + that doesn't support this feature will have unexpected results. diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 99373bbc1478..30802347a020 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \ + export.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index eb3b8d39fb61..d855f508fa20 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, fh_len, dwords; void *buf; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &lower->d_sb->s_uuid; + uuid_t *uuid = &real->d_sb->s_uuid; buf = kmalloc(buflen, GFP_KERNEL); if (!buf) @@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + fh_type = exportfs_encode_fh(real, buf, &dwords, 0); buflen = (dwords << 2); fh = ERR_PTR(-EIO); @@ -288,8 +288,8 @@ out: return fh; } -static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, - struct dentry *upper) +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; @@ -315,6 +315,94 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return err; } +/* Store file handle of @upper dir in @index dir entry */ +static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) +{ + const struct ovl_fh *fh; + int err; + + fh = ovl_encode_fh(upper, true); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0); + + kfree(fh); + return err; +} + +/* + * Create and install index entry. + * + * Caller must hold i_mutex on indexdir. + */ +static int ovl_create_index(struct dentry *dentry, struct dentry *origin, + struct dentry *upper) +{ + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = d_inode(indexdir); + struct dentry *index = NULL; + struct dentry *temp = NULL; + struct qstr name = { }; + int err; + + /* + * For now this is only used for creating index entry for directories, + * because non-dir are copied up directly to index and then hardlinked + * to upper dir. + * + * TODO: implement create index for non-dir, so we can call it when + * encoding file handle for non-dir in case index does not exist. + */ + if (WARN_ON(!d_is_dir(dentry))) + return -EIO; + + /* Directory not expected to be indexed before copy up */ + if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) + return -EIO; + + err = ovl_get_index_name(origin, &name); + if (err) + return err; + + temp = ovl_lookup_temp(indexdir); + if (IS_ERR(temp)) + goto temp_err; + + err = ovl_do_mkdir(dir, temp, S_IFDIR, true); + if (err) + goto out; + + err = ovl_set_upper_fh(upper, temp); + if (err) + goto out_cleanup; + + index = lookup_one_len(name.name, indexdir, name.len); + if (IS_ERR(index)) { + err = PTR_ERR(index); + } else { + err = ovl_do_rename(dir, temp, dir, index, 0); + dput(index); + } + + if (err) + goto out_cleanup; + +out: + dput(temp); + kfree(name.name); + return err; + +temp_err: + err = PTR_ERR(temp); + temp = NULL; + goto out; + +out_cleanup: + ovl_cleanup(dir, temp); + goto out; +} + struct ovl_copy_up_ctx { struct dentry *parent; struct dentry *dentry; @@ -327,6 +415,7 @@ struct ovl_copy_up_ctx { struct dentry *workdir; bool tmpfile; bool origin; + bool indexed; }; static int ovl_link_up(struct ovl_copy_up_ctx *c) @@ -361,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } } inode_unlock(udir); - ovl_set_nlink_upper(c->dentry); + if (err) + return err; + + err = ovl_set_nlink_upper(c->dentry); return err; } @@ -498,6 +590,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) if (err) goto out_cleanup; + if (S_ISDIR(c->stat.mode) && c->indexed) { + err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + if (err) + goto out_cleanup; + } + if (c->tmpfile) { inode_lock_nested(udir, I_MUTEX_PARENT); err = ovl_install_temp(c, temp, &newdentry); @@ -536,20 +634,33 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; - bool indexed = false; + bool to_index = false; - if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && - c->stat.nlink > 1) - indexed = true; + /* + * Indexed non-dir is copied up directly to the index entry and then + * hardlinked to upper dir. Indexed dir is copied up to indexdir, + * then index entry is created and then copied up dir installed. + * Copying dir up to indexdir instead of workdir simplifies locking. + */ + if (ovl_need_index(c->dentry)) { + c->indexed = true; + if (S_ISDIR(c->stat.mode)) + c->workdir = ovl_indexdir(c->dentry->d_sb); + else + to_index = true; + } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) c->origin = true; - if (indexed) { + if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); if (err) return err; + } else if (WARN_ON(!c->parent)) { + /* Disconnected dentry must be copied up to index dir */ + return -EIO; } else { /* * Mark parent "impure" because it may now contain non-pure @@ -572,11 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) } } - if (indexed) { - if (!err) - ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); - kfree(c->destname.name); - } else if (!err) { + + if (err) + goto out; + + if (c->indexed) + ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + + if (to_index) { + /* Initialize nlink for copy up of disconnected dentry */ + err = ovl_set_nlink_upper(c->dentry); + } else { struct inode *udir = d_inode(c->destdir); /* Restore timestamps on parent (best effort) */ @@ -587,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); } +out: + if (to_index) + kfree(c->destname.name); return err; } @@ -611,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, if (err) return err; - ovl_path_upper(parent, &parentpath); - ctx.destdir = parentpath.dentry; - ctx.destname = dentry->d_name; + if (parent) { + ovl_path_upper(parent, &parentpath); + ctx.destdir = parentpath.dentry; + ctx.destname = dentry->d_name; - err = vfs_getattr(&parentpath, &ctx.pstat, - STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); - if (err) - return err; + err = vfs_getattr(&parentpath, &ctx.pstat, + STATX_ATIME | STATX_MTIME, + AT_STATX_SYNC_AS_STAT); + if (err) + return err; + } /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) @@ -639,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } else { if (!ovl_dentry_upper(dentry)) err = ovl_do_copy_up(&ctx); - if (!err && !ovl_dentry_has_upper_alias(dentry)) + if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) err = ovl_link_up(&ctx); ovl_copy_up_end(dentry); } @@ -652,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred = ovl_override_creds(dentry->d_sb); + bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); + + /* + * With NFS export, copy up can get called for a disconnected non-dir. + * In this case, we will copy up lower inode to index dir without + * linking it to upper dir. + */ + if (WARN_ON(disconnected && d_is_dir(dentry))) + return -EIO; while (!err) { struct dentry *next; - struct dentry *parent; + struct dentry *parent = NULL; /* * Check if copy-up has happened as well as for upper alias (in @@ -671,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) * with rename. */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || disconnected)) break; next = dget(dentry); /* find the topmost dentry not yet copied up */ - for (;;) { + for (; !disconnected;) { parent = dget_parent(next); if (ovl_dentry_upper(parent)) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f9788bc116a8..839709c7803a 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir) } /* caller holds i_mutex on workdir */ -static struct dentry *ovl_whiteout(struct dentry *workdir, - struct dentry *dentry) +static struct dentry *ovl_whiteout(struct dentry *workdir) { int err; struct dentry *whiteout; @@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, return whiteout; } +/* Caller must hold i_mutex on both workdir and dir */ +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry) +{ + struct inode *wdir = workdir->d_inode; + struct dentry *whiteout; + int err; + int flags = 0; + + whiteout = ovl_whiteout(workdir); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + return err; + + if (d_is_dir(dentry)) + flags = RENAME_EXCHANGE; + + err = ovl_do_rename(wdir, whiteout, dir, dentry, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, dentry); + +out: + dput(whiteout); + return err; + +kill_whiteout: + ovl_cleanup(wdir, whiteout); + goto out; +} + int ovl_create_real(struct inode *dir, struct dentry *newdentry, struct cattr *attr, struct dentry *hardlink, bool debug) { @@ -181,11 +212,6 @@ static bool ovl_type_origin(struct dentry *dentry) return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); } -static bool ovl_may_have_whiteouts(struct dentry *dentry) -{ - return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); -} - static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -301,37 +327,6 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) -{ - int err; - struct dentry *ret = NULL; - LIST_HEAD(list); - - err = ovl_check_empty_dir(dentry, &list); - if (err) { - ret = ERR_PTR(err); - goto out_free; - } - - /* - * When removing an empty opaque directory, then it makes no sense to - * replace it with an exact replica of itself. - * - * If upperdentry has whiteouts, clear them. - * - * Can race with copy-up, since we don't hold the upperdir mutex. - * Doesn't matter, since copy-up can't create a non-empty directory - * from an empty one. - */ - if (!list_empty(&list)) - ret = ovl_clear_empty(dentry, &list); - -out_free: - ovl_cache_free(&list); - - return ret; -} - static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, const struct posix_acl *acl) { @@ -623,23 +618,20 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); } -static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, + struct list_head *list) { struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; - struct dentry *whiteout; struct dentry *upper; struct dentry *opaquedir = NULL; int err; - int flags = 0; if (WARN_ON(!workdir)) return -EROFS; - if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -662,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) goto out_dput_upper; } - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) - goto out_dput_upper; - - if (d_is_dir(upper)) - flags = RENAME_EXCHANGE; - - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper); if (err) - goto kill_whiteout; - if (flags) - ovl_cleanup(wdir, upper); + goto out_d_drop; ovl_dentry_version_inc(dentry->d_parent, true); out_d_drop: d_drop(dentry); - dput(whiteout); out_dput_upper: dput(upper); out_unlock: @@ -688,13 +669,10 @@ out_dput: dput(opaquedir); out: return err; - -kill_whiteout: - ovl_cleanup(wdir, whiteout); - goto out_d_drop; } -static int ovl_remove_upper(struct dentry *dentry, bool is_dir) +static int ovl_remove_upper(struct dentry *dentry, bool is_dir, + struct list_head *list) { struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; @@ -702,10 +680,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - /* Redirect/origin dir can be !ovl_lower_positive && not clean */ - if (is_dir && (ovl_dentry_get_redirect(dentry) || - ovl_may_have_whiteouts(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -746,11 +722,26 @@ out: return err; } +static bool ovl_pure_upper(struct dentry *dentry) +{ + return !ovl_dentry_lower(dentry) && + !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); +} + static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; bool locked = false; const struct cred *old_cred; + bool lower_positive = ovl_lower_positive(dentry); + LIST_HEAD(list); + + /* No need to clean pure upper removed by vfs_rmdir() */ + if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) { + err = ovl_check_empty_dir(dentry, &list); + if (err) + goto out; + } err = ovl_want_write(dentry); if (err) @@ -765,10 +756,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out_drop_write; old_cred = ovl_override_creds(dentry->d_sb); - if (!ovl_lower_positive(dentry)) - err = ovl_remove_upper(dentry, is_dir); + if (!lower_positive) + err = ovl_remove_upper(dentry, is_dir, &list); else - err = ovl_remove_and_whiteout(dentry, is_dir); + err = ovl_remove_and_whiteout(dentry, &list); revert_creds(old_cred); if (!err) { if (is_dir) @@ -780,6 +771,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) out_drop_write: ovl_drop_write(dentry); out: + ovl_cache_free(&list); return err; } @@ -915,6 +907,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; + LIST_HEAD(list); err = -EINVAL; if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) @@ -929,6 +922,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!overwrite && !ovl_can_move(new)) goto out; + if (overwrite && new_is_dir && !ovl_pure_upper(new)) { + err = ovl_check_empty_dir(new, &list); + if (err) + goto out; + } + + if (overwrite) { + if (ovl_lower_positive(old)) { + if (!ovl_dentry_is_whiteout(new)) { + /* Whiteout source */ + flags |= RENAME_WHITEOUT; + } else { + /* Switch whiteouts */ + flags |= RENAME_EXCHANGE; + } + } else if (is_dir && ovl_dentry_is_whiteout(new)) { + flags |= RENAME_EXCHANGE; + cleanup_whiteout = true; + } + } + err = ovl_want_write(old); if (err) goto out; @@ -952,9 +966,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_cred = ovl_override_creds(old->d_sb); - if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) || - ovl_may_have_whiteouts(new))) { - opaquedir = ovl_check_empty_and_clear(new); + if (!list_empty(&list)) { + opaquedir = ovl_clear_empty(new, &list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; @@ -962,21 +975,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, } } - if (overwrite) { - if (ovl_lower_positive(old)) { - if (!ovl_dentry_is_whiteout(new)) { - /* Whiteout source */ - flags |= RENAME_WHITEOUT; - } else { - /* Switch whiteouts */ - flags |= RENAME_EXCHANGE; - } - } else if (is_dir && ovl_dentry_is_whiteout(new)) { - flags |= RENAME_EXCHANGE; - cleanup_whiteout = true; - } - } - old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); @@ -1094,6 +1092,7 @@ out_drop_write: ovl_drop_write(old); out: dput(opaquedir); + ovl_cache_free(&list); return err; } diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c new file mode 100644 index 000000000000..bb94ce9da5c8 --- /dev/null +++ b/fs/overlayfs/export.c @@ -0,0 +1,715 @@ +/* + * Overlayfs NFS export support. + * + * Amir Goldstein <amir73il@gmail.com> + * + * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/cred.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/xattr.h> +#include <linux/exportfs.h> +#include <linux/ratelimit.h> +#include "overlayfs.h" + +/* + * We only need to encode origin if there is a chance that the same object was + * encoded pre copy up and then we need to stay consistent with the same + * encoding also after copy up. If non-pure upper is not indexed, then it was + * copied up before NFS export was enabled. In that case we don't need to worry + * about staying consistent with pre copy up encoding and we encode an upper + * file handle. Overlay root dentry is a private case of non-indexed upper. + * + * The following table summarizes the different file handle encodings used for + * different overlay object types: + * + * Object type | Encoding + * -------------------------------- + * Pure upper | U + * Non-indexed upper | U + * Indexed upper | L (*) + * Non-upper | L (*) + * + * U = upper file handle + * L = lower file handle + * + * (*) Connecting an overlay dir from real lower dentry is not always + * possible when there are redirects in lower layers. To mitigate this case, + * we copy up the lower dir first and then encode an upper dir file handle. + */ +static bool ovl_should_encode_origin(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (!ovl_dentry_lower(dentry)) + return false; + + /* + * Decoding a merge dir, whose origin's parent is under a redirected + * lower dir is not always possible. As a simple aproximation, we do + * not encode lower dir file handles when overlay has multiple lower + * layers and origin is below the topmost lower layer. + * + * TODO: copy up only the parent that is under redirected lower. + */ + if (d_is_dir(dentry) && ofs->upper_mnt && + OVL_E(dentry)->lowerstack[0].layer->idx > 1) + return false; + + /* Decoding a non-indexed upper from origin is not implemented */ + if (ovl_dentry_upper(dentry) && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + return false; + + return true; +} + +static int ovl_encode_maybe_copy_up(struct dentry *dentry) +{ + int err; + + if (ovl_dentry_upper(dentry)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_copy_up(dentry); + + ovl_drop_write(dentry); + return err; +} + +static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) +{ + struct dentry *origin = ovl_dentry_lower(dentry); + struct ovl_fh *fh = NULL; + int err; + + /* + * If we should not encode a lower dir file handle, copy up and encode + * an upper dir file handle. + */ + if (!ovl_should_encode_origin(dentry)) { + err = ovl_encode_maybe_copy_up(dentry); + if (err) + goto fail; + + origin = NULL; + } + + /* Encode an upper or origin file handle */ + fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); + err = PTR_ERR(fh); + if (IS_ERR(fh)) + goto fail; + + err = -EOVERFLOW; + if (fh->len > buflen) + goto fail; + + memcpy(buf, (char *)fh, fh->len); + err = fh->len; + +out: + kfree(fh); + return err; + +fail: + pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", + dentry, err, buflen, fh ? (int)fh->len : 0, + fh ? fh->type : 0); + goto out; +} + +static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len) +{ + int res, len = *max_len << 2; + + res = ovl_d_to_fh(dentry, (char *)fid, len); + if (res <= 0) + return FILEID_INVALID; + + len = res; + + /* Round up to dwords */ + *max_len = (len + 3) >> 2; + return OVL_FILEID; +} + +static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, + struct inode *parent) +{ + struct dentry *dentry; + int type; + + /* TODO: encode connectable file handles */ + if (parent) + return FILEID_INVALID; + + dentry = d_find_any_alias(inode); + if (WARN_ON(!dentry)) + return FILEID_INVALID; + + type = ovl_dentry_to_fh(dentry, fid, max_len); + + dput(dentry); + return type; +} + +/* + * Find or instantiate an overlay dentry from real dentries and index. + */ +static struct dentry *ovl_obtain_alias(struct super_block *sb, + struct dentry *upper_alias, + struct ovl_path *lowerpath, + struct dentry *index) +{ + struct dentry *lower = lowerpath ? lowerpath->dentry : NULL; + struct dentry *upper = upper_alias ?: index; + struct dentry *dentry; + struct inode *inode; + struct ovl_entry *oe; + + /* We get overlay directory dentries with ovl_lookup_real() */ + if (d_is_dir(upper ?: lower)) + return ERR_PTR(-EIO); + + inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower); + if (IS_ERR(inode)) { + dput(upper); + return ERR_CAST(inode); + } + + if (index) + ovl_set_flag(OVL_INDEX, inode); + + dentry = d_find_any_alias(inode); + if (!dentry) { + dentry = d_alloc_anon(inode->i_sb); + if (!dentry) + goto nomem; + oe = ovl_alloc_entry(lower ? 1 : 0); + if (!oe) + goto nomem; + + if (lower) { + oe->lowerstack->dentry = dget(lower); + oe->lowerstack->layer = lowerpath->layer; + } + dentry->d_fsdata = oe; + if (upper_alias) + ovl_dentry_set_upper_alias(dentry); + } + + return d_instantiate_anon(dentry, inode); + +nomem: + iput(inode); + dput(dentry); + return ERR_PTR(-ENOMEM); +} + +/* Get the upper or lower dentry in stach whose on layer @idx */ +static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) +{ + struct ovl_entry *oe = dentry->d_fsdata; + int i; + + if (!idx) + return ovl_dentry_upper(dentry); + + for (i = 0; i < oe->numlower; i++) { + if (oe->lowerstack[i].layer->idx == idx) + return oe->lowerstack[i].dentry; + } + + return NULL; +} + +/* + * Lookup a child overlay dentry to get a connected overlay dentry whose real + * dentry is @real. If @real is on upper layer, we lookup a child overlay + * dentry with the same name as the real dentry. Otherwise, we need to consult + * index for lookup. + */ +static struct dentry *ovl_lookup_real_one(struct dentry *connected, + struct dentry *real, + struct ovl_layer *layer) +{ + struct inode *dir = d_inode(connected); + struct dentry *this, *parent = NULL; + struct name_snapshot name; + int err; + + /* + * Lookup child overlay dentry by real name. The dir mutex protects us + * from racing with overlay rename. If the overlay dentry that is above + * real has already been moved to a parent that is not under the + * connected overlay dir, we return -ECHILD and restart the lookup of + * connected real path from the top. + */ + inode_lock_nested(dir, I_MUTEX_PARENT); + err = -ECHILD; + parent = dget_parent(real); + if (ovl_dentry_real_at(connected, layer->idx) != parent) + goto fail; + + /* + * We also need to take a snapshot of real dentry name to protect us + * from racing with underlying layer rename. In this case, we don't + * care about returning ESTALE, only from dereferencing a free name + * pointer because we hold no lock on the real dentry. + */ + take_dentry_name_snapshot(&name, real); + this = lookup_one_len(name.name, connected, strlen(name.name)); + err = PTR_ERR(this); + if (IS_ERR(this)) { + goto fail; + } else if (!this || !this->d_inode) { + dput(this); + err = -ENOENT; + goto fail; + } else if (ovl_dentry_real_at(this, layer->idx) != real) { + dput(this); + err = -ESTALE; + goto fail; + } + +out: + release_dentry_name_snapshot(&name); + dput(parent); + inode_unlock(dir); + return this; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + this = ERR_PTR(err); + goto out; +} + +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer); + +/* + * Lookup an indexed or hashed overlay dentry by real inode. + */ +static struct dentry *ovl_lookup_real_inode(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct dentry *index = NULL; + struct dentry *this = NULL; + struct inode *inode; + + /* + * Decoding upper dir from index is expensive, so first try to lookup + * overlay dentry in inode/dcache. + */ + inode = ovl_lookup_inode(sb, real, !layer->idx); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode) { + this = d_find_any_alias(inode); + iput(inode); + } + + /* + * For decoded lower dir file handle, lookup index by origin to check + * if lower dir was copied up and and/or removed. + */ + if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) { + index = ovl_lookup_index(ofs, NULL, real, false); + if (IS_ERR(index)) + return index; + } + + /* Get connected upper overlay dir from index */ + if (index) { + struct dentry *upper = ovl_index_upper(ofs, index); + + dput(index); + if (IS_ERR_OR_NULL(upper)) + return upper; + + /* + * ovl_lookup_real() in lower layer may call recursively once to + * ovl_lookup_real() in upper layer. The first level call walks + * back lower parents to the topmost indexed parent. The second + * recursive call walks back from indexed upper to the topmost + * connected/hashed upper parent (or up to root). + */ + this = ovl_lookup_real(sb, upper, &upper_layer); + dput(upper); + } + + if (!this) + return NULL; + + if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + dput(this); + this = ERR_PTR(-EIO); + } + + return this; +} + +/* + * Lookup an indexed or hashed overlay dentry, whose real dentry is an + * ancestor of @real. + */ +static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *next, *parent = NULL; + struct dentry *ancestor = ERR_PTR(-EIO); + + if (real == layer->mnt->mnt_root) + return dget(sb->s_root); + + /* Find the topmost indexed or hashed ancestor */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + /* + * Lookup a matching overlay dentry in inode/dentry + * cache or in index by real inode. + */ + ancestor = ovl_lookup_real_inode(sb, next, layer); + if (ancestor) + break; + + if (parent == layer->mnt->mnt_root) { + ancestor = dget(sb->s_root); + break; + } + + /* + * If @real has been moved out of the layer root directory, + * we will eventully hit the real fs root. This cannot happen + * by legit overlay rename, so we return error in that case. + */ + if (parent == next) { + ancestor = ERR_PTR(-EXDEV); + break; + } + + dput(next); + next = parent; + } + + dput(parent); + dput(next); + + return ancestor; +} + +/* + * Lookup a connected overlay dentry whose real dentry is @real. + * If @real is on upper layer, we lookup a child overlay dentry with the same + * path the real dentry. Otherwise, we need to consult index for lookup. + */ +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *connected; + int err = 0; + + connected = ovl_lookup_real_ancestor(sb, real, layer); + if (IS_ERR(connected)) + return connected; + + while (!err) { + struct dentry *next, *this; + struct dentry *parent = NULL; + struct dentry *real_connected = ovl_dentry_real_at(connected, + layer->idx); + + if (real_connected == real) + break; + + /* Find the topmost dentry not yet connected */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + if (parent == real_connected) + break; + + /* + * If real has been moved out of 'real_connected', + * we will not find 'real_connected' and hit the layer + * root. In that case, we need to restart connecting. + * This game can go on forever in the worst case. We + * may want to consider taking s_vfs_rename_mutex if + * this happens more than once. + */ + if (parent == layer->mnt->mnt_root) { + dput(connected); + connected = dget(sb->s_root); + break; + } + + /* + * If real file has been moved out of the layer root + * directory, we will eventully hit the real fs root. + * This cannot happen by legit overlay rename, so we + * return error in that case. + */ + if (parent == next) { + err = -EXDEV; + break; + } + + dput(next); + next = parent; + } + + if (!err) { + this = ovl_lookup_real_one(connected, next, layer); + if (IS_ERR(this)) + err = PTR_ERR(this); + + /* + * Lookup of child in overlay can fail when racing with + * overlay rename of child away from 'connected' parent. + * In this case, we need to restart the lookup from the + * top, because we cannot trust that 'real_connected' is + * still an ancestor of 'real'. There is a good chance + * that the renamed overlay ancestor is now in cache, so + * ovl_lookup_real_ancestor() will find it and we can + * continue to connect exactly from where lookup failed. + */ + if (err == -ECHILD) { + this = ovl_lookup_real_ancestor(sb, real, + layer); + err = IS_ERR(this) ? PTR_ERR(this) : 0; + } + if (!err) { + dput(connected); + connected = this; + } + } + + dput(parent); + dput(next); + } + + if (err) + goto fail; + + return connected; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + dput(connected); + return ERR_PTR(err); +} + +/* + * Get an overlay dentry from upper/lower real dentries and index. + */ +static struct dentry *ovl_get_dentry(struct super_block *sb, + struct dentry *upper, + struct ovl_path *lowerpath, + struct dentry *index) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer; + struct dentry *real = upper ?: (index ?: lowerpath->dentry); + + /* + * Obtain a disconnected overlay dentry from a non-dir real dentry + * and index. + */ + if (!d_is_dir(real)) + return ovl_obtain_alias(sb, upper, lowerpath, index); + + /* Removed empty directory? */ + if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real)) + return ERR_PTR(-ENOENT); + + /* + * If real dentry is connected and hashed, get a connected overlay + * dentry whose real dentry is @real. + */ + return ovl_lookup_real(sb, real, layer); +} + +static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct dentry *dentry; + struct dentry *upper; + + if (!ofs->upper_mnt) + return ERR_PTR(-EACCES); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + if (IS_ERR_OR_NULL(upper)) + return upper; + + dentry = ovl_get_dentry(sb, upper, NULL, NULL); + dput(upper); + + return dentry; +} + +static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_path origin = { }; + struct ovl_path *stack = &origin; + struct dentry *dentry = NULL; + struct dentry *index = NULL; + struct inode *inode = NULL; + bool is_deleted = false; + int err; + + /* First lookup indexed upper by fh */ + if (ofs->indexdir) { + index = ovl_get_index_fh(ofs, fh); + err = PTR_ERR(index); + if (IS_ERR(index)) { + if (err != -ESTALE) + return ERR_PTR(err); + + /* Found a whiteout index - treat as deleted inode */ + is_deleted = true; + index = NULL; + } + } + + /* Then try to get upper dir by index */ + if (index && d_is_dir(index)) { + struct dentry *upper = ovl_index_upper(ofs, index); + + err = PTR_ERR(upper); + if (IS_ERR_OR_NULL(upper)) + goto out_err; + + dentry = ovl_get_dentry(sb, upper, NULL, NULL); + dput(upper); + goto out; + } + + /* Then lookup origin by fh */ + err = ovl_check_origin_fh(ofs, fh, NULL, &stack); + if (err) { + goto out_err; + } else if (index) { + err = ovl_verify_origin(index, origin.dentry, false); + if (err) + goto out_err; + } else if (is_deleted) { + /* Lookup deleted non-dir by origin inode */ + if (!d_is_dir(origin.dentry)) + inode = ovl_lookup_inode(sb, origin.dentry, false); + err = -ESTALE; + if (!inode || atomic_read(&inode->i_count) == 1) + goto out_err; + + /* Deleted but still open? */ + index = dget(ovl_i_dentry_upper(inode)); + } + + dentry = ovl_get_dentry(sb, NULL, &origin, index); + +out: + dput(origin.dentry); + dput(index); + iput(inode); + return dentry; + +out_err: + dentry = ERR_PTR(err); + goto out; +} + +static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct dentry *dentry = NULL; + struct ovl_fh *fh = (struct ovl_fh *) fid; + int len = fh_len << 2; + unsigned int flags = 0; + int err; + + err = -EINVAL; + if (fh_type != OVL_FILEID) + goto out_err; + + err = ovl_check_fh_len(fh, len); + if (err) + goto out_err; + + flags = fh->flags; + dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ? + ovl_upper_fh_to_d(sb, fh) : + ovl_lower_fh_to_d(sb, fh); + err = PTR_ERR(dentry); + if (IS_ERR(dentry) && err != -ESTALE) + goto out_err; + + return dentry; + +out_err: + pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", + len, fh_type, flags, err); + return ERR_PTR(err); +} + +static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); + return ERR_PTR(-EACCES); +} + +static int ovl_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries and + * ovl_fh_to_parent() is not implemented, so we should not get here. + */ + WARN_ON_ONCE(1); + return -EIO; +} + +static struct dentry *ovl_get_parent(struct dentry *dentry) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries, so we + * should not get here. + */ + WARN_ON_ONCE(1); + return ERR_PTR(-EIO); +} + +const struct export_operations ovl_export_operations = { + .encode_fh = ovl_encode_inode_fh, + .fh_to_dentry = ovl_fh_to_dentry, + .fh_to_parent = ovl_fh_to_parent, + .get_name = ovl_get_name, + .get_parent = ovl_get_parent, +}; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 00b6b294272a..fcd97b783fa1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat, * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. + * + * Similarly, several redirected dirs can point to the + * same dir on a lower layer. With the "verify_lower" + * feature, we do not use the lower origin st_ino, if + * we haven't verified that this redirect is unique. + * * With inodes index enabled, it is safe to use st_ino - * of an indexed hardlinked origin. The index validates - * that the upper hardlink is not broken. + * of an indexed origin. The index validates that the + * upper hardlink is not broken and that a redirected + * dir is the only redirect to that origin. */ - if (is_dir || lowerstat.nlink == 1 || - ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || + (!ovl_verify_lower(dentry->d_sb) && + (is_dir || lowerstat.nlink == 1))) stat->ino = lowerstat.ino; if (samefs) @@ -343,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { + /* Copy up of disconnected dentry does not set upper alias */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || + (dentry->d_flags & DCACHE_DISCONNECTED))) return false; if (special_file(d_inode(dentry)->i_mode)) @@ -604,9 +614,25 @@ static int ovl_inode_set(struct inode *inode, void *data) } static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, - struct dentry *upperdentry) + struct dentry *upperdentry, bool strict) { /* + * For directories, @strict verify from lookup path performs consistency + * checks, so NULL lower/upper in dentry must match NULL lower/upper in + * inode. Non @strict verify from NFS handle decode path passes NULL for + * 'unknown' lower/upper. + */ + if (S_ISDIR(inode->i_mode) && strict) { + /* Real lower dir moved to upper layer under us? */ + if (!lowerdentry && ovl_inode_lower(inode)) + return false; + + /* Lookup of an uncovered redirect origin? */ + if (!upperdentry && ovl_inode_upper(inode)) + return false; + } + + /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed * or hardlinked overlay dentry and lower dentry cannot be followed @@ -625,14 +651,35 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index) +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper) { - struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct inode *inode, *key = d_inode(real); + + inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + if (!inode) + return NULL; + + if (!ovl_verify_inode(inode, is_upper ? NULL : real, + is_upper ? real : NULL, false)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower) +{ + struct ovl_fs *ofs = sb->s_fs_info; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + bool indexed = (index || (ovl_indexdir(sb) && !upperdentry)); + struct dentry *origin = indexed ? lowerdentry : NULL; + bool is_dir; if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO); @@ -641,17 +688,22 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed upper, but we must - * not use lower as hash key in that case. - * Hash inodes that are or could be indexed by origin inode and - * non-indexed upper inodes that could be hard linked by upper inode. + * Copy up origin (lower) may exist for non-indexed non-dir upper, but + * we must not use lower as hash key in that case. + * Hash non-dir that is or could be indexed by origin inode. + * Hash dir that is or could be merged by origin inode. + * Hash pure upper and non-indexed non-dir by upper inode. + * Hash non-indexed dir by upper inode for NFS export. */ - if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { - struct inode *key = d_inode(indexed ? lowerdentry : - upperdentry); - unsigned int nlink; + is_dir = S_ISDIR(realinode->i_mode); + if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt)) + origin = lowerdentry; + + if (upperdentry || origin) { + struct inode *key = d_inode(origin ?: upperdentry); + unsigned int nlink = is_dir ? 1 : realinode->i_nlink; - inode = iget5_locked(dentry->d_sb, (unsigned long) key, + inode = iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); if (!inode) goto out_nomem; @@ -660,7 +712,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, * Verify that the underlying files stored in the inode * match those in the dentry. */ - if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { + if (!ovl_verify_inode(inode, lowerdentry, upperdentry, + true)) { iput(inode); inode = ERR_PTR(-ESTALE); goto out; @@ -670,11 +723,12 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; } - nlink = ovl_get_nlink(lowerdentry, upperdentry, - realinode->i_nlink); + /* Recalculate nlink for non-dir due to indexing */ + if (!is_dir) + nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { - inode = new_inode(dentry->d_sb); + inode = new_inode(sb); if (!inode) goto out_nomem; } @@ -685,10 +739,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode); /* Check for non-merge dir that may have whiteouts */ - if (S_ISDIR(realinode->i_mode)) { - struct ovl_entry *oe = dentry->d_fsdata; - - if (((upperdentry && lowerdentry) || oe->numlower > 1) || + if (is_dir) { + if (((upperdentry && lowerdentry) || numlower > 1) || ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { ovl_set_flag(OVL_WHITEOUTS, inode); } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index beb945e1963c..de3e6da1d5a5 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/cred.h> +#include <linux/ctype.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/ratelimit.h> @@ -84,15 +85,54 @@ invalid: static int ovl_acceptable(void *ctx, struct dentry *dentry) { - return 1; + /* + * A non-dir origin may be disconnected, which is fine, because + * we only need it for its unique inode number. + */ + if (!d_is_dir(dentry)) + return 1; + + /* Don't decode a deleted empty directory */ + if (d_unhashed(dentry)) + return 0; + + /* Check if directory belongs to the layer we are decoding from */ + return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root); } -static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) +/* + * Check validity of an overlay file handle buffer. + * + * Return 0 for a valid file handle. + * Return -ENODATA for "origin unknown". + * Return <0 for an invalid file handle. + */ +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { - int res; + if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len) + return -EINVAL; + + if (fh->magic != OVL_FH_MAGIC) + return -EINVAL; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + return -ENODATA; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + return -ENODATA; + + return 0; +} + +static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) +{ + int res, err; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = vfs_getxattr(dentry, name, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -102,28 +142,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res == 0) return NULL; - fh = kzalloc(res, GFP_KERNEL); + fh = kzalloc(res, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = vfs_getxattr(dentry, name, fh, res); if (res < 0) goto fail; - if (res < sizeof(struct ovl_fh) || res < fh->len) - goto invalid; - - if (fh->magic != OVL_FH_MAGIC) + err = ovl_check_fh_len(fh, res); + if (err < 0) { + if (err == -ENODATA) + goto out; goto invalid; - - /* Treat larger version and unknown flags as "origin unknown" */ - if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) - goto out; - - /* Treat endianness mismatch as "origin unknown" */ - if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && - (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) - goto out; + } return fh; @@ -139,47 +171,41 @@ invalid: goto out; } -static struct dentry *ovl_get_origin(struct dentry *dentry, - struct vfsmount *mnt) +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) { - struct dentry *origin = NULL; - struct ovl_fh *fh = ovl_get_origin_fh(dentry); + struct dentry *real; int bytes; - if (IS_ERR_OR_NULL(fh)) - return (struct dentry *)fh; - /* * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. */ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) - goto out; + return NULL; bytes = (fh->len - offsetof(struct ovl_fh, fid)); - origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, - bytes >> 2, (int)fh->type, - ovl_acceptable, NULL); - if (IS_ERR(origin)) { - /* Treat stale file handle as "origin unknown" */ - if (origin == ERR_PTR(-ESTALE)) - origin = NULL; - goto out; + real = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, mnt); + if (IS_ERR(real)) { + /* + * Treat stale file handle to lower file as "origin unknown". + * upper file handle could become stale when upper file is + * unlinked and this information is needed to handle stale + * index entries correctly. + */ + if (real == ERR_PTR(-ESTALE) && + !(fh->flags & OVL_FH_FLAG_PATH_UPPER)) + real = NULL; + return real; } - if (ovl_dentry_weird(origin) || - ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) - goto invalid; - -out: - kfree(fh); - return origin; + if (ovl_dentry_weird(real)) { + dput(real); + return NULL; + } -invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); - dput(origin); - origin = NULL; - goto out; + return real; } static bool ovl_is_opaquedir(struct dentry *dentry) @@ -284,47 +310,81 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin(struct dentry *upperdentry, - struct ovl_path *lower, unsigned int numlower, - struct ovl_path **stackp, unsigned int *ctrp) +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp) { - struct vfsmount *mnt; struct dentry *origin = NULL; int i; - for (i = 0; i < numlower; i++) { - mnt = lower[i].layer->mnt; - origin = ovl_get_origin(upperdentry, mnt); - if (IS_ERR(origin)) - return PTR_ERR(origin); - + for (i = 0; i < ofs->numlower; i++) { + origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt); if (origin) break; } if (!origin) - return 0; + return -ESTALE; + else if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (upperdentry && !ovl_is_whiteout(upperdentry) && + ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) + goto invalid; - BUG_ON(*ctrp); if (!*stackp) *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); if (!*stackp) { dput(origin); return -ENOMEM; } - **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; - *ctrp = 1; + **stackp = (struct ovl_path){ + .dentry = origin, + .layer = &ofs->lower_layers[i] + }; + + return 0; + +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); + dput(origin); + return -EIO; +} + +static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, + struct ovl_path **stackp, unsigned int *ctrp) +{ + struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN); + int err; + + if (IS_ERR_OR_NULL(fh)) + return PTR_ERR(fh); + + err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp); + kfree(fh); + + if (err) { + if (err == -ESTALE) + return 0; + return err; + } + + if (WARN_ON(*ctrp)) + return -EIO; + *ctrp = 1; return 0; } /* - * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Verify that @fh matches the file handle stored in xattr @name. * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +static int ovl_verify_fh(struct dentry *dentry, const char *name, + const struct ovl_fh *fh) { - struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + struct ovl_fh *ofh = ovl_get_fh(dentry, name); int err = 0; if (!ofh) @@ -341,28 +401,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) } /* - * Verify that an inode matches the origin file handle stored in upper inode. + * Verify that @real dentry matches the file handle stored in xattr @name. * - * If @set is true and there is no stored file handle, encode and store origin - * file handle in OVL_XATTR_ORIGIN. + * If @set is true and there is no stored file handle, encode @real and store + * file handle in xattr @name. * - * Return 0 on match, -ESTALE on mismatch, < 0 on error. + * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set) +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_fh(origin, is_upper); + fh = ovl_encode_fh(real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; - err = ovl_verify_origin_fh(dentry, fh); + err = ovl_verify_fh(dentry, name, fh); if (set && err == -ENODATA) - err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + err = ovl_do_setxattr(dentry, name, fh, fh->len, 0); if (err) goto fail; @@ -371,45 +431,71 @@ out: return err; fail: - inode = d_inode(origin); - pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", - origin, inode ? inode->i_ino : 0, err); + inode = d_inode(real); + pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n", + is_upper ? "upper" : "origin", real, + inode ? inode->i_ino : 0, err); goto out; } +/* Get upper dentry from index */ +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) +{ + struct ovl_fh *fh; + struct dentry *upper; + + if (!d_is_dir(index)) + return dget(index); + + fh = ovl_get_fh(index, OVL_XATTR_UPPER); + if (IS_ERR_OR_NULL(fh)) + return ERR_CAST(fh); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + kfree(fh); + + if (IS_ERR_OR_NULL(upper)) + return upper ?: ERR_PTR(-ESTALE); + + if (!d_is_dir(upper)) { + pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n", + index, upper); + dput(upper); + return ERR_PTR(-EIO); + } + + return upper; +} + +/* Is this a leftover from create/whiteout of directory index entry? */ +static bool ovl_is_temp_index(struct dentry *index) +{ + return index->d_name.name[0] == '#'; +} + /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. */ -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower) +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) { struct ovl_fh *fh = NULL; size_t len; struct ovl_path origin = { }; struct ovl_path *stack = &origin; - unsigned int ctr = 0; + struct dentry *upper = NULL; int err; if (!d_inode(index)) return 0; - /* - * Directory index entries are going to be used for looking up - * redirected upper dirs by lower dir fh when decoding an overlay - * file handle of a merge dir. Whiteout index entries are going to be - * used as an indication that an exported overlay file handle should - * be treated as stale (i.e. after unlink of the overlay inode). - * We don't know the verification rules for directory and whiteout - * index entries, because they have not been implemented yet, so return - * EINVAL if those entries are found to abort the mount to avoid - * corrupting an index that was created by a newer kernel. - */ - err = -EINVAL; - if (d_is_dir(index) || ovl_is_whiteout(index)) + /* Cleanup leftover from index create/cleanup attempt */ + err = -ESTALE; + if (ovl_is_temp_index(index)) goto fail; + err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; @@ -420,26 +506,68 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, goto fail; err = -EINVAL; - if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + if (hex2bin((u8 *)fh, index->d_name.name, len)) goto fail; - err = ovl_verify_origin_fh(index, fh); + err = ovl_check_fh_len(fh, len); if (err) goto fail; - err = ovl_check_origin(index, lower, numlower, &stack, &ctr); - if (!err && !ctr) - err = -ESTALE; + /* + * Whiteout index entries are used as an indication that an exported + * overlay file handle should be treated as stale (i.e. after unlink + * of the overlay inode). These entries contain no origin xattr. + */ + if (ovl_is_whiteout(index)) + goto out; + + /* + * Verifying directory index entries are not stale is expensive, so + * only verify stale dir index if NFS export is enabled. + */ + if (d_is_dir(index) && !ofs->config.nfs_export) + goto out; + + /* + * Directory index entries should have 'upper' xattr pointing to the + * real upper dir. Non-dir index entries are hardlinks to the upper + * real inode. For non-dir index, we can read the copy up origin xattr + * directly from the index dentry, but for dir index we first need to + * decode the upper directory. + */ + upper = ovl_index_upper(ofs, index); + if (IS_ERR_OR_NULL(upper)) { + err = PTR_ERR(upper); + /* + * Directory index entries with no 'upper' xattr need to be + * removed. When dir index entry has a stale 'upper' xattr, + * we assume that upper dir was removed and we treat the dir + * index as orphan entry that needs to be whited out. + */ + if (err == -ESTALE) + goto orphan; + else if (!err) + err = -ESTALE; + goto fail; + } + + err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh); + dput(upper); if (err) goto fail; - /* Check if index is orphan and don't warn before cleaning it */ - if (d_inode(index)->i_nlink == 1 && - ovl_get_nlink(origin.dentry, index, 0) == 0) - err = -ENOENT; + /* Check if non-dir index is orphan and don't warn before cleaning it */ + if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) { + err = ovl_check_origin_fh(ofs, fh, index, &stack); + if (err) + goto fail; + + if (ovl_get_nlink(origin.dentry, index, 0) == 0) + goto orphan; + } - dput(origin.dentry); out: + dput(origin.dentry); kfree(fh); return err; @@ -447,6 +575,28 @@ fail: pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; + +orphan: + pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(index)->i_nlink); + err = -ENOENT; + goto out; +} + +static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +{ + char *n, *s; + + n = kzalloc(fh->len * 2, GFP_KERNEL); + if (!n) + return -ENOMEM; + + s = bin2hex(n, fh, fh->len); + *name = (struct qstr) QSTR_INIT(n, s - n); + + return 0; + } /* @@ -466,35 +616,58 @@ fail: */ int ovl_get_index_name(struct dentry *origin, struct qstr *name) { - int err; struct ovl_fh *fh; - char *n, *s; + int err; fh = ovl_encode_fh(origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); - err = -ENOMEM; - n = kzalloc(fh->len * 2, GFP_KERNEL); - if (n) { - s = bin2hex(n, fh, fh->len); - *name = (struct qstr) QSTR_INIT(n, s - n); - err = 0; - } - kfree(fh); + err = ovl_get_index_name_fh(fh, name); + kfree(fh); return err; +} + +/* Lookup index by file handle for NFS export */ +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) +{ + struct dentry *index; + struct qstr name; + int err; + + err = ovl_get_index_name_fh(fh, &name); + if (err) + return ERR_PTR(err); + + index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + kfree(name.name); + if (IS_ERR(index)) { + if (PTR_ERR(index) == -ENOENT) + index = NULL; + return index; + } + if (d_is_negative(index)) + err = 0; + else if (ovl_is_whiteout(index)) + err = -ESTALE; + else if (ovl_dentry_weird(index)) + err = -EIO; + else + return index; + + dput(index); + return ERR_PTR(err); } -static struct dentry *ovl_lookup_index(struct dentry *dentry, - struct dentry *upper, - struct dentry *origin) +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct dentry *index; struct inode *inode; struct qstr name; + bool is_dir = d_is_dir(origin); int err; err = ovl_get_index_name(origin, &name); @@ -518,8 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { goto out_dput; - } else if (upper && d_inode(upper) != inode) { - goto out_dput; + } else if (ovl_is_whiteout(index) && !verify) { + /* + * When index lookup is called with !verify for decoding an + * overlay file handle, a whiteout index implies that decode + * should treat file handle as stale and no need to print a + * warning about it. + */ + dput(index); + index = ERR_PTR(-ESTALE); + goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -533,8 +714,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; - } + } else if (is_dir && verify) { + if (!upper) { + pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + origin, index); + goto fail; + } + /* Verify that dir index 'upper' xattr points to upper dir */ + err = ovl_verify_upper(index, upper, false); + if (err) { + if (err == -ESTALE) { + pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + upper, origin, index); + } + goto fail; + } + } else if (upper && d_inode(upper) != inode) { + goto out_dput; + } out: kfree(name.name); return index; @@ -572,16 +770,25 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) return (idx < oe->numlower) ? idx + 1 : -1; } -static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) +/* Fix missing 'origin' xattr */ +static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) { - int i; + int err; - for (i = 0; i < ofs->numlower; i++) { - if (ofs->lower_layers[i].mnt == path->layer->mnt) - break; - } + if (ovl_check_origin_xattr(upper)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_set_origin(dentry, lower, upper); + if (!err) + err = ovl_set_impure(dentry->d_parent, upper->d_parent); - return i; + ovl_drop_write(dentry); + return err; } struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, @@ -594,6 +801,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct ovl_path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; + struct dentry *origin = NULL; struct dentry *index = NULL; unsigned int ctr = 0; struct inode *inode = NULL; @@ -638,8 +846,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(upperdentry, roe->lowerstack, - roe->numlower, &stack, &ctr); + err = ovl_check_origin(ofs, upperdentry, &stack, &ctr); if (err) goto out_put_upper; } @@ -674,6 +881,34 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!this) continue; + /* + * If no origin fh is stored in upper of a merge dir, store fh + * of lower dir and set upper parent "impure". + */ + if (upperdentry && !ctr && !ofs->noxattr) { + err = ovl_fix_origin(dentry, this, upperdentry); + if (err) { + dput(this); + goto out_put; + } + } + + /* + * When "verify_lower" feature is enabled, do not merge with a + * lower dir that does not match a stored origin xattr. In any + * case, only verified origin is used for index lookup. + */ + if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { + err = ovl_verify_origin(upperdentry, this, false); + if (err) { + dput(this); + break; + } + + /* Bless lower dir as verified origin */ + origin = this; + } + stack[ctr].dentry = this; stack[ctr].layer = lower.layer; ctr++; @@ -693,25 +928,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, */ err = -EPERM; if (d.redirect && !ofs->config.redirect_follow) { - pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); + pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n", + dentry); goto out_put; } if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; - /* Find the current layer on the root dentry */ - i = ovl_find_layer(ofs, &lower); - if (WARN_ON(i == ofs->numlower)) - break; + i = lower.layer->idx - 1; } } - /* Lookup index by lower inode and verify it matches upper inode */ - if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { - struct dentry *origin = stack[0].dentry; + /* + * Lookup index by lower inode and verify it matches upper inode. + * We only trust dir index if we verified that lower dir matches + * origin, otherwise dir index entries may be inconsistent and we + * ignore them. Always lookup index of non-dir and non-upper. + */ + if (ctr && (!upperdentry || !d.is_dir)) + origin = stack[0].dentry; - index = ovl_lookup_index(dentry, upperdentry, origin); + if (origin && ovl_indexdir(dentry->d_sb) && + (!d.is_dir || ovl_index_all(dentry->d_sb))) { + index = ovl_lookup_index(ofs, upperdentry, origin, true); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; @@ -724,17 +964,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!oe) goto out_put; - oe->opaque = upperopaque; memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); dentry->d_fsdata = oe; + if (upperopaque) + ovl_dentry_set_opaque(dentry); + if (upperdentry) ovl_dentry_set_upper_alias(dentry); else if (index) upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry, index); + if (ctr) + origin = stack[0].dentry; + inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index, + ctr); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; @@ -748,9 +993,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, dput(index); kfree(stack); kfree(d.redirect); - d_add(dentry, inode); - - return NULL; + return d_splice_alias(inode, dentry); out_free_oe: dentry->d_fsdata = NULL; @@ -771,9 +1014,9 @@ out: bool ovl_lower_positive(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; struct ovl_entry *poe = dentry->d_parent->d_fsdata; const struct qstr *name = &dentry->d_name; + const struct cred *old_cred; unsigned int i; bool positive = false; bool done = false; @@ -783,12 +1026,13 @@ bool ovl_lower_positive(struct dentry *dentry) * whiteout. */ if (!dentry->d_inode) - return oe->opaque; + return ovl_dentry_is_opaque(dentry); /* Negative upper -> positive lower */ if (!ovl_dentry_upper(dentry)) return true; + old_cred = ovl_override_creds(dentry->d_sb); /* Positive upper -> have to look up lower to see whether it exists */ for (i = 0; !done && !positive && i < poe->numlower; i++) { struct dentry *this; @@ -818,6 +1062,7 @@ bool ovl_lower_positive(struct dentry *dentry) dput(this); } } + revert_creds(old_cred); return positive; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index b489099ccd49..0df25a9c94bd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -27,8 +27,9 @@ enum ovl_path_type { #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" +#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" -enum ovl_flag { +enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, /* Non-merge dir that may contain whiteout entries */ @@ -36,6 +37,11 @@ enum ovl_flag { OVL_INDEX, }; +enum ovl_entry_flag { + OVL_E_UPPER_ALIAS, + OVL_E_OPAQUE, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -62,6 +68,9 @@ enum ovl_flag { #error Endianness not defined #endif +/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */ +#define OVL_FILEID 0xfb + /* On-disk and in-memeory format for redirect by file handle */ struct ovl_fh { u8 version; /* 0 */ @@ -194,6 +203,8 @@ const struct cred *ovl_override_creds(struct super_block *sb); struct super_block *ovl_same_sb(struct super_block *sb); bool ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); +bool ovl_index_all(struct super_block *sb); +bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -210,6 +221,9 @@ struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry); +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); @@ -238,6 +252,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode); bool ovl_test_flag(unsigned long flag, struct inode *inode); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); +bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry, bool *locked); void ovl_nlink_end(struct dentry *dentry, bool locked); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); @@ -249,15 +264,35 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set); -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower); +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len); +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt); +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp); +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set); +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index); +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin(struct dentry *upper, + struct dentry *origin, bool set) +{ + return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set); +} + +static inline int ovl_verify_upper(struct dentry *index, + struct dentry *upper, bool set) +{ + return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set); +} + /* readdir.c */ extern const struct file_operations ovl_dir_operations; int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); @@ -267,8 +302,7 @@ void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower); +int ovl_indexdir_cleanup(struct ovl_fs *ofs); /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); @@ -291,8 +325,11 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index); +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper); +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; @@ -306,6 +343,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; struct dentry *ovl_lookup_temp(struct dentry *workdir); +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry); struct cattr { dev_t rdev; umode_t mode; @@ -321,4 +360,9 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper); +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper); + +/* export.c */ +extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 9d0bc03bf6e4..bfef6edcc111 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -17,11 +17,14 @@ struct ovl_config { bool redirect_follow; const char *redirect_mode; bool index; + bool nfs_export; }; struct ovl_layer { struct vfsmount *mnt; dev_t pseudo_dev; + /* Index of this layer in fs root (upper == 0) */ + int idx; }; struct ovl_path { @@ -58,8 +61,7 @@ struct ovl_fs { struct ovl_entry { union { struct { - unsigned long has_upper; - bool opaque; + unsigned long flags; }; struct rcu_head rcu; }; @@ -69,6 +71,11 @@ struct ovl_entry { struct ovl_entry *ovl_alloc_entry(unsigned int numlower); +static inline struct ovl_entry *OVL_E(struct dentry *dentry) +{ + return (struct ovl_entry *) dentry->d_fsdata; +} + struct ovl_inode { struct ovl_dir_cache *cache; const char *redirect; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 8c98578d27a1..c11f5c0906c3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) return ERR_PTR(res); } if (list_empty(&cache->entries)) { - /* Good oportunity to get rid of an unnecessary "impure" flag */ - ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); + /* + * A good opportunity to get rid of an unneeded "impure" flag. + * Removing the "impure" xattr is best effort. + */ + if (!ovl_want_write(dentry)) { + ovl_do_removexattr(ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); + ovl_drop_write(dentry); + } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); kfree(cache); return NULL; @@ -769,10 +776,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = READ_ONCE(od->upperfile); @@ -858,8 +869,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) int err; struct ovl_cache_entry *p, *n; struct rb_root root = RB_ROOT; + const struct cred *old_cred; + old_cred = ovl_override_creds(dentry->d_sb); err = ovl_dir_read_merged(dentry, list, &root); + revert_creds(old_cred); if (err) return err; @@ -1016,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, } } -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower) +int ovl_indexdir_cleanup(struct ovl_fs *ofs) { int err; + struct dentry *indexdir = ofs->indexdir; struct dentry *index = NULL; - struct inode *dir = dentry->d_inode; - struct path path = { .mnt = mnt, .dentry = dentry }; + struct inode *dir = indexdir->d_inode; + struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir }; LIST_HEAD(list); struct rb_root root = RB_ROOT; struct ovl_cache_entry *p; @@ -1046,19 +1060,40 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, if (p->len == 2 && p->name[1] == '.') continue; } - index = lookup_one_len(p->name, dentry, p->len); + index = lookup_one_len(p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; break; } - err = ovl_verify_index(index, lower, numlower); - /* Cleanup stale and orphan index entries */ - if (err && (err == -ESTALE || err == -ENOENT)) + err = ovl_verify_index(ofs, index); + if (!err) { + goto next; + } else if (err == -ESTALE) { + /* Cleanup stale index entries */ + err = ovl_cleanup(dir, index); + } else if (err != -ENOENT) { + /* + * Abort mount to avoid corrupting the index if + * an incompatible index entry was found or on out + * of memory. + */ + break; + } else if (ofs->config.nfs_export) { + /* + * Whiteout orphan index to block future open by + * handle after overlay nlink dropped to zero. + */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ err = ovl_cleanup(dir, index); + } + if (err) break; +next: dput(index); index = NULL; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 76440feb79f6..9ee37c76091d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644); MODULE_PARM_DESC(ovl_index_def, "Default to on or off for the inodes index feature"); +static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); +module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); +MODULE_PARM_DESC(ovl_nfs_export_def, + "Default to on or off for the NFS export feature"); + static void ovl_entry_stack_free(struct ovl_entry *oe) { unsigned int i; @@ -211,6 +216,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); + iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); @@ -341,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); if (ofs->config.index != ovl_index_def) seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); + if (ofs->config.nfs_export != ovl_nfs_export_def) + seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? + "on" : "off"); return 0; } @@ -373,6 +382,8 @@ enum { OPT_REDIRECT_DIR, OPT_INDEX_ON, OPT_INDEX_OFF, + OPT_NFS_EXPORT_ON, + OPT_NFS_EXPORT_OFF, OPT_ERR, }; @@ -384,6 +395,8 @@ static const match_table_t ovl_tokens = { {OPT_REDIRECT_DIR, "redirect_dir=%s"}, {OPT_INDEX_ON, "index=on"}, {OPT_INDEX_OFF, "index=off"}, + {OPT_NFS_EXPORT_ON, "nfs_export=on"}, + {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, {OPT_ERR, NULL} }; @@ -490,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->index = false; break; + case OPT_NFS_EXPORT_ON: + config->nfs_export = true; + break; + + case OPT_NFS_EXPORT_OFF: + config->nfs_export = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -520,10 +541,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, bool retried = false; bool locked = false; - err = mnt_want_write(mnt); - if (err) - goto out_err; - inode_lock_nested(dir, I_MUTEX_PARENT); locked = true; @@ -588,7 +605,6 @@ retry: goto out_err; } out_unlock: - mnt_drop_write(mnt); if (locked) inode_unlock(dir); @@ -700,12 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path, *remote = true; /* - * The inodes index feature needs to encode and decode file - * handles, so it requires that all layers support them. + * The inodes index feature and NFS export need to encode and decode + * file handles, so they require that all layers support them. */ - if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { + if ((ofs->config.nfs_export || + (ofs->config.index && ofs->config.upperdir)) && + !ovl_can_decode_fh(path->dentry->d_sb)) { ofs->config.index = false; - pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); + ofs->config.nfs_export = false; + pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", + name); } return 0; @@ -929,12 +949,17 @@ out: static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) { + struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; int err; + err = mnt_want_write(mnt); + if (err) + return err; + ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); if (!ofs->workdir) - return 0; + goto out; /* * Upper should support d_type, else whiteouts are visible. Given @@ -944,7 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) */ err = ovl_check_d_type_supported(workpath); if (err < 0) - return err; + goto out; /* * We allowed this configuration and don't want to break users over @@ -967,7 +992,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); if (err) { ofs->noxattr = true; - pr_warn("overlayfs: upper fs does not support xattr.\n"); + ofs->config.index = false; + pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); + err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); } @@ -979,7 +1006,15 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); } - return 0; + /* NFS export of r/w mount depends on index */ + if (ofs->config.nfs_export && !ofs->config.index) { + pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + +out: + mnt_drop_write(mnt); + return err; } static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) @@ -1026,11 +1061,16 @@ out: static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, struct path *upperpath) { + struct vfsmount *mnt = ofs->upper_mnt; int err; + err = mnt_want_write(mnt); + if (err) + return err; + /* Verify lower root is upper root origin */ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, - false, true); + true); if (err) { pr_err("overlayfs: failed to verify upper root origin\n"); goto out; @@ -1038,23 +1078,33 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { - /* Verify upper root is index dir origin */ - err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, - true, true); + /* + * Verify upper root is exclusively associated with index dir. + * Older kernels stored upper fh in "trusted.overlay.origin" + * xattr. If that xattr exists, verify that it is a match to + * upper dir file handle. In any case, verify or set xattr + * "trusted.overlay.upper" to indicate that index may have + * directory entries. + */ + if (ovl_check_origin_xattr(ofs->indexdir)) { + err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, + upperpath->dentry, true, false); + if (err) + pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); + } + err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); if (err) - pr_err("overlayfs: failed to verify index dir origin\n"); + pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) - err = ovl_indexdir_cleanup(ofs->indexdir, - ofs->upper_mnt, - oe->lowerstack, - oe->numlower); + err = ovl_indexdir_cleanup(ofs); } if (err || !ofs->indexdir) pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: + mnt_drop_write(mnt); return err; } @@ -1094,6 +1144,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, ofs->lower_layers[ofs->numlower].mnt = mnt; ofs->lower_layers[ofs->numlower].pseudo_dev = dev; + ofs->lower_layers[ofs->numlower].idx = i + 1; ofs->numlower++; /* Check if all lower layers are on same sb */ @@ -1131,6 +1182,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, } else if (!ofs->config.upperdir && stacklen == 1) { pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); goto out_err; + } else if (!ofs->config.upperdir && ofs->config.nfs_export && + ofs->config.redirect_follow) { + pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; } err = -ENOMEM; @@ -1207,6 +1262,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; ofs->config.index = ovl_index_def; + ofs->config.nfs_export = ovl_nfs_export_def; err = ovl_parse_opt((char *) data, &ofs->config); if (err) goto out_err; @@ -1257,13 +1313,26 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_free_oe; - if (!ofs->indexdir) + /* Force r/o mount with no index dir */ + if (!ofs->indexdir) { + dput(ofs->workdir); + ofs->workdir = NULL; sb->s_flags |= SB_RDONLY; + } + } - /* Show index=off/on in /proc/mounts for any of the reasons above */ - if (!ofs->indexdir) + /* Show index=off in /proc/mounts for forced r/o mount */ + if (!ofs->indexdir) { ofs->config.index = false; + if (ofs->upper_mnt && ofs->config.nfs_export) { + pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + } + + if (ofs->config.nfs_export) + sb->s_export_op = &ovl_export_operations; /* Never override disk quota limits or use reserved space */ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); @@ -1279,15 +1348,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!root_dentry) goto out_free_oe; + root_dentry->d_fsdata = oe; + mntput(upperpath.mnt); if (upperpath.dentry) { - oe->has_upper = true; + ovl_dentry_set_upper_alias(root_dentry); if (ovl_is_impuredir(upperpath.dentry)) ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); } - root_dentry->d_fsdata = oe; - /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index d6bb1c9f5e7a..930784a26623 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb) return ofs->indexdir; } +/* Index all files on copy up. For now only enabled for NFS export */ +bool ovl_index_all(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + +/* Verify lower origin on lookup. For now only enabled for NFS export */ +bool ovl_verify_lower(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -194,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) OVL_I(inode)->cache = cache; } +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry) +{ + set_bit(flag, &OVL_E(dentry)->flags); +} + +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry) +{ + clear_bit(flag, &OVL_E(dentry)->flags); +} + +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry) +{ + return test_bit(flag, &OVL_E(dentry)->flags); +} + bool ovl_dentry_is_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - return oe->opaque; + return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry); } bool ovl_dentry_is_whiteout(struct dentry *dentry) @@ -207,28 +237,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry) void ovl_dentry_set_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->opaque = true; + ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } /* - * For hard links it's possible for ovl_dentry_upper() to return positive, while - * there's no actual upper alias for the inode. Copy up code needs to know - * about the existence of the upper alias, so it can't use ovl_dentry_upper(). + * For hard links and decoded file handles, it's possible for ovl_dentry_upper() + * to return positive, while there's no actual upper alias for the inode. + * Copy up code needs to know about the existence of the upper alias, so it + * can't use ovl_dentry_upper(). */ bool ovl_dentry_has_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->has_upper; + return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry); } void ovl_dentry_set_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->has_upper = true; + ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); } bool ovl_redirect_dir(struct super_block *sb) @@ -257,7 +282,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) - OVL_I(inode)->lower = d_inode(lowerdentry); + OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } @@ -273,7 +298,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; - if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } @@ -447,10 +472,32 @@ void ovl_inuse_unlock(struct dentry *dentry) } } +/* + * Does this overlay dentry need to be indexed on copy up? + */ +bool ovl_need_index(struct dentry *dentry) +{ + struct dentry *lower = ovl_dentry_lower(dentry); + + if (!lower || !ovl_indexdir(dentry->d_sb)) + return false; + + /* Index all files for NFS export and consistency verification */ + if (ovl_index_all(dentry->d_sb)) + return true; + + /* Index only lower hardlinks on copy up */ + if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return true; + + return false; +} + /* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { - struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; @@ -463,7 +510,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; inode = d_inode(upperdentry); - if (inode->i_nlink != 1) { + if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", upperdentry, inode->i_ino, inode->i_nlink); /* @@ -481,13 +528,17 @@ static void ovl_cleanup_index(struct dentry *dentry) } inode_lock_nested(dir, I_MUTEX_PARENT); - /* TODO: whiteout instead of cleanup to block future open by handle */ - index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len); + index = lookup_one_len(name.name, indexdir, name.len); err = PTR_ERR(index); - if (!IS_ERR(index)) - err = ovl_cleanup(dir, index); - else + if (IS_ERR(index)) { index = NULL; + } else if (ovl_index_all(dentry->d_sb)) { + /* Whiteout orphan index to block future open by handle */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ + err = ovl_cleanup(dir, index); + } inode_unlock(dir); if (err) @@ -512,16 +563,16 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) const struct cred *old_cred; int err; - if (!d_inode(dentry) || d_is_dir(dentry)) + if (!d_inode(dentry)) return 0; /* * With inodes index is enabled, we store the union overlay nlink - * in an xattr on the index inode. When whiting out lower hardlinks + * in an xattr on the index inode. When whiting out an indexed lower, * we need to decrement the overlay persistent nlink, but before the * first copy up, we have no upper index inode to store the xattr. * - * As a workaround, before whiteout/rename over of a lower hardlink, + * As a workaround, before whiteout/rename over an indexed lower, * copy up to create the upper index. Creating the upper index will * initialize the overlay nlink, so it could be dropped if unlink * or rename succeeds. @@ -529,8 +580,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) * TODO: implement metadata only index copy up when called with * ovl_copy_up_flags(dentry, O_PATH). */ - if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && - d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) { + if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) { err = ovl_copy_up(dentry); if (err) return err; @@ -540,7 +590,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) if (err) return err; - if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry))) goto out; old_cred = ovl_override_creds(dentry->d_sb); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 65cd8ab60b7a..82a99d366aec 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -227,6 +227,7 @@ extern seqlock_t rename_lock; */ extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); +extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); @@ -235,6 +236,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); |