diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-04 14:45:21 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-04 14:45:21 +0300 |
commit | 7f879e1a94ac99586abf0659c03f35c1e48279c4 (patch) | |
tree | cf11e7944a7836bb6121f0fa4f3ba47d20d7eafb /fs | |
parent | a45ad71e8995eed2b95c6ef0f4c442da0c4f6677 (diff) | |
parent | a4ac9d45c0cd14a2adc872186431c79804b77dbf (diff) | |
download | linux-7f879e1a94ac99586abf0659c03f35c1e48279c4.tar.xz |
Merge tag 'ovl-update-5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs update from Miklos Szeredi:
- Try to preserve holes in sparse files when copying up, thus saving
disk space and improving performance.
- Fix a performance regression introduced in v4.19 by preserving
asynchronicity of IO when fowarding to underlying layers. Add VFS
helpers to submit async iocbs.
- Fix a regression in lseek(2) introduced in v4.19 that breaks >2G
seeks on 32bit kernels.
- Fix a corner case where st_ino/st_dev was not preserved across copy
up.
- Miscellaneous fixes and cleanups.
* tag 'ovl-update-5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
ovl: fix lseek overflow on 32bit
ovl: add splice file read write helper
ovl: implement async IO routines
vfs: add vfs_iocb_iter_[read|write] helper functions
ovl: layer is const
ovl: fix corner case of non-constant st_dev;st_ino
ovl: fix corner case of conflicting lower layer uuid
ovl: generalize the lower_fs[] array
ovl: simplify ovl_same_sb() helper
ovl: generalize the lower_layers[] array
ovl: improving copy-up efficiency for big sparse file
ovl: use ovl_inode_lock in ovl_llseek()
ovl: use pr_fmt auto generate prefix
ovl: fix wrong WARN_ON() in ovl_cache_update_ino()
Diffstat (limited to 'fs')
-rw-r--r-- | fs/overlayfs/copy_up.c | 43 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 10 | ||||
-rw-r--r-- | fs/overlayfs/export.c | 28 | ||||
-rw-r--r-- | fs/overlayfs/file.c | 162 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 66 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 38 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 24 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 23 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 22 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 233 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 28 | ||||
-rw-r--r-- | fs/read_write.c | 56 |
12 files changed, 505 insertions, 228 deletions
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6220642fe113..9fc47c2e078d 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -24,7 +24,7 @@ static int ovl_ccup_set(const char *buf, const struct kernel_param *param) { - pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n"); + pr_warn("\"check_copy_up\" module option is obsolete\n"); return 0; } @@ -123,6 +123,9 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) loff_t old_pos = 0; loff_t new_pos = 0; loff_t cloned; + loff_t data_pos = -1; + loff_t hole_len; + bool skip_hole = false; int error = 0; if (len == 0) @@ -144,7 +147,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) goto out; /* Couldn't clone, so now we try to copy the data */ - /* FIXME: copy up sparse files efficiently */ + /* Check if lower fs supports seek operation */ + if (old_file->f_mode & FMODE_LSEEK && + old_file->f_op->llseek) + skip_hole = true; + while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; long bytes; @@ -157,6 +164,36 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) break; } + /* + * Fill zero for hole will cost unnecessary disk space + * and meanwhile slow down the copy-up speed, so we do + * an optimization for hole during copy-up, it relies + * on SEEK_DATA implementation in lower fs so if lower + * fs does not support it, copy-up will behave as before. + * + * Detail logic of hole detection as below: + * When we detect next data position is larger than current + * position we will skip that hole, otherwise we copy + * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually, + * it may not recognize all kind of holes and sometimes + * only skips partial of hole area. However, it will be + * enough for most of the use cases. + */ + + if (skip_hole && data_pos < old_pos) { + data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); + if (data_pos > old_pos) { + hole_len = data_pos - old_pos; + len -= hole_len; + old_pos = new_pos = data_pos; + continue; + } else if (data_pos == -ENXIO) { + break; + } else if (data_pos < 0) { + skip_hole = false; + } + } + bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); @@ -480,7 +517,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) } inode_lock(temp->d_inode); - if (c->metacopy) + if (S_ISREG(c->stat.mode)) err = ovl_set_size(temp, &c->stat); if (!err) err = ovl_set_attr(temp, &c->stat); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 29abdb1d3b5c..8e57d5372b8f 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -35,7 +35,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) dput(wdentry); if (err) { - pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", + pr_err("cleanup of '%pd2' failed (%i)\n", wdentry, err); } @@ -53,7 +53,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir) temp = lookup_one_len(name, workdir, strlen(name)); if (!IS_ERR(temp) && temp->d_inode) { - pr_err("overlayfs: workdir/%s already exists\n", name); + pr_err("workdir/%s already exists\n", name); dput(temp); temp = ERR_PTR(-EIO); } @@ -134,7 +134,7 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, d = lookup_one_len(dentry->d_name.name, dentry->d_parent, dentry->d_name.len); if (IS_ERR(d)) { - pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n", + pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", dentry, err); return PTR_ERR(d); } @@ -267,7 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, d_instantiate(dentry, inode); if (inode != oip.newinode) { - pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n", + pr_warn_ratelimited("newly created inode found in cache (%pd2)\n", dentry); } @@ -1009,7 +1009,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) spin_unlock(&dentry->d_lock); } else { kfree(redirect); - pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n", + pr_warn_ratelimited("failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 70e55588aedc..6f54d70cef27 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -30,7 +30,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) } if (err) { - pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n", + pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); } @@ -244,7 +244,7 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", + pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", dentry, err, buflen, fh ? (int)fh->fb.len : 0, fh ? fh->fb.type : 0); goto out; @@ -358,7 +358,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) */ static struct dentry *ovl_lookup_real_one(struct dentry *connected, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct inode *dir = d_inode(connected); struct dentry *this, *parent = NULL; @@ -406,7 +406,7 @@ out: return this; fail: - pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", real, layer->idx, connected, err); this = ERR_PTR(err); goto out; @@ -414,17 +414,16 @@ fail: static struct dentry *ovl_lookup_real(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer); + const struct ovl_layer *layer); /* * Lookup an indexed or hashed overlay dentry by real inode. */ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct ovl_fs *ofs = sb->s_fs_info; - struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; struct dentry *index = NULL; struct dentry *this = NULL; struct inode *inode; @@ -466,7 +465,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, * recursive call walks back from indexed upper to the topmost * connected/hashed upper parent (or up to root). */ - this = ovl_lookup_real(sb, upper, &upper_layer); + this = ovl_lookup_real(sb, upper, &ofs->layers[0]); dput(upper); } @@ -487,7 +486,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, */ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct dentry *next, *parent = NULL; struct dentry *ancestor = ERR_PTR(-EIO); @@ -540,7 +539,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, */ static struct dentry *ovl_lookup_real(struct super_block *sb, struct dentry *real, - struct ovl_layer *layer) + const struct ovl_layer *layer) { struct dentry *connected; int err = 0; @@ -631,7 +630,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, return connected; fail: - pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", real, layer->idx, connected, err); dput(connected); return ERR_PTR(err); @@ -646,8 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb, struct dentry *index) { struct ovl_fs *ofs = sb->s_fs_info; - struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; - struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer; + const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer; struct dentry *real = upper ?: (index ?: lowerpath->dentry); /* @@ -822,7 +820,7 @@ out: return dentry; out_err: - pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", + pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", fh_len, fh_type, flags, err); dentry = ERR_PTR(err); goto out; @@ -831,7 +829,7 @@ out_err: static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); + pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); return ERR_PTR(-EACCES); } diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index e235a635d9ec..a5317216de73 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -9,8 +9,19 @@ #include <linux/xattr.h> #include <linux/uio.h> #include <linux/uaccess.h> +#include <linux/splice.h> +#include <linux/mm.h> +#include <linux/fs.h> #include "overlayfs.h" +struct ovl_aio_req { + struct kiocb iocb; + struct kiocb *orig_iocb; + struct fd fd; +}; + +static struct kmem_cache *ovl_aio_request_cachep; + static char ovl_whatisit(struct inode *inode, struct inode *realinode) { if (realinode != ovl_inode_upper(inode)) @@ -146,7 +157,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); struct fd real; const struct cred *old_cred; - ssize_t ret; + loff_t ret; /* * The two special cases below do not need to involve real fs, @@ -171,7 +182,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) * limitations that are more strict than ->s_maxbytes for specific * files, so we use the real file to perform seeks. */ - inode_lock(inode); + ovl_inode_lock(inode); real.file->f_pos = file->f_pos; old_cred = ovl_override_creds(inode->i_sb); @@ -179,7 +190,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) revert_creds(old_cred); file->f_pos = real.file->f_pos; - inode_unlock(inode); + ovl_inode_unlock(inode); fdput(real); @@ -225,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) return flags; } +static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) +{ + struct kiocb *iocb = &aio_req->iocb; + struct kiocb *orig_iocb = aio_req->orig_iocb; + + if (iocb->ki_flags & IOCB_WRITE) { + struct inode *inode = file_inode(orig_iocb->ki_filp); + + file_end_write(iocb->ki_filp); + ovl_copyattr(ovl_inode_real(inode), inode); + } + + orig_iocb->ki_pos = iocb->ki_pos; + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); +} + +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + ovl_aio_cleanup_handler(aio_req); + orig_iocb->ki_complete(orig_iocb, res, res2); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -240,10 +278,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) return ret; old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); + if (is_sync_kiocb(iocb)) { + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + } else { + struct ovl_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +out: revert_creds(old_cred); - ovl_file_accessed(file); fdput(real); @@ -274,15 +330,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); - file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb)); - file_end_write(real.file); + if (is_sync_kiocb(iocb)) { + file_start_write(real.file); + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, + ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); + /* Update size */ + ovl_copyattr(ovl_inode_real(inode), inode); + } else { + struct ovl_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + file_start_write(real.file); + aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +out: revert_creds(old_cred); - - /* Update size */ - ovl_copyattr(ovl_inode_real(inode), inode); - fdput(real); out_unlock: @@ -291,6 +365,48 @@ out_unlock: return ret; } +static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + ssize_t ret; + struct fd real; + const struct cred *old_cred; + + ret = ovl_real_fdget(in, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(in)->i_sb); + ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(in); + fdput(real); + return ret; +} + +static ssize_t +ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct fd real; + const struct cred *old_cred; + ssize_t ret; + + ret = ovl_real_fdget(out, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(out)->i_sb); + ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); + revert_creds(old_cred); + + ovl_file_accessed(out); + fdput(real); + return ret; +} + static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fd real; @@ -647,7 +763,25 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .unlocked_ioctl = ovl_ioctl, .compat_ioctl = ovl_compat_ioctl, + .splice_read = ovl_splice_read, + .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, }; + +int __init ovl_aio_request_cache_init(void) +{ + ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req", + sizeof(struct ovl_aio_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ovl_aio_request_cachep) + return -ENOMEM; + + return 0; +} + +void ovl_aio_request_cache_destroy(void) +{ + kmem_cache_destroy(ovl_aio_request_cachep); +} diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b045cf1826fc..79e8994e3bc1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -75,10 +75,9 @@ out: return err; } -static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, - struct ovl_layer *lower_layer) +static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { - bool samefs = ovl_same_sb(dentry->d_sb); + bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); if (samefs) { @@ -100,12 +99,10 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, * persistent for a given layer configuration. */ if (stat->ino >> shift) { - pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", dentry, stat->ino, xinobits); } else { - if (lower_layer) - stat->ino |= ((u64)lower_layer->fsid) << shift; - + stat->ino |= ((u64)fsid) << shift; stat->dev = dentry->d_sb->s_dev; return 0; } @@ -124,15 +121,14 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, */ stat->dev = dentry->d_sb->s_dev; stat->ino = dentry->d_inode->i_ino; - } else if (lower_layer && lower_layer->fsid) { + } else { /* * For non-samefs setup, if we cannot map all layers st_ino * to a unified address space, we need to make sure that st_dev - * is unique per lower fs. Upper layer uses real st_dev and - * lower layers use the unique anonymous bdev assigned to the - * lower fs. + * is unique per underlying fs, so we use the unique anonymous + * bdev assigned to the underlying fs. */ - stat->dev = lower_layer->fs->pseudo_dev; + stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev; } return 0; @@ -146,8 +142,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, struct path realpath; const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); - bool samefs = ovl_same_sb(dentry->d_sb); - struct ovl_layer *lower_layer = NULL; + int fsid = 0; int err; bool metacopy_blocks = false; @@ -168,9 +163,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat, * If lower filesystem supports NFS file handles, this also guaranties * persistent st_ino across mount cycle. */ - if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { + if (!is_dir || ovl_same_dev(dentry->d_sb)) { if (!OVL_TYPE_UPPER(type)) { - lower_layer = ovl_layer_lower(dentry); + fsid = ovl_layer_lower(dentry)->fsid; } else if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | STATX_BLOCKS | @@ -200,14 +195,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || (!ovl_verify_lower(dentry->d_sb) && (is_dir || lowerstat.nlink == 1))) { - lower_layer = ovl_layer_lower(dentry); - /* - * Cannot use origin st_dev;st_ino because - * origin inode content may differ from overlay - * inode content. - */ - if (samefs || lower_layer->fsid) - stat->ino = lowerstat.ino; + fsid = ovl_layer_lower(dentry)->fsid; + stat->ino = lowerstat.ino; } /* @@ -241,7 +230,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, } } - err = ovl_map_dev_ino(dentry, stat, lower_layer); + err = ovl_map_dev_ino(dentry, stat, fsid); if (err) goto out; @@ -527,6 +516,27 @@ static const struct address_space_operations ovl_aops = { * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) * [...] &type->i_mutex_dir_key (stack_depth=0) + * + * Locking order w.r.t ovl_want_write() is important for nested overlayfs. + * + * This chain is valid: + * - inode->i_rwsem (inode_lock[2]) + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) + * - OVL_I(inode)->lock (ovl_inode_lock[2]) + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) + * + * And this chain is valid: + * - inode->i_rwsem (inode_lock[2]) + * - OVL_I(inode)->lock (ovl_inode_lock[2]) + * - lowerinode->i_rwsem (inode_lock[1]) + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) + * + * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is + * held, because it is in reverse order of the non-nested case using the same + * upper fs: + * - inode->i_rwsem (inode_lock[1]) + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) + * - OVL_I(inode)->lock (ovl_inode_lock[1]) */ #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH @@ -565,7 +575,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). */ - if (ovl_same_sb(inode->i_sb) || xinobits) { + if (ovl_same_dev(inode->i_sb)) { inode->i_ino = ino; if (xinobits && fsid && !(ino >> (64 - xinobits))) inode->i_ino |= (unsigned long)fsid << (64 - xinobits); @@ -698,7 +708,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, return nlink; fail: - pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", upperdentry, err); return fallback; } @@ -969,7 +979,7 @@ out: return inode; out_err: - pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); + pr_warn_ratelimited("failed to get inode (%i)\n", err); inode = ERR_PTR(err); goto out; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 76ff66339173..ed9e129fae04 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -141,10 +141,10 @@ out: return NULL; fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + pr_warn_ratelimited("failed to get origin (%i)\n", res); goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh); goto out; } @@ -322,16 +322,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *origin = NULL; int i; - for (i = 0; i < ofs->numlower; i++) { + for (i = 1; i < ofs->numlayer; i++) { /* * If lower fs uuid is not unique among lower fs we cannot match * fh->uuid to layer. */ - if (ofs->lower_layers[i].fsid && - ofs->lower_layers[i].fs->bad_uuid) + if (ofs->layers[i].fsid && + ofs->layers[i].fs->bad_uuid) continue; - origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt, + origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt, connected); if (origin) break; @@ -354,13 +354,13 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, } **stackp = (struct ovl_path){ .dentry = origin, - .layer = &ofs->lower_layers[i] + .layer = &ofs->layers[i] }; return 0; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); @@ -449,7 +449,7 @@ out: fail: inode = d_inode(real); - pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n", + pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n", is_upper ? "upper" : "origin", real, inode ? inode->i_ino : 0, err); goto out; @@ -475,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) return upper ?: ERR_PTR(-ESTALE); if (!d_is_dir(upper)) { - pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n", + pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n", index, upper); dput(upper); return ERR_PTR(-EIO); @@ -589,12 +589,12 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; orphan: - pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n", index, d_inode(index)->i_mode & S_IFMT, d_inode(index)->i_nlink); err = -ENOENT; @@ -696,7 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" + pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); @@ -723,13 +723,13 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, * unlinked, which means that finding a lower origin on lookup * whose index is a whiteout should be treated as an error. */ - pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; } else if (is_dir && verify) { if (!upper) { - pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", origin, index); goto fail; } @@ -738,7 +738,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, err = ovl_verify_upper(index, upper, false); if (err) { if (err == -ESTALE) { - pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", upper, origin, index); } goto fail; @@ -885,7 +885,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!d.stop && poe->numlower) { err = -ENOMEM; - stack = kcalloc(ofs->numlower, sizeof(struct ovl_path), + stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path), GFP_KERNEL); if (!stack) goto out_put_upper; @@ -967,7 +967,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, */ err = -EPERM; if (d.redirect && !ofs->config.redirect_follow) { - pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n", + pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n", dentry); goto out_put; } @@ -994,7 +994,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = -EPERM; if (!ofs->config.metacopy) { - pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n", + pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry); goto out_put; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f283b1d69a9e..3623d28aa4fa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -9,6 +9,9 @@ #include <linux/fs.h> #include "ovl_entry.h" +#undef pr_fmt +#define pr_fmt(fmt) "overlayfs: " fmt + enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), @@ -221,7 +224,6 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); -struct super_block *ovl_same_sb(struct super_block *sb); int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); @@ -237,7 +239,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); -struct ovl_layer *ovl_layer_lower(struct dentry *dentry); +const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); @@ -299,11 +301,21 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); } -static inline unsigned int ovl_xino_bits(struct super_block *sb) +/* All layers on same fs? */ +static inline bool ovl_same_fs(struct super_block *sb) +{ + return OVL_FS(sb)->xino_mode == 0; +} + +/* All overlay inodes have same st_dev? */ +static inline bool ovl_same_dev(struct super_block *sb) { - struct ovl_fs *ofs = sb->s_fs_info; + return OVL_FS(sb)->xino_mode >= 0; +} - return ofs->xino_bits; +static inline unsigned int ovl_xino_bits(struct super_block *sb) +{ + return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0; } static inline int ovl_inode_lock(struct inode *inode) @@ -438,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; +int __init ovl_aio_request_cache_init(void); +void ovl_aio_request_cache_destroy(void); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 28348c44ea5b..89015ea822e7 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -24,6 +24,8 @@ struct ovl_sb { dev_t pseudo_dev; /* Unusable (conflicting) uuid */ bool bad_uuid; + /* Used as a lower layer (but maybe also as upper) */ + bool is_lower; }; struct ovl_layer { @@ -38,18 +40,18 @@ struct ovl_layer { }; struct ovl_path { - struct ovl_layer *layer; + const struct ovl_layer *layer; struct dentry *dentry; }; /* private information held for overlayfs's superblock */ struct ovl_fs { struct vfsmount *upper_mnt; - unsigned int numlower; - /* Number of unique lower sb that differ from upper sb */ - unsigned int numlowerfs; - struct ovl_layer *lower_layers; - struct ovl_sb *lower_fs; + unsigned int numlayer; + /* Number of unique fs among layers including upper fs */ + unsigned int numfs; + const struct ovl_layer *layers; + struct ovl_sb *fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; /* workdir is the 'work' directory under workbasedir */ @@ -71,10 +73,15 @@ struct ovl_fs { struct inode *workbasedir_trap; struct inode *workdir_trap; struct inode *indexdir_trap; - /* Inode numbers in all layers do not use the high xino_bits */ - unsigned int xino_bits; + /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */ + int xino_mode; }; +static inline struct ovl_fs *OVL_FS(struct super_block *sb) +{ + return (struct ovl_fs *)sb->s_fs_info; +} + /* private information held for every overlayfs dentry */ struct ovl_entry { union { diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 47a91c9733a5..40ac9ce2465a 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -441,7 +441,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, const char *name, int namelen) { if (ino >> (64 - xinobits)) { - pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", namelen, name, ino, xinobits); return ino; } @@ -469,7 +469,7 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) int xinobits = ovl_xino_bits(dir->d_sb); int err = 0; - if (!ovl_same_sb(dir->d_sb) && !xinobits) + if (!ovl_same_dev(dir->d_sb)) goto out; if (p->name[0] == '.') { @@ -504,7 +504,13 @@ get: if (err) goto fail; - WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); + /* + * Directory inode is always on overlay st_dev. + * Non-dir with ovl_same_dev() could be on pseudo st_dev in case + * of xino bits overflow. + */ + WARN_ON_ONCE(S_ISDIR(stat.mode) && + dir->d_sb->s_dev != stat.dev); ino = stat.ino; } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, @@ -518,7 +524,7 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n", + pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n", p->name, err); goto out; } @@ -685,7 +691,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) int err; struct ovl_dir_file *od = file->private_data; struct dentry *dir = file->f_path.dentry; - struct ovl_layer *lower_layer = ovl_layer_lower(dir); + const struct ovl_layer *lower_layer = ovl_layer_lower(dir); struct ovl_readdir_translate rdt = { .ctx.actor = ovl_fill_real, .orig_ctx = ctx, @@ -738,7 +744,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) * entries. */ if (ovl_xino_bits(dentry->d_sb) || - (ovl_same_sb(dentry->d_sb) && + (ovl_same_fs(dentry->d_sb) && (ovl_is_impure_dir(file) || OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { return ovl_iterate_real(file, ctx); @@ -965,7 +971,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) dentry = lookup_one_len(p->name, upper, p->len); if (IS_ERR(dentry)) { - pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", + pr_err("lookup '%s/%.*s' failed (%i)\n", upper->d_name.name, p->len, p->name, (int) PTR_ERR(dentry)); continue; @@ -1147,6 +1153,6 @@ next: out: ovl_cache_free(&list); if (err) - pr_err("overlayfs: failed index dir cleanup (%i)\n", err); + pr_err("failed index dir cleanup (%i)\n", err); return err; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7621ff176d15..319fe0d355b0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -224,14 +224,14 @@ static void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ofs->upper_mnt->mnt_root); mntput(ofs->upper_mnt); - for (i = 0; i < ofs->numlower; i++) { - iput(ofs->lower_layers[i].trap); - mntput(ofs->lower_layers[i].mnt); + for (i = 1; i < ofs->numlayer; i++) { + iput(ofs->layers[i].trap); + mntput(ofs->layers[i].mnt); } - for (i = 0; i < ofs->numlowerfs; i++) - free_anon_bdev(ofs->lower_fs[i].pseudo_dev); - kfree(ofs->lower_layers); - kfree(ofs->lower_fs); + kfree(ofs->layers); + for (i = 0; i < ofs->numfs; i++) + free_anon_bdev(ofs->fs[i].pseudo_dev); + kfree(ofs->fs); kfree(ofs->config.lowerdir); kfree(ofs->config.upperdir); @@ -358,7 +358,7 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ofs->config.nfs_export != ovl_nfs_export_def) seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? "on" : "off"); - if (ofs->config.xino != ovl_xino_def()) + if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb)) seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", @@ -462,7 +462,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) if (ovl_redirect_always_follow) config->redirect_follow = true; } else if (strcmp(mode, "nofollow") != 0) { - pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n", + pr_err("bad mount option \"redirect_dir=%s\"\n", mode); return -EINVAL; } @@ -560,14 +560,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) break; default: - pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); + pr_err("unrecognized mount option \"%s\" or missing value\n", + p); return -EINVAL; } } /* Workdir is useless in non-upper mount */ if (!config->upperdir && config->workdir) { - pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", config->workdir); kfree(config->workdir); config->workdir = NULL; @@ -587,7 +588,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) /* Resolve metacopy -> redirect_dir dependency */ if (config->metacopy && !config->redirect_dir) { if (metacopy_opt && redirect_opt) { - pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n", + pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", config->redirect_mode); return -EINVAL; } @@ -596,7 +597,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) * There was an explicit redirect_dir=... that resulted * in this conflict. */ - pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n", + pr_info("disabling metacopy due to redirect_dir=%s\n", config->redirect_mode); config->metacopy = false; } else { @@ -692,7 +693,7 @@ out_unlock: out_dput: dput(work); out_err: - pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", ofs->config.workdir, name, -err); work = NULL; goto out_unlock; @@ -716,21 +717,21 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) int err = -EINVAL; if (!*name) { - pr_err("overlayfs: empty lowerdir\n"); + pr_err("empty lowerdir\n"); goto out; } err = kern_path(name, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("failed to resolve '%s': %i\n", name, err); goto out; } err = -EINVAL; if (ovl_dentry_weird(path->dentry)) { - pr_err("overlayfs: filesystem on '%s' not supported\n", name); + pr_err("filesystem on '%s' not supported\n", name); goto out_put; } if (!d_is_dir(path->dentry)) { - pr_err("overlayfs: '%s' not a directory\n", name); + pr_err("'%s' not a directory\n", name); goto out_put; } return 0; @@ -752,7 +753,7 @@ static int ovl_mount_dir(const char *name, struct path *path) if (!err) if (ovl_dentry_remote(path->dentry)) { - pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", + pr_err("filesystem on '%s' not supported as upperdir\n", tmp); path_put_init(path); err = -EINVAL; @@ -769,7 +770,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, int err = vfs_statfs(path, &statfs); if (err) - pr_err("overlayfs: statfs failed on '%s'\n", name); + pr_err("statfs failed on '%s'\n", name); else ofs->namelen = max(ofs->namelen, statfs.f_namelen); @@ -804,13 +805,13 @@ static int ovl_lower_dir(const char *name, struct path *path, (ofs->config.index && ofs->config.upperdir)) && !fh_type) { ofs->config.index = false; ofs->config.nfs_export = false; - pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", + pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", name); } /* Check if lower fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) - ofs->xino_bits = 0; + ofs->xino_mode = -1; return 0; @@ -996,7 +997,7 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, err = PTR_ERR_OR_ZERO(trap); if (err) { if (err == -ELOOP) - pr_err("overlayfs: conflicting %s path\n", name); + pr_err("conflicting %s path\n", name); return err; } @@ -1013,11 +1014,11 @@ static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) { if (ofs->config.index) { - pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", + pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", name); return -EBUSY; } else { - pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", + pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", name); return 0; } @@ -1035,7 +1036,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, /* Upper fs should not be r/o */ if (sb_rdonly(upperpath->mnt->mnt_sb)) { - pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + pr_err("upper fs is r/o, try multi-lower layers mount\n"); err = -EINVAL; goto out; } @@ -1052,7 +1053,7 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, upper_mnt = clone_private_mount(upperpath); err = PTR_ERR(upper_mnt); if (IS_ERR(upper_mnt)) { - pr_err("overlayfs: failed to clone upperpath\n"); + pr_err("failed to clone upperpath\n"); goto out; } @@ -1108,7 +1109,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * kernel upgrade. So warn instead of erroring out. */ if (!err) - pr_warn("overlayfs: upper fs needs to support d_type.\n"); + pr_warn("upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); @@ -1116,7 +1117,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (ofs->tmpfile) dput(temp); else - pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + pr_warn("upper fs does not support tmpfile.\n"); /* * Check if upper/work fs supports trusted.overlay.* xattr @@ -1126,7 +1127,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, ofs->noxattr = true; ofs->config.index = false; ofs->config.metacopy = false; - pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); + pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); @@ -1136,16 +1137,16 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); if (ofs->config.index && !fh_type) { ofs->config.index = false; - pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); + pr_warn("upper fs does not support file handles, falling back to index=off.\n"); } /* Check if upper fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) - ofs->xino_bits = 0; + ofs->xino_mode = -1; /* NFS export of r/w mount depends on index */ if (ofs->config.nfs_export && !ofs->config.index) { - pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); + pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } out: @@ -1165,11 +1166,11 @@ static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, err = -EINVAL; if (upperpath->mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); + pr_err("workdir and upperdir must reside under the same mount\n"); goto out; } if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { - pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); + pr_err("workdir and upperdir must be separate subtrees\n"); goto out; } @@ -1210,7 +1211,7 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, true); if (err) { - pr_err("overlayfs: failed to verify upper root origin\n"); + pr_err("failed to verify upper root origin\n"); goto out; } @@ -1233,18 +1234,18 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, upperpath->dentry, true, false); if (err) - pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); + pr_err("failed to verify index dir 'origin' xattr\n"); } err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); if (err) - pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); + pr_err("failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) err = ovl_indexdir_cleanup(ofs); } if (err || !ofs->indexdir) - pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); + pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: mnt_drop_write(mnt); @@ -1258,7 +1259,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) if (!ofs->config.nfs_export && !ofs->upper_mnt) return true; - for (i = 0; i < ofs->numlowerfs; i++) { + for (i = 0; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a * lower layer, so we can accept lower fs with null uuid as long @@ -1266,8 +1267,9 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * if we detect multiple lower fs with the same uuid, we * disable lower file handle decoding on all of them. */ - if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) { - ofs->lower_fs[i].bad_uuid = true; + if (ofs->fs[i].is_lower && + uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { + ofs->fs[i].bad_uuid = true; return false; } } @@ -1283,13 +1285,9 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) int err; bool bad_uuid = false; - /* fsid 0 is reserved for upper fs even with non upper overlay */ - if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb) - return 0; - - for (i = 0; i < ofs->numlowerfs; i++) { - if (ofs->lower_fs[i].sb == sb) - return i + 1; + for (i = 0; i < ofs->numfs; i++) { + if (ofs->fs[i].sb == sb) + return i; } if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { @@ -1297,7 +1295,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) if (ofs->config.index || ofs->config.nfs_export) { ofs->config.index = false; ofs->config.nfs_export = false; - pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", + pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", uuid_is_null(&sb->s_uuid) ? "null" : "conflicting", path->dentry); @@ -1306,35 +1304,59 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) err = get_anon_bdev(&dev); if (err) { - pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); + pr_err("failed to get anonymous bdev for lowerpath\n"); return err; } - ofs->lower_fs[ofs->numlowerfs].sb = sb; - ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev; - ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid; - ofs->numlowerfs++; + ofs->fs[ofs->numfs].sb = sb; + ofs->fs[ofs->numfs].pseudo_dev = dev; + ofs->fs[ofs->numfs].bad_uuid = bad_uuid; - return ofs->numlowerfs; + return ofs->numfs++; } -static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, - struct path *stack, unsigned int numlower) +static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, + struct path *stack, unsigned int numlower) { int err; unsigned int i; + struct ovl_layer *layers; err = -ENOMEM; - ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), - GFP_KERNEL); - if (ofs->lower_layers == NULL) + layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); + if (!layers) goto out; + ofs->layers = layers; - ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb), - GFP_KERNEL); - if (ofs->lower_fs == NULL) + ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); + if (ofs->fs == NULL) goto out; + /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ + ofs->numfs++; + + layers[0].mnt = ofs->upper_mnt; + layers[0].idx = 0; + layers[0].fsid = 0; + ofs->numlayer = 1; + + /* + * All lower layers that share the same fs as upper layer, use the same + * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower + * only overlay to simplify ovl_fs_free(). + * is_lower will be set if upper fs is shared with a lower layer. + */ + err = get_anon_bdev(&ofs->fs[0].pseudo_dev); + if (err) { + pr_err("failed to get anonymous bdev for upper fs\n"); + goto out; + } + + if (ofs->upper_mnt) { + ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; + ofs->fs[0].is_lower = false; + } + for (i = 0; i < numlower; i++) { struct vfsmount *mnt; struct inode *trap; @@ -1357,7 +1379,7 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, mnt = clone_private_mount(&stack[i]); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { - pr_err("overlayfs: failed to clone lowerpath\n"); + pr_err("failed to clone lowerpath\n"); iput(trap); goto out; } @@ -1368,15 +1390,13 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, */ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; - ofs->lower_layers[ofs->numlower].trap = trap; - ofs->lower_layers[ofs->numlower].mnt = mnt; - ofs->lower_layers[ofs->numlower].idx = i + 1; - ofs->lower_layers[ofs->numlower].fsid = fsid; - if (fsid) { - ofs->lower_layers[ofs->numlower].fs = - &ofs->lower_fs[fsid - 1]; - } - ofs->numlower++; + layers[ofs->numlayer].trap = trap; + layers[ofs->numlayer].mnt = mnt; + layers[ofs->numlayer].idx = ofs->numlayer; + layers[ofs->numlayer].fsid = fsid; + layers[ofs->numlayer].fs = &ofs->fs[fsid]; + ofs->numlayer++; + ofs->fs[fsid].is_lower = true; } /* @@ -1387,22 +1407,23 @@ static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, * bits reserved for fsid, it emits a warning and uses the original * inode number. */ - if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) { - ofs->xino_bits = 0; - ofs->config.xino = OVL_XINO_OFF; - } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) { + if (ofs->numfs - !ofs->upper_mnt == 1) { + if (ofs->config.xino == OVL_XINO_ON) + pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); + ofs->xino_mode = 0; + } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { /* - * This is a roundup of number of bits needed for numlowerfs+1 - * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for - * upper fs even with non upper overlay. + * This is a roundup of number of bits needed for encoding + * fsid, where fsid 0 is reserved for upper fs even with + * lower only overlay. */ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); - ofs->xino_bits = ilog2(ofs->numlowerfs) + 1; + ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; } - if (ofs->xino_bits) { - pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n", - ofs->xino_bits); + if (ofs->xino_mode > 0) { + pr_info("\"xino\" feature enabled using %d upper inode bits.\n", + ofs->xino_mode); } err = 0; @@ -1428,15 +1449,15 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); if (stacklen > OVL_MAX_STACK) { - pr_err("overlayfs: too many lower directories, limit is %d\n", + pr_err("too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_err; } else if (!ofs->config.upperdir && stacklen == 1) { - pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); + pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); goto out_err; } else if (!ofs->config.upperdir && ofs->config.nfs_export && ofs->config.redirect_follow) { - pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } @@ -1459,11 +1480,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, err = -EINVAL; sb->s_stack_depth++; if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { - pr_err("overlayfs: maximum fs stacking depth exceeded\n"); + pr_err("maximum fs stacking depth exceeded\n"); goto out_err; } - err = ovl_get_lower_layers(sb, ofs, stack, numlower); + err = ovl_get_layers(sb, ofs, stack, numlower); if (err) goto out_err; @@ -1474,7 +1495,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = dget(stack[i].dentry); - oe->lowerstack[i].layer = &ofs->lower_layers[i]; + oe->lowerstack[i].layer = &ofs->layers[i+1]; } if (remote) @@ -1515,7 +1536,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, while (!err && parent != next) { if (ovl_lookup_trap_inode(sb, parent)) { err = -ELOOP; - pr_err("overlayfs: overlapping %s path\n", name); + pr_err("overlapping %s path\n", name); } else if (ovl_is_inuse(parent)) { err = ovl_report_in_use(ofs, name); } @@ -1555,9 +1576,9 @@ static int ovl_check_overlapping_layers(struct super_block *sb, return err; } - for (i = 0; i < ofs->numlower; i++) { + for (i = 1; i < ofs->numlayer; i++) { err = ovl_check_layer(sb, ofs, - ofs->lower_layers[i].mnt->mnt_root, + ofs->layers[i].mnt->mnt_root, "lowerdir"); if (err) return err; @@ -1595,7 +1616,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; if (!ofs->config.lowerdir) { if (!silent) - pr_err("overlayfs: missing 'lowerdir'\n"); + pr_err("missing 'lowerdir'\n"); goto out_err; } @@ -1603,14 +1624,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ if (ofs->config.xino != OVL_XINO_OFF) - ofs->xino_bits = BITS_PER_LONG - 32; + ofs->xino_mode = BITS_PER_LONG - 32; /* alloc/destroy_inode needed for setting up traps in inode cache */ sb->s_op = &ovl_super_operations; if (ofs->config.upperdir) { if (!ofs->config.workdir) { - pr_err("overlayfs: missing 'workdir'\n"); + pr_err("missing 'workdir'\n"); goto out_err; } @@ -1660,13 +1681,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ofs->indexdir) { ofs->config.index = false; if (ofs->upper_mnt && ofs->config.nfs_export) { - pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); + pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } } if (ofs->config.metacopy && ofs->config.nfs_export) { - pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); + pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } @@ -1749,9 +1770,15 @@ static int __init ovl_init(void) if (ovl_inode_cachep == NULL) return -ENOMEM; - err = register_filesystem(&ovl_fs_type); - if (err) - kmem_cache_destroy(ovl_inode_cachep); + err = ovl_aio_request_cache_init(); + if (!err) { + err = register_filesystem(&ovl_fs_type); + if (!err) + return 0; + + ovl_aio_request_cache_destroy(); + } + kmem_cache_destroy(ovl_inode_cachep); return err; } @@ -1766,7 +1793,7 @@ static void __exit ovl_exit(void) */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); - + ovl_aio_request_cache_destroy(); } module_init(ovl_init); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index f5678a3f8350..ea005085803f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -40,18 +40,6 @@ const struct cred *ovl_override_creds(struct super_block *sb) return override_creds(ofs->creator_cred); } -struct super_block *ovl_same_sb(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - if (!ofs->numlowerfs) - return ofs->upper_mnt->mnt_sb; - else if (ofs->numlowerfs == 1 && !ofs->upper_mnt) - return ofs->lower_fs[0].sb; - else - return NULL; -} - /* * Check if underlying fs supports file handles and try to determine encoding * type, in order to deduce maximum inode number used by fs. @@ -198,7 +186,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry) return oe->numlower ? oe->lowerstack[0].dentry : NULL; } -struct ovl_layer *ovl_layer_lower(struct dentry *dentry) +const struct ovl_layer *ovl_layer_lower(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; @@ -576,7 +564,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, err = ovl_do_setxattr(upperdentry, name, value, size, 0); if (err == -EOPNOTSUPP) { - pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + pr_warn("cannot set %s xattr on upper\n", name); ofs->noxattr = true; return xerr; } @@ -700,7 +688,7 @@ static void ovl_cleanup_index(struct dentry *dentry) inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { - pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", + pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", upperdentry, inode->i_ino, inode->i_nlink); /* * We either have a bug with persistent union nlink or a lower @@ -739,7 +727,7 @@ out: return; fail: - pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err); + pr_err("cleanup index of '%pd2' failed (%i)\n", dentry, err); goto out; } @@ -830,7 +818,7 @@ int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) err_unlock: unlock_rename(workdir, upperdir); err: - pr_err("overlayfs: failed to lock workdir+upperdir\n"); + pr_err("failed to lock workdir+upperdir\n"); return -EIO; } @@ -852,7 +840,7 @@ int ovl_check_metacopy_xattr(struct dentry *dentry) return 1; out: - pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res); + pr_warn_ratelimited("failed to get metacopy (%i)\n", res); return res; } @@ -899,7 +887,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value, return res; fail: - pr_warn_ratelimited("overlayfs: failed to get xattr %s: err=%zi)\n", + pr_warn_ratelimited("failed to get xattr %s: err=%zi)\n", name, res); kfree(buf); return res; @@ -931,7 +919,7 @@ char *ovl_get_redirect_xattr(struct dentry *dentry, int padding) return buf; invalid: - pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); + pr_warn_ratelimited("invalid redirect (%s)\n", buf); res = -EINVAL; kfree(buf); return ERR_PTR(res); diff --git a/fs/read_write.c b/fs/read_write.c index 7458fccc59e1..59d819c5b92e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -939,6 +939,34 @@ out: return ret; } +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->read_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + goto out; + ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_read_iter(file, iocb, iter); +out: + if (ret >= 0) + fsnotify_access(file); + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_read); + ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { @@ -975,6 +1003,34 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, return ret; } +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, + struct iov_iter *iter) +{ + size_t tot_len; + ssize_t ret = 0; + + if (!file->f_op->write_iter) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; + + tot_len = iov_iter_count(iter); + if (!tot_len) + return 0; + ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); + if (ret < 0) + return ret; + + ret = call_write_iter(file, iocb, iter); + if (ret > 0) + fsnotify_modify(file); + + return ret; +} +EXPORT_SYMBOL(vfs_iocb_iter_write); + ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { |