diff options
Diffstat (limited to 'fs/cachefiles')
-rw-r--r-- | fs/cachefiles/Kconfig | 7 | ||||
-rw-r--r-- | fs/cachefiles/Makefile | 6 | ||||
-rw-r--r-- | fs/cachefiles/bind.c | 278 | ||||
-rw-r--r-- | fs/cachefiles/cache.c | 378 | ||||
-rw-r--r-- | fs/cachefiles/daemon.c | 180 | ||||
-rw-r--r-- | fs/cachefiles/error_inject.c | 46 | ||||
-rw-r--r-- | fs/cachefiles/interface.c | 747 | ||||
-rw-r--r-- | fs/cachefiles/internal.h | 270 | ||||
-rw-r--r-- | fs/cachefiles/io.c | 330 | ||||
-rw-r--r-- | fs/cachefiles/key.c | 201 | ||||
-rw-r--r-- | fs/cachefiles/main.c | 22 | ||||
-rw-r--r-- | fs/cachefiles/namei.c | 1223 | ||||
-rw-r--r-- | fs/cachefiles/rdwr.c | 972 | ||||
-rw-r--r-- | fs/cachefiles/security.c | 2 | ||||
-rw-r--r-- | fs/cachefiles/volume.c | 139 | ||||
-rw-r--r-- | fs/cachefiles/xattr.c | 421 |
16 files changed, 2172 insertions, 3050 deletions
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index 6827b40f7ddc..719faeeda168 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -19,3 +19,10 @@ config CACHEFILES_DEBUG caching on files module. If this is set, the debugging output may be enabled by setting bits in /sys/modules/cachefiles/parameter/debug or by including a debugging specifier in /etc/cachefilesd.conf. + +config CACHEFILES_ERROR_INJECTION + bool "Provide error injection for cachefiles" + depends on CACHEFILES && SYSCTL + help + This permits error injection to be enabled in cachefiles whilst a + cache is in service. diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 02fd17731769..16d811f1a2fa 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -4,15 +4,17 @@ # cachefiles-y := \ - bind.o \ + cache.o \ daemon.o \ interface.o \ io.o \ key.o \ main.o \ namei.o \ - rdwr.o \ security.o \ + volume.o \ xattr.o +cachefiles-$(CONFIG_CACHEFILES_ERROR_INJECTION) += error_inject.o + obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c deleted file mode 100644 index 146291be6263..000000000000 --- a/fs/cachefiles/bind.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Bind and unbind a cache from the filesystem backing it - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/completion.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/namei.h> -#include <linux/mount.h> -#include <linux/statfs.h> -#include <linux/ctype.h> -#include <linux/xattr.h> -#include "internal.h" - -static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); - -/* - * bind a directory as a cache - */ -int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) -{ - _enter("{%u,%u,%u,%u,%u,%u},%s", - cache->frun_percent, - cache->fcull_percent, - cache->fstop_percent, - cache->brun_percent, - cache->bcull_percent, - cache->bstop_percent, - args); - - /* start by checking things over */ - ASSERT(cache->fstop_percent >= 0 && - cache->fstop_percent < cache->fcull_percent && - cache->fcull_percent < cache->frun_percent && - cache->frun_percent < 100); - - ASSERT(cache->bstop_percent >= 0 && - cache->bstop_percent < cache->bcull_percent && - cache->bcull_percent < cache->brun_percent && - cache->brun_percent < 100); - - if (*args) { - pr_err("'bind' command doesn't take an argument\n"); - return -EINVAL; - } - - if (!cache->rootdirname) { - pr_err("No cache directory specified\n"); - return -EINVAL; - } - - /* don't permit already bound caches to be re-bound */ - if (test_bit(CACHEFILES_READY, &cache->flags)) { - pr_err("Cache already bound\n"); - return -EBUSY; - } - - /* make sure we have copies of the tag and dirname strings */ - if (!cache->tag) { - /* the tag string is released by the fops->release() - * function, so we don't release it on error here */ - cache->tag = kstrdup("CacheFiles", GFP_KERNEL); - if (!cache->tag) - return -ENOMEM; - } - - /* add the cache */ - return cachefiles_daemon_add_cache(cache); -} - -/* - * add a cache - */ -static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) -{ - struct cachefiles_object *fsdef; 
- struct path path; - struct kstatfs stats; - struct dentry *graveyard, *cachedir, *root; - const struct cred *saved_cred; - int ret; - - _enter(""); - - /* we want to work under the module's security ID */ - ret = cachefiles_get_security_ID(cache); - if (ret < 0) - return ret; - - cachefiles_begin_secure(cache, &saved_cred); - - /* allocate the root index object */ - ret = -ENOMEM; - - fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); - if (!fsdef) - goto error_root_object; - - ASSERTCMP(fsdef->backer, ==, NULL); - - atomic_set(&fsdef->usage, 1); - fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; - - /* look up the directory at the root of the cache */ - ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); - if (ret < 0) - goto error_open_root; - - cache->mnt = path.mnt; - root = path.dentry; - - ret = -EINVAL; - if (is_idmapped_mnt(path.mnt)) { - pr_warn("File cache on idmapped mounts not supported"); - goto error_unsupported; - } - - /* check parameters */ - ret = -EOPNOTSUPP; - if (d_is_negative(root) || - !d_backing_inode(root)->i_op->lookup || - !d_backing_inode(root)->i_op->mkdir || - !(d_backing_inode(root)->i_opflags & IOP_XATTR) || - !root->d_sb->s_op->statfs || - !root->d_sb->s_op->sync_fs) - goto error_unsupported; - - ret = -EROFS; - if (sb_rdonly(root->d_sb)) - goto error_unsupported; - - /* determine the security of the on-disk cache as this governs - * security ID of files we create */ - ret = cachefiles_determine_cache_security(cache, root, &saved_cred); - if (ret < 0) - goto error_unsupported; - - /* get the cache size and blocksize */ - ret = vfs_statfs(&path, &stats); - if (ret < 0) - goto error_unsupported; - - ret = -ERANGE; - if (stats.f_bsize <= 0) - goto error_unsupported; - - ret = -EOPNOTSUPP; - if (stats.f_bsize > PAGE_SIZE) - goto error_unsupported; - - cache->bsize = stats.f_bsize; - cache->bshift = 0; - if (stats.f_bsize < PAGE_SIZE) - cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); - - _debug("blksize %u (shift %u)", - 
cache->bsize, cache->bshift); - - _debug("size %llu, avail %llu", - (unsigned long long) stats.f_blocks, - (unsigned long long) stats.f_bavail); - - /* set up caching limits */ - do_div(stats.f_files, 100); - cache->fstop = stats.f_files * cache->fstop_percent; - cache->fcull = stats.f_files * cache->fcull_percent; - cache->frun = stats.f_files * cache->frun_percent; - - _debug("limits {%llu,%llu,%llu} files", - (unsigned long long) cache->frun, - (unsigned long long) cache->fcull, - (unsigned long long) cache->fstop); - - stats.f_blocks >>= cache->bshift; - do_div(stats.f_blocks, 100); - cache->bstop = stats.f_blocks * cache->bstop_percent; - cache->bcull = stats.f_blocks * cache->bcull_percent; - cache->brun = stats.f_blocks * cache->brun_percent; - - _debug("limits {%llu,%llu,%llu} blocks", - (unsigned long long) cache->brun, - (unsigned long long) cache->bcull, - (unsigned long long) cache->bstop); - - /* get the cache directory and check its type */ - cachedir = cachefiles_get_directory(cache, root, "cache"); - if (IS_ERR(cachedir)) { - ret = PTR_ERR(cachedir); - goto error_unsupported; - } - - fsdef->dentry = cachedir; - fsdef->fscache.cookie = NULL; - - ret = cachefiles_check_object_type(fsdef); - if (ret < 0) - goto error_unsupported; - - /* get the graveyard directory */ - graveyard = cachefiles_get_directory(cache, root, "graveyard"); - if (IS_ERR(graveyard)) { - ret = PTR_ERR(graveyard); - goto error_unsupported; - } - - cache->graveyard = graveyard; - - /* publish the cache */ - fscache_init_cache(&cache->cache, - &cachefiles_cache_ops, - "%s", - fsdef->dentry->d_sb->s_id); - - fscache_object_init(&fsdef->fscache, &fscache_fsdef_index, - &cache->cache); - - ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); - if (ret < 0) - goto error_add_cache; - - /* done */ - set_bit(CACHEFILES_READY, &cache->flags); - dput(root); - - pr_info("File cache on %s registered\n", cache->cache.identifier); - - /* check how much space the cache has */ - 
cachefiles_has_space(cache, 0, 0); - cachefiles_end_secure(cache, saved_cred); - return 0; - -error_add_cache: - dput(cache->graveyard); - cache->graveyard = NULL; -error_unsupported: - mntput(cache->mnt); - cache->mnt = NULL; - dput(fsdef->dentry); - fsdef->dentry = NULL; - dput(root); -error_open_root: - kmem_cache_free(cachefiles_object_jar, fsdef); -error_root_object: - cachefiles_end_secure(cache, saved_cred); - pr_err("Failed to register: %d\n", ret); - return ret; -} - -/* - * unbind a cache on fd release - */ -void cachefiles_daemon_unbind(struct cachefiles_cache *cache) -{ - _enter(""); - - if (test_bit(CACHEFILES_READY, &cache->flags)) { - pr_info("File cache on %s unregistering\n", - cache->cache.identifier); - - fscache_withdraw_cache(&cache->cache); - } - - dput(cache->graveyard); - mntput(cache->mnt); - - kfree(cache->rootdirname); - kfree(cache->secctx); - kfree(cache->tag); - - _leave(""); -} diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c new file mode 100644 index 000000000000..ce4d4785003c --- /dev/null +++ b/fs/cachefiles/cache.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Manage high-level VFS aspects of a cache. + * + * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/statfs.h> +#include <linux/namei.h> +#include "internal.h" + +/* + * Bring a cache online. 
+ */ +int cachefiles_add_cache(struct cachefiles_cache *cache) +{ + struct fscache_cache *cache_cookie; + struct path path; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + const struct cred *saved_cred; + int ret; + + _enter(""); + + cache_cookie = fscache_acquire_cache(cache->tag); + if (IS_ERR(cache_cookie)) + return PTR_ERR(cache_cookie); + + /* we want to work under the module's security ID */ + ret = cachefiles_get_security_ID(cache); + if (ret < 0) + goto error_getsec; + + cachefiles_begin_secure(cache, &saved_cred); + + /* look up the directory at the root of the cache */ + ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); + if (ret < 0) + goto error_open_root; + + cache->mnt = path.mnt; + root = path.dentry; + + ret = -EINVAL; + if (is_idmapped_mnt(path.mnt)) { + pr_warn("File cache on idmapped mounts not supported"); + goto error_unsupported; + } + + /* check parameters */ + ret = -EOPNOTSUPP; + if (d_is_negative(root) || + !d_backing_inode(root)->i_op->lookup || + !d_backing_inode(root)->i_op->mkdir || + !(d_backing_inode(root)->i_opflags & IOP_XATTR) || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs || + root->d_sb->s_blocksize > PAGE_SIZE) + goto error_unsupported; + + ret = -EROFS; + if (sb_rdonly(root->d_sb)) + goto error_unsupported; + + /* determine the security of the on-disk cache as this governs + * security ID of files we create */ + ret = cachefiles_determine_cache_security(cache, root, &saved_cred); + if (ret < 0) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = vfs_statfs(&path, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, 
cache->bshift); + + _debug("size %llu, avail %llu", + (unsigned long long) stats.f_blocks, + (unsigned long long) stats.f_bavail); + + /* set up caching limits */ + do_div(stats.f_files, 100); + cache->fstop = stats.f_files * cache->fstop_percent; + cache->fcull = stats.f_files * cache->fcull_percent; + cache->frun = stats.f_files * cache->frun_percent; + + _debug("limits {%llu,%llu,%llu} files", + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop); + + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu} blocks", + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache", NULL); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + cache->store = cachedir; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard", NULL); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + cache->cache = cache_cookie; + + ret = fscache_add_cache(cache_cookie, &cachefiles_cache_ops, cache); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + pr_info("File cache on %s registered\n", cache_cookie->name); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); + cachefiles_end_secure(cache, saved_cred); + _leave(" = 0 [%px]", cache->cache); + return 0; + +error_add_cache: + cachefiles_put_directory(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + 
cachefiles_put_directory(cache->store); + cache->store = NULL; + mntput(cache->mnt); + cache->mnt = NULL; + dput(root); +error_open_root: + cachefiles_end_secure(cache, saved_cred); +error_getsec: + fscache_relinquish_cache(cache_cookie); + cache->cache = NULL; + pr_err("Failed to register: %d\n", ret); + return ret; +} + +/* + * See if we have space for a number of pages and/or a number of files in the + * cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason) +{ + struct kstatfs stats; + u64 b_avail, b_writing; + int ret; + + struct path path = { + .mnt = cache->mnt, + .dentry = cache->mnt->mnt_root, + }; + + //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", + // (unsigned long long) cache->frun, + // (unsigned long long) cache->fcull, + // (unsigned long long) cache->fstop, + // (unsigned long long) cache->brun, + // (unsigned long long) cache->bcull, + // (unsigned long long) cache->bstop, + // fnr, bnr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = vfs_statfs(&path, &stats); + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(path.dentry), ret, + cachefiles_trace_statfs_error); + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + _leave(" = %d", ret); + return ret; + } + + b_avail = stats.f_bavail >> cache->bshift; + b_writing = atomic_long_read(&cache->b_writing); + if (b_avail > b_writing) + b_avail -= b_writing; + else + b_avail = 0; + + //_debug("avail %llu,%llu", + // (unsigned long long)stats.f_ffree, + // (unsigned long long)b_avail); + + /* see if there is sufficient space */ + if (stats.f_ffree > fnr) + stats.f_ffree -= fnr; + else + stats.f_ffree = 0; + + if (b_avail > bnr) + b_avail -= bnr; + else + b_avail = 0; + + ret = -ENOBUFS; + if (stats.f_ffree < cache->fstop || + b_avail < cache->bstop) + goto stop_and_begin_cull; + + ret = 0; + if (stats.f_ffree < cache->fcull || + b_avail < 
cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_ffree >= cache->frun && + b_avail >= cache->brun && + test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) + ) { + _debug("cease culling"); + cachefiles_state_changed(cache); + } + + //_leave(" = 0"); + return 0; + +stop_and_begin_cull: + switch (reason) { + case cachefiles_has_space_for_write: + fscache_count_no_write_space(); + break; + case cachefiles_has_space_for_create: + fscache_count_no_create_space(); + break; + default: + break; + } +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + cachefiles_state_changed(cache); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Mark all the objects as being out of service and queue them all for cleanup. + */ +static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) +{ + struct cachefiles_object *object; + unsigned int count = 0; + + _enter(""); + + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_first_entry(&cache->object_list, + struct cachefiles_object, cache_link); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + fscache_withdraw_cookie(object->cookie); + count++; + if ((count & 63) == 0) { + spin_unlock(&cache->object_list_lock); + cond_resched(); + spin_lock(&cache->object_list_lock); + } + } + + spin_unlock(&cache->object_list_lock); + _leave(" [%u objs]", count); +} + +/* + * Withdraw volumes. 
+ */ +static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) +{ + _enter(""); + + for (;;) { + struct cachefiles_volume *volume = NULL; + + spin_lock(&cache->object_list_lock); + if (!list_empty(&cache->volumes)) { + volume = list_first_entry(&cache->volumes, + struct cachefiles_volume, cache_link); + list_del_init(&volume->cache_link); + } + spin_unlock(&cache->object_list_lock); + if (!volume) + break; + + cachefiles_withdraw_volume(volume); + } + + _leave(""); +} + +/* + * Sync a cache to backing disk. + */ +static void cachefiles_sync_cache(struct cachefiles_cache *cache) +{ + const struct cred *saved_cred; + int ret; + + _enter("%s", cache->cache->name); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cachefiles_begin_secure(cache, &saved_cred); + down_read(&cache->mnt->mnt_sb->s_umount); + ret = sync_filesystem(cache->mnt->mnt_sb); + up_read(&cache->mnt->mnt_sb->s_umount); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock returned error %d", + ret); +} + +/* + * Withdraw cache objects. 
+ */ +void cachefiles_withdraw_cache(struct cachefiles_cache *cache) +{ + struct fscache_cache *fscache = cache->cache; + + pr_info("File cache on %s unregistering\n", fscache->name); + + fscache_withdraw_cache(fscache); + + /* we now have to destroy all the active objects pertaining to this + * cache - which we do by passing them off to thread pool to be + * disposed of */ + cachefiles_withdraw_objects(cache); + fscache_wait_for_objects(fscache); + + cachefiles_withdraw_volumes(cache); + cachefiles_sync_cache(cache); + cache->cache = NULL; + fscache_relinquish_cache(fscache); +} diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 752c1e43416f..40a792421fc1 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Daemon interface * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ @@ -41,6 +41,8 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bind(struct cachefiles_cache *, char *); +static void cachefiles_daemon_unbind(struct cachefiles_cache *); static unsigned long cachefiles_open; @@ -78,7 +80,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { /* - * do various checks + * Prepare a cache for caching. 
*/ static int cachefiles_daemon_open(struct inode *inode, struct file *file) { @@ -102,9 +104,10 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) } mutex_init(&cache->daemon_mutex); - cache->active_nodes = RB_ROOT; - rwlock_init(&cache->active_lock); init_waitqueue_head(&cache->daemon_pollwq); + INIT_LIST_HEAD(&cache->volumes); + INIT_LIST_HEAD(&cache->object_list); + spin_lock_init(&cache->object_list_lock); /* set default caching limits * - limit at 1% free space and/or free files @@ -124,7 +127,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) } /* - * release a cache + * Release a cache. */ static int cachefiles_daemon_release(struct inode *inode, struct file *file) { @@ -138,8 +141,6 @@ static int cachefiles_daemon_release(struct inode *inode, struct file *file) cachefiles_daemon_unbind(cache); - ASSERT(!cache->active_nodes.rb_node); - /* clean up the control file interface */ cache->cachefilesd = NULL; file->private_data = NULL; @@ -152,7 +153,7 @@ static int cachefiles_daemon_release(struct inode *inode, struct file *file) } /* - * read the cache state + * Read the cache state. */ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, size_t buflen, loff_t *pos) @@ -169,7 +170,7 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, return 0; /* check how much space the cache has */ - cachefiles_has_space(cache, 0, 0); + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); /* summarise */ f_released = atomic_xchg(&cache->f_released, 0); @@ -206,7 +207,7 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, } /* - * command the cache + * Take a command from cachefilesd, parse it and act on it. 
*/ static ssize_t cachefiles_daemon_write(struct file *file, const char __user *_data, @@ -225,7 +226,7 @@ static ssize_t cachefiles_daemon_write(struct file *file, if (test_bit(CACHEFILES_DEAD, &cache->flags)) return -EIO; - if (datalen < 0 || datalen > PAGE_SIZE - 1) + if (datalen > PAGE_SIZE - 1) return -EOPNOTSUPP; /* drag the command string into the kernel so we can parse it */ @@ -284,7 +285,7 @@ found_command: } /* - * poll for culling state + * Poll for culling state * - use EPOLLOUT to indicate culling state */ static __poll_t cachefiles_daemon_poll(struct file *file, @@ -306,7 +307,7 @@ static __poll_t cachefiles_daemon_poll(struct file *file, } /* - * give a range error for cache space constraints + * Give a range error for cache space constraints * - can be tail-called */ static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, @@ -318,7 +319,7 @@ static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, } /* - * set the percentage of files at which to stop culling + * Set the percentage of files at which to stop culling * - command: "frun <N>%" */ static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) @@ -342,7 +343,7 @@ static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of files at which to start culling + * Set the percentage of files at which to start culling * - command: "fcull <N>%" */ static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) @@ -366,7 +367,7 @@ static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of files at which to stop allocating + * Set the percentage of files at which to stop allocating * - command: "fstop <N>%" */ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) @@ -382,7 +383,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) if (args[0] != '%' || args[1] != '\0') return -EINVAL; 
- if (fstop < 0 || fstop >= cache->fcull_percent) + if (fstop >= cache->fcull_percent) return cachefiles_daemon_range_error(cache, args); cache->fstop_percent = fstop; @@ -390,7 +391,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to stop culling + * Set the percentage of blocks at which to stop culling * - command: "brun <N>%" */ static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) @@ -414,7 +415,7 @@ static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to start culling + * Set the percentage of blocks at which to start culling * - command: "bcull <N>%" */ static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) @@ -438,7 +439,7 @@ static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to stop allocating + * Set the percentage of blocks at which to stop allocating * - command: "bstop <N>%" */ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) @@ -454,7 +455,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) if (args[0] != '%' || args[1] != '\0') return -EINVAL; - if (bstop < 0 || bstop >= cache->bcull_percent) + if (bstop >= cache->bcull_percent) return cachefiles_daemon_range_error(cache, args); cache->bstop_percent = bstop; @@ -462,7 +463,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) } /* - * set the cache directory + * Set the cache directory * - command: "dir <name>" */ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) @@ -490,7 +491,7 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) } /* - * set the cache security context + * Set the cache security context * - command: "secctx <ctx>" */ static int cachefiles_daemon_secctx(struct 
cachefiles_cache *cache, char *args) @@ -518,7 +519,7 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) } /* - * set the cache tag + * Set the cache tag * - command: "tag <name>" */ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) @@ -544,7 +545,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) } /* - * request a node in the cache be culled from the current working directory + * Request a node in the cache be culled from the current working directory * - command: "cull <name>" */ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) @@ -568,7 +569,6 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) return -EIO; } - /* extract the directory dentry from the cwd */ get_fs_pwd(current->fs, &path); if (!d_can_lookup(path.dentry)) @@ -593,7 +593,7 @@ inval: } /* - * set debugging mode + * Set debugging mode * - command: "debug <mask>" */ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) @@ -616,7 +616,7 @@ inval: } /* - * find out whether an object in the current working directory is in use or not + * Find out whether an object in the current working directory is in use or not * - command: "inuse <name>" */ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) @@ -640,7 +640,6 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) return -EIO; } - /* extract the directory dentry from the cwd */ get_fs_pwd(current->fs, &path); if (!d_can_lookup(path.dentry)) @@ -665,84 +664,65 @@ inval: } /* - * see if we have space for a number of pages and/or a number of files in the - * cache + * Bind a directory as a cache */ -int cachefiles_has_space(struct cachefiles_cache *cache, - unsigned fnr, unsigned bnr) +static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) { - struct kstatfs stats; - struct path path = { - .mnt = cache->mnt, - 
.dentry = cache->mnt->mnt_root, - }; - int ret; + _enter("{%u,%u,%u,%u,%u,%u},%s", + cache->frun_percent, + cache->fcull_percent, + cache->fstop_percent, + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + if (cache->fstop_percent >= cache->fcull_percent || + cache->fcull_percent >= cache->frun_percent || + cache->frun_percent >= 100) + return -ERANGE; + + if (cache->bstop_percent >= cache->bcull_percent || + cache->bcull_percent >= cache->brun_percent || + cache->brun_percent >= 100) + return -ERANGE; - //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", - // (unsigned long long) cache->frun, - // (unsigned long long) cache->fcull, - // (unsigned long long) cache->fstop, - // (unsigned long long) cache->brun, - // (unsigned long long) cache->bcull, - // (unsigned long long) cache->bstop, - // fnr, bnr); - - /* find out how many pages of blockdev are available */ - memset(&stats, 0, sizeof(stats)); - - ret = vfs_statfs(&path, &stats); - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error(cache, "statfs failed"); - _leave(" = %d", ret); - return ret; + if (*args) { + pr_err("'bind' command doesn't take an argument\n"); + return -EINVAL; } - stats.f_bavail >>= cache->bshift; - - //_debug("avail %llu,%llu", - // (unsigned long long) stats.f_ffree, - // (unsigned long long) stats.f_bavail); - - /* see if there is sufficient space */ - if (stats.f_ffree > fnr) - stats.f_ffree -= fnr; - else - stats.f_ffree = 0; - - if (stats.f_bavail > bnr) - stats.f_bavail -= bnr; - else - stats.f_bavail = 0; - - ret = -ENOBUFS; - if (stats.f_ffree < cache->fstop || - stats.f_bavail < cache->bstop) - goto begin_cull; - - ret = 0; - if (stats.f_ffree < cache->fcull || - stats.f_bavail < cache->bcull) - goto begin_cull; - - if (test_bit(CACHEFILES_CULLING, &cache->flags) && - stats.f_ffree >= cache->frun && - stats.f_bavail >= cache->brun && - test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) - ) { - _debug("cease culling"); - 
cachefiles_state_changed(cache); + if (!cache->rootdirname) { + pr_err("No cache directory specified\n"); + return -EINVAL; } - //_leave(" = 0"); - return 0; - -begin_cull: - if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { - _debug("### CULL CACHE ###"); - cachefiles_state_changed(cache); + /* Don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + pr_err("Cache already bound\n"); + return -EBUSY; } - _leave(" = %d", ret); - return ret; + return cachefiles_add_cache(cache); +} + +/* + * Unbind a cache. + */ +static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) +{ + _enter(""); + + if (test_bit(CACHEFILES_READY, &cache->flags)) + cachefiles_withdraw_cache(cache); + + cachefiles_put_directory(cache->graveyard); + cachefiles_put_directory(cache->store); + mntput(cache->mnt); + + kfree(cache->rootdirname); + kfree(cache->secctx); + kfree(cache->tag); + + _leave(""); } diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c new file mode 100644 index 000000000000..58f8aec964e4 --- /dev/null +++ b/fs/cachefiles/error_inject.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Error injection handling. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sysctl.h> +#include "internal.h" + +unsigned int cachefiles_error_injection_state; + +static struct ctl_table_header *cachefiles_sysctl; +static struct ctl_table cachefiles_sysctls[] = { + { + .procname = "error_injection", + .data = &cachefiles_error_injection_state, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + {} +}; + +static struct ctl_table cachefiles_sysctls_root[] = { + { + .procname = "cachefiles", + .mode = 0555, + .child = cachefiles_sysctls, + }, + {} +}; + +int __init cachefiles_register_error_injection(void) +{ + cachefiles_sysctl = register_sysctl_table(cachefiles_sysctls_root); + if (!cachefiles_sysctl) + return -ENOMEM; + return 0; + +} + +void cachefiles_unregister_error_injection(void) +{ + unregister_sysctl_table(cachefiles_sysctl); +} diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index da28ac1fa225..51c968cd00a6 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -1,572 +1,445 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache interface to CacheFiles * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/mount.h> +#include <linux/xattr.h> +#include <linux/file.h> +#include <linux/falloc.h> +#include <trace/events/fscache.h> #include "internal.h" -struct cachefiles_lookup_data { - struct cachefiles_xattr *auxdata; /* auxiliary data */ - char *key; /* key path */ -}; - -static int cachefiles_attr_changed(struct fscache_object *_object); +static atomic_t cachefiles_object_debug_id; /* - * allocate an object record for a cookie lookup and prepare the lookup data + * Allocate a cache object record. 
*/ -static struct fscache_object *cachefiles_alloc_object( - struct fscache_cache *_cache, - struct fscache_cookie *cookie) +static +struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie) { - struct cachefiles_lookup_data *lookup_data; + struct fscache_volume *vcookie = cookie->volume; + struct cachefiles_volume *volume = vcookie->cache_priv; struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct cachefiles_xattr *auxdata; - unsigned keylen, auxlen; - void *buffer, *p; - char *key; - cache = container_of(_cache, struct cachefiles_cache, cache); + _enter("{%s},%x,", vcookie->key, cookie->debug_id); - _enter("{%s},%x,", cache->cache.identifier, cookie->debug_id); - - lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); - if (!lookup_data) - goto nomem_lookup_data; - - /* create a new object record and a temporary leaf image */ - object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp); + object = kmem_cache_zalloc(cachefiles_object_jar, GFP_KERNEL); if (!object) - goto nomem_object; - - ASSERTCMP(object->backer, ==, NULL); + return NULL; - BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - atomic_set(&object->usage, 1); + refcount_set(&object->ref, 1); - fscache_object_init(&object->fscache, cookie, &cache->cache); + spin_lock_init(&object->lock); + INIT_LIST_HEAD(&object->cache_link); + object->volume = volume; + object->debug_id = atomic_inc_return(&cachefiles_object_debug_id); + object->cookie = fscache_get_cookie(cookie, fscache_cookie_get_attach_object); - object->type = cookie->def->type; - - /* get hold of the raw key - * - stick the length on the front and leave space on the back for the - * encoder - */ - buffer = kmalloc((2 + 512) + 3, cachefiles_gfp); - if (!buffer) - goto nomem_buffer; - - keylen = cookie->key_len; - if (keylen <= sizeof(cookie->inline_key)) - p = cookie->inline_key; - else - p = cookie->key; - memcpy(buffer + 2, p, keylen); - - *(uint16_t *)buffer = keylen; - 
((char *)buffer)[keylen + 2] = 0; - ((char *)buffer)[keylen + 3] = 0; - ((char *)buffer)[keylen + 4] = 0; - - /* turn the raw key into something that can work with as a filename */ - key = cachefiles_cook_key(buffer, keylen + 2, object->type); - if (!key) - goto nomem_key; - - /* get hold of the auxiliary data and prepend the object type */ - auxdata = buffer; - auxlen = cookie->aux_len; - if (auxlen) { - if (auxlen <= sizeof(cookie->inline_aux)) - p = cookie->inline_aux; - else - p = cookie->aux; - memcpy(auxdata->data, p, auxlen); - } - - auxdata->len = auxlen + 1; - auxdata->type = cookie->type; - - lookup_data->auxdata = auxdata; - lookup_data->key = key; - object->lookup_data = lookup_data; - - _leave(" = %x [%p]", object->fscache.debug_id, lookup_data); - return &object->fscache; - -nomem_key: - kfree(buffer); -nomem_buffer: - BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - kmem_cache_free(cachefiles_object_jar, object); - fscache_object_destroyed(&cache->cache); -nomem_object: - kfree(lookup_data); -nomem_lookup_data: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + fscache_count_object(vcookie->cache); + trace_cachefiles_ref(object->debug_id, cookie->debug_id, 1, + cachefiles_obj_new); + return object; } /* - * attempt to look up the nominated node in this cache - * - return -ETIMEDOUT to be scheduled again + * Note that an object has been seen. 
*/ -static int cachefiles_lookup_object(struct fscache_object *_object) +void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_lookup_data *lookup_data; - struct cachefiles_object *parent, *object; - struct cachefiles_cache *cache; - const struct cred *saved_cred; - int ret; - - _enter("{OBJ%x}", _object->debug_id); - - cache = container_of(_object->cache, struct cachefiles_cache, cache); - parent = container_of(_object->parent, - struct cachefiles_object, fscache); - object = container_of(_object, struct cachefiles_object, fscache); - lookup_data = object->lookup_data; - - ASSERTCMP(lookup_data, !=, NULL); - - /* look up the key, creating any missing bits */ - cachefiles_begin_secure(cache, &saved_cred); - ret = cachefiles_walk_to_object(parent, object, - lookup_data->key, - lookup_data->auxdata); - cachefiles_end_secure(cache, saved_cred); - - /* polish off by setting the attributes of non-index files */ - if (ret == 0 && - object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) - cachefiles_attr_changed(&object->fscache); - - if (ret < 0 && ret != -ETIMEDOUT) { - if (ret != -ENOBUFS) - pr_warn("Lookup failed error %d\n", ret); - fscache_object_lookup_error(&object->fscache); - } - - _leave(" [%d]", ret); - return ret; + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, + refcount_read(&object->ref), why); } /* - * indication of lookup completion + * Increment the usage count on an object; */ -static void cachefiles_lookup_complete(struct fscache_object *_object) +struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_object *object; - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); + int r; - if (object->lookup_data) { - kfree(object->lookup_data->key); - kfree(object->lookup_data->auxdata); - 
kfree(object->lookup_data); - object->lookup_data = NULL; - } + __refcount_inc(&object->ref, &r); + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, r, why); + return object; } /* - * increment the usage count on an inode object (may fail if unmounting) + * dispose of a reference to an object */ -static -struct fscache_object *cachefiles_grab_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why) +void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_object *object = - container_of(_object, struct cachefiles_object, fscache); - int u; + unsigned int object_debug_id = object->debug_id; + unsigned int cookie_debug_id = object->cookie->debug_id; + struct fscache_cache *cache; + bool done; + int r; + + done = __refcount_dec_and_test(&object->ref, &r); + trace_cachefiles_ref(object_debug_id, cookie_debug_id, r, why); + if (done) { + _debug("- kill object OBJ%x", object_debug_id); + + ASSERTCMP(object->file, ==, NULL); - _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); + kfree(object->d_name); -#ifdef CACHEFILES_DEBUG_SLAB - ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif + cache = object->volume->cache->cache; + fscache_put_cookie(object->cookie, fscache_cookie_put_object); + object->cookie = NULL; + kmem_cache_free(cachefiles_object_jar, object); + fscache_uncount_object(cache); + } - u = atomic_inc_return(&object->usage); - trace_cachefiles_ref(object, _object->cookie, - (enum cachefiles_obj_ref_trace)why, u); - return &object->fscache; + _leave(""); } /* - * update the auxiliary data for an object object on disk + * Adjust the size of a cache file if necessary to match the DIO size. 
We keep + * the EOF marker a multiple of DIO blocks so that we don't fall back to doing + * non-DIO for a partial block straddling the EOF, but we also have to be + * careful of someone expanding the file and accidentally accreting the + * padding. */ -static void cachefiles_update_object(struct fscache_object *_object) +static int cachefiles_adjust_size(struct cachefiles_object *object) { - struct cachefiles_object *object; - struct cachefiles_xattr *auxdata; - struct cachefiles_cache *cache; - struct fscache_cookie *cookie; - const struct cred *saved_cred; - const void *aux; - unsigned auxlen; + struct iattr newattrs; + struct file *file = object->file; + uint64_t ni_size; + loff_t oi_size; + int ret; - _enter("{OBJ%x}", _object->debug_id); + ni_size = object->cookie->object_size; + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); - object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, struct cachefiles_cache, - cache); + _enter("{OBJ%x},[%llu]", + object->debug_id, (unsigned long long) ni_size); - if (!fscache_use_cookie(_object)) { - _leave(" [relinq]"); - return; - } + if (!file) + return -ENOBUFS; - cookie = object->fscache.cookie; - auxlen = cookie->aux_len; + oi_size = i_size_read(file_inode(file)); + if (oi_size == ni_size) + return 0; - if (!auxlen) { - fscache_unuse_cookie(_object); - _leave(" [no aux]"); - return; - } + inode_lock(file_inode(file)); - auxdata = kmalloc(2 + auxlen + 3, cachefiles_gfp); - if (!auxdata) { - fscache_unuse_cookie(_object); - _leave(" [nomem]"); - return; + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = 
notify_change(&init_user_ns, file->f_path.dentry, + &newattrs, NULL); + if (ret < 0) + goto truncate_failed; } - aux = (auxlen <= sizeof(cookie->inline_aux)) ? - cookie->inline_aux : cookie->aux; + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = notify_change(&init_user_ns, file->f_path.dentry, + &newattrs, NULL); - memcpy(auxdata->data, aux, auxlen); - fscache_unuse_cookie(_object); +truncate_failed: + inode_unlock(file_inode(file)); - auxdata->len = auxlen + 1; - auxdata->type = cookie->type; + if (ret < 0) + trace_cachefiles_io_error(NULL, file_inode(file), ret, + cachefiles_trace_notify_change_error); + if (ret == -EIO) { + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } - cachefiles_begin_secure(cache, &saved_cred); - cachefiles_update_object_xattr(object, auxdata); - cachefiles_end_secure(cache, saved_cred); - kfree(auxdata); - _leave(""); + _leave(" = %d", ret); + return ret; } /* - * discard the resources pinned by an object and effect retirement if - * requested + * Attempt to look up the nominated node in this cache */ -static void cachefiles_drop_object(struct fscache_object *_object) +static bool cachefiles_lookup_cookie(struct fscache_cookie *cookie) { struct cachefiles_object *object; - struct cachefiles_cache *cache; + struct cachefiles_cache *cache = cookie->volume->cache->cache_priv; const struct cred *saved_cred; - struct inode *inode; - blkcnt_t i_blocks = 0; + bool success; - ASSERT(_object); + object = cachefiles_alloc_object(cookie); + if (!object) + goto fail; - object = container_of(_object, struct cachefiles_object, fscache); + _enter("{OBJ%x}", object->debug_id); - _enter("{OBJ%x,%d}", - object->fscache.debug_id, atomic_read(&object->usage)); + if (!cachefiles_cook_key(object)) + goto fail_put; - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); + cookie->cache_priv = object; -#ifdef CACHEFILES_DEBUG_SLAB - 
ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif + cachefiles_begin_secure(cache, &saved_cred); - /* We need to tidy the object up if we did in fact manage to open it. - * It's possible for us to get here before the object is fully - * initialised if the parent goes away or the object gets retired - * before we set it up. - */ - if (object->dentry) { - /* delete retired objects */ - if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) && - _object != cache->cache.fsdef - ) { - _debug("- retire object OBJ%x", object->fscache.debug_id); - inode = d_backing_inode(object->dentry); - if (inode) - i_blocks = inode->i_blocks; - - cachefiles_begin_secure(cache, &saved_cred); - cachefiles_delete_object(cache, object); - cachefiles_end_secure(cache, saved_cred); - } + success = cachefiles_look_up_object(object); + if (!success) + goto fail_withdraw; - /* close the filesystem stuff attached to the object */ - if (object->backer != object->dentry) - dput(object->backer); - object->backer = NULL; - } + cachefiles_see_object(object, cachefiles_obj_see_lookup_cookie); + + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + cachefiles_adjust_size(object); - /* note that the object is now inactive */ - if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) - cachefiles_mark_object_inactive(cache, object, i_blocks); + cachefiles_end_secure(cache, saved_cred); + _leave(" = t"); + return true; - dput(object->dentry); - object->dentry = NULL; +fail_withdraw: + cachefiles_end_secure(cache, saved_cred); + cachefiles_see_object(object, cachefiles_obj_see_lookup_failed); + fscache_caching_failed(cookie); + _debug("failed c=%08x o=%08x", cookie->debug_id, object->debug_id); + /* The caller holds an access count on the cookie, so we need them to + * drop it before we can withdraw the object. 
+ */ + return false; - _leave(""); +fail_put: + cachefiles_put_object(object, cachefiles_obj_put_alloc_fail); +fail: + return false; } /* - * dispose of a reference to an object + * Shorten the backing object to discard any dirty data and free up + * any unused granules. */ -void cachefiles_put_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why) +static bool cachefiles_shorten_object(struct cachefiles_object *object, + struct file *file, loff_t new_size) { - struct cachefiles_object *object; - struct fscache_cache *cache; - int u; - - ASSERT(_object); - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("{OBJ%x,%d}", - object->fscache.debug_id, atomic_read(&object->usage)); - -#ifdef CACHEFILES_DEBUG_SLAB - ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif - - ASSERTIFCMP(object->fscache.parent, - object->fscache.parent->n_children, >, 0); - - u = atomic_dec_return(&object->usage); - trace_cachefiles_ref(object, _object->cookie, - (enum cachefiles_obj_ref_trace)why, u); - ASSERTCMP(u, !=, -1); - if (u == 0) { - _debug("- kill object OBJ%x", object->fscache.debug_id); + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); + loff_t i_size, dio_size; + int ret; - ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - ASSERTCMP(object->fscache.parent, ==, NULL); - ASSERTCMP(object->backer, ==, NULL); - ASSERTCMP(object->dentry, ==, NULL); - ASSERTCMP(object->fscache.n_ops, ==, 0); - ASSERTCMP(object->fscache.n_children, ==, 0); + dio_size = round_up(new_size, CACHEFILES_DIO_BLOCK_SIZE); + i_size = i_size_read(inode); + + trace_cachefiles_trunc(object, inode, i_size, dio_size, + cachefiles_trunc_shrink); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_truncate(&file->f_path, dio_size); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_trunc_error); + 
cachefiles_io_error_obj(object, "Trunc-to-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; + } - if (object->lookup_data) { - kfree(object->lookup_data->key); - kfree(object->lookup_data->auxdata); - kfree(object->lookup_data); - object->lookup_data = NULL; + if (new_size < dio_size) { + trace_cachefiles_trunc(object, inode, dio_size, new_size, + cachefiles_trunc_dio_adjust); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE, + new_size, dio_size - new_size); /* args are (offset, len): zero only the padding up to dio_size */ + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, "Trunc-to-dio-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; } - - cache = object->fscache.cache; - fscache_object_destroy(&object->fscache); - kmem_cache_free(cachefiles_object_jar, object); - fscache_object_destroyed(cache); } - _leave(""); + return true; } /* - * sync a cache + * Resize the backing object. 
*/ -static void cachefiles_sync_cache(struct fscache_cache *_cache) +static void cachefiles_resize_cookie(struct netfs_cache_resources *cres, + loff_t new_size) { - struct cachefiles_cache *cache; + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + struct fscache_cookie *cookie = object->cookie; const struct cred *saved_cred; - int ret; + struct file *file = cachefiles_cres_file(cres); + loff_t old_size = cookie->object_size; - _enter("%s", _cache->tag->name); + _enter("%llu->%llu", old_size, new_size); - cache = container_of(_cache, struct cachefiles_cache, cache); - - /* make sure all pages pinned by operations on behalf of the netfs are - * written to disc */ - cachefiles_begin_secure(cache, &saved_cred); - down_read(&cache->mnt->mnt_sb->s_umount); - ret = sync_filesystem(cache->mnt->mnt_sb); - up_read(&cache->mnt->mnt_sb->s_umount); - cachefiles_end_secure(cache, saved_cred); + if (new_size < old_size) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_shorten_object(object, file, new_size); + cachefiles_end_secure(cache, saved_cred); + object->cookie->object_size = new_size; + return; + } - if (ret == -EIO) - cachefiles_io_error(cache, - "Attempt to sync backing fs superblock" - " returned error %d", - ret); + /* The file is being expanded. We don't need to do anything in + * particular. cookie->initial_size doesn't change, and so the point + * before which we have to download doesn't change either. + */ + cookie->object_size = new_size; } /* - * check if the backing cache is updated to FS-Cache - * - called by FS-Cache when evaluates if need to invalidate the cache + * Commit changes to the object as we drop it. 
*/ -static int cachefiles_check_consistency(struct fscache_operation *op) +static void cachefiles_commit_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) { - struct cachefiles_object *object; - struct cachefiles_cache *cache; - const struct cred *saved_cred; - int ret; + bool update = false; - _enter("{OBJ%x}", op->object->debug_id); + if (test_and_clear_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags)) + update = true; + if (test_and_clear_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags)) + update = true; + if (update) + cachefiles_set_object_xattr(object); - object = container_of(op->object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); + if (test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) + cachefiles_commit_tmpfile(cache, object); +} - cachefiles_begin_secure(cache, &saved_cred); - ret = cachefiles_check_auxdata(object); - cachefiles_end_secure(cache, saved_cred); +/* + * Finalise an object and close the VFS structs that we have. 
+ */ +static void cachefiles_clean_up_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) +{ + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) { + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + cachefiles_see_object(object, cachefiles_obj_see_clean_delete); + _debug("- inval object OBJ%x", object->debug_id); + cachefiles_delete_object(object, FSCACHE_OBJECT_WAS_RETIRED); + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_drop_tmp); + _debug("- inval object OBJ%x tmpfile", object->debug_id); + } + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_commit); + cachefiles_commit_object(object, cache); + } - _leave(" = %d", ret); - return ret; + cachefiles_unmark_inode_in_use(object, object->file); + if (object->file) { + fput(object->file); + object->file = NULL; + } } /* - * notification the attributes on an object have changed - * - called with reads/writes excluded by FS-Cache + * Withdraw caching for a cookie. 
*/ -static int cachefiles_attr_changed(struct fscache_object *_object) +static void cachefiles_withdraw_cookie(struct fscache_cookie *cookie) { - struct cachefiles_object *object; - struct cachefiles_cache *cache; + struct cachefiles_object *object = cookie->cache_priv; + struct cachefiles_cache *cache = object->volume->cache; const struct cred *saved_cred; - struct iattr newattrs; - uint64_t ni_size; - loff_t oi_size; - int ret; - - ni_size = _object->store_limit_l; - - _enter("{OBJ%x},[%llu]", - _object->debug_id, (unsigned long long) ni_size); - - object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - if (ni_size == object->i_size) - return 0; - - if (!object->backer) - return -ENOBUFS; - ASSERT(d_is_reg(object->backer)); + _enter("o=%x", object->debug_id); + cachefiles_see_object(object, cachefiles_obj_see_withdraw_cookie); - fscache_set_store_limit(&object->fscache, ni_size); - - oi_size = i_size_read(d_backing_inode(object->backer)); - if (oi_size == ni_size) - return 0; - - cachefiles_begin_secure(cache, &saved_cred); - inode_lock(d_inode(object->backer)); - - /* if there's an extension to a partial page at the end of the backing - * file, we need to discard the partial page so that we pick up new - * data after it */ - if (oi_size & ~PAGE_MASK && ni_size > oi_size) { - _debug("discard tail %llx", oi_size); - newattrs.ia_valid = ATTR_SIZE; - newattrs.ia_size = oi_size & PAGE_MASK; - ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL); - if (ret < 0) - goto truncate_failed; + if (!list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); } - newattrs.ia_valid = ATTR_SIZE; - newattrs.ia_size = ni_size; - ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL); - -truncate_failed: - 
inode_unlock(d_inode(object->backer)); - cachefiles_end_secure(cache, saved_cred); - - if (ret == -EIO) { - fscache_set_store_limit(&object->fscache, 0); - cachefiles_io_error_obj(object, "Size set failed"); - ret = -ENOBUFS; + if (object->file) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_clean_up_object(object, cache); + cachefiles_end_secure(cache, saved_cred); } - _leave(" = %d", ret); - return ret; + cookie->cache_priv = NULL; + cachefiles_put_object(object, cachefiles_obj_put_detach); } /* - * Invalidate an object + * Invalidate the storage associated with a cookie. */ -static void cachefiles_invalidate_object(struct fscache_operation *op) +static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie) { - struct cachefiles_object *object; - struct cachefiles_cache *cache; - const struct cred *saved_cred; - struct path path; - uint64_t ni_size; - int ret; + struct cachefiles_object *object = cookie->cache_priv; + struct file *new_file, *old_file; + bool old_tmpfile; - object = container_of(op->object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); + _enter("o=%x,[%llu]", object->debug_id, object->cookie->object_size); - ni_size = op->object->store_limit_l; + old_tmpfile = test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); - _enter("{OBJ%x},[%llu]", - op->object->debug_id, (unsigned long long)ni_size); + if (!object->file) { + fscache_resume_after_invalidation(cookie); + _leave(" = t [light]"); + return true; + } - if (object->backer) { - ASSERT(d_is_reg(object->backer)); + new_file = cachefiles_create_tmpfile(object); + if (IS_ERR(new_file)) + goto failed; - fscache_set_store_limit(&object->fscache, ni_size); + /* Substitute the VFS target */ + _debug("sub"); + spin_lock(&object->lock); - path.dentry = object->backer; - path.mnt = cache->mnt; + old_file = object->file; + object->file = new_file; + object->content_info = CACHEFILES_CONTENT_NO_DATA; + 
set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); - cachefiles_begin_secure(cache, &saved_cred); - ret = vfs_truncate(&path, 0); - if (ret == 0) - ret = vfs_truncate(&path, ni_size); - cachefiles_end_secure(cache, saved_cred); + spin_unlock(&object->lock); + _debug("subbed"); + + /* Allow I/O to take place again */ + fscache_resume_after_invalidation(cookie); + + if (old_file) { + if (!old_tmpfile) { + struct cachefiles_volume *volume = object->volume; + struct dentry *fan = volume->fanout[(u8)cookie->key_hash]; - if (ret != 0) { - fscache_set_store_limit(&object->fscache, 0); - if (ret == -EIO) - cachefiles_io_error_obj(object, - "Invalidate failed"); + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + cachefiles_bury_object(volume->cache, object, fan, + old_file->f_path.dentry, + FSCACHE_OBJECT_INVALIDATED); } + fput(old_file); } - fscache_op_complete(op, true); - _leave(""); -} + _leave(" = t"); + return true; -/* - * dissociate a cache from all the pages it was backing - */ -static void cachefiles_dissociate_pages(struct fscache_cache *cache) -{ - _enter(""); +failed: + _leave(" = f"); + return false; } const struct fscache_cache_ops cachefiles_cache_ops = { .name = "cachefiles", - .alloc_object = cachefiles_alloc_object, - .lookup_object = cachefiles_lookup_object, - .lookup_complete = cachefiles_lookup_complete, - .grab_object = cachefiles_grab_object, - .update_object = cachefiles_update_object, - .invalidate_object = cachefiles_invalidate_object, - .drop_object = cachefiles_drop_object, - .put_object = cachefiles_put_object, - .sync_cache = cachefiles_sync_cache, - .attr_changed = cachefiles_attr_changed, - .read_or_alloc_page = cachefiles_read_or_alloc_page, - .read_or_alloc_pages = cachefiles_read_or_alloc_pages, - .allocate_page = cachefiles_allocate_page, - .allocate_pages = cachefiles_allocate_pages, - .write_page = cachefiles_write_page, - .uncache_page = cachefiles_uncache_page, 
- .dissociate_pages = cachefiles_dissociate_pages, - .check_consistency = cachefiles_check_consistency, - .begin_read_operation = cachefiles_begin_read_operation, + .acquire_volume = cachefiles_acquire_volume, + .free_volume = cachefiles_free_volume, + .lookup_cookie = cachefiles_lookup_cookie, + .withdraw_cookie = cachefiles_withdraw_cookie, + .invalidate_cookie = cachefiles_invalidate_cookie, + .begin_operation = cachefiles_begin_operation, + .resize_cookie = cachefiles_resize_cookie, + .prepare_to_write = cachefiles_prepare_to_write, }; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 0a511c36dab8..8dd54d9375b6 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* General netfs cache on cache files internal defs * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ @@ -13,58 +13,72 @@ #include <linux/fscache-cache.h> -#include <linux/timer.h> -#include <linux/wait_bit.h> #include <linux/cred.h> -#include <linux/workqueue.h> #include <linux/security.h> +#define CACHEFILES_DIO_BLOCK_SIZE 4096 + struct cachefiles_cache; struct cachefiles_object; -extern unsigned cachefiles_debug; -#define CACHEFILES_DEBUG_KENTER 1 -#define CACHEFILES_DEBUG_KLEAVE 2 -#define CACHEFILES_DEBUG_KDEBUG 4 +enum cachefiles_content { + /* These values are saved on disk */ + CACHEFILES_CONTENT_NO_DATA = 0, /* No content stored */ + CACHEFILES_CONTENT_SINGLE = 1, /* Content is monolithic, all is present */ + CACHEFILES_CONTENT_ALL = 2, /* Content is all present, no map */ + CACHEFILES_CONTENT_BACKFS_MAP = 3, /* Content is piecemeal, mapped through backing fs */ + CACHEFILES_CONTENT_DIRTY = 4, /* Content is dirty (only seen on disk) */ + nr__cachefiles_content +}; -#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC) +/* + * Cached volume representation. 
+ */ +struct cachefiles_volume { + struct cachefiles_cache *cache; + struct list_head cache_link; /* Link in cache->volumes */ + struct fscache_volume *vcookie; /* The netfs's representation */ + struct dentry *dentry; /* The volume dentry */ + struct dentry *fanout[256]; /* Fanout subdirs */ +}; /* - * node records + * Backing file state. */ struct cachefiles_object { - struct fscache_object fscache; /* fscache handle */ - struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ - struct dentry *dentry; /* the file/dir representing this object */ - struct dentry *backer; /* backing file */ - loff_t i_size; /* object size */ + struct fscache_cookie *cookie; /* Netfs data storage object cookie */ + struct cachefiles_volume *volume; /* Cache volume that holds this object */ + struct list_head cache_link; /* Link in cache->*_list */ + struct file *file; /* The file representing this object */ + char *d_name; /* Backing file name */ + int debug_id; + spinlock_t lock; + refcount_t ref; + u8 d_name_len; /* Length of filename */ + enum cachefiles_content content_info:8; /* Info about content presence */ unsigned long flags; -#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ - atomic_t usage; /* object usage count */ - uint8_t type; /* object type */ - uint8_t new; /* T if object new */ - spinlock_t work_lock; - struct rb_node active_node; /* link in active tree (dentry is key) */ +#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ }; -extern struct kmem_cache *cachefiles_object_jar; - /* * Cache files cache definition */ struct cachefiles_cache { - struct fscache_cache cache; /* FS-Cache record */ + struct fscache_cache *cache; /* Cache cookie */ struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *store; /* Directory into which live objects go */ struct dentry *graveyard; /* directory into which dead objects go */ struct file *cachefilesd; /* manager daemon handle */ + struct list_head volumes; /* List of 
volume objects */ + struct list_head object_list; /* List of active objects */ + spinlock_t object_list_lock; /* Lock for volumes and object_list */ const struct cred *cache_cred; /* security override for accessing cache */ struct mutex daemon_mutex; /* command serialisation mutex */ wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ - struct rb_root active_nodes; /* active nodes (can't be culled) */ - rwlock_t active_lock; /* lock for active_nodes */ atomic_t gravecounter; /* graveyard uniquifier */ atomic_t f_released; /* number of objects released lately */ atomic_long_t b_released; /* number of blocks released lately */ + atomic_long_t b_writing; /* Number of blocks being written */ unsigned frun_percent; /* when to stop culling (% files) */ unsigned fcull_percent; /* when to start culling (% files) */ unsigned fstop_percent; /* when to stop allocating (% files) */ @@ -89,38 +103,19 @@ struct cachefiles_cache { char *tag; /* cache binding tag */ }; -/* - * backing file read tracking - */ -struct cachefiles_one_read { - wait_queue_entry_t monitor; /* link into monitored waitqueue */ - struct page *back_page; /* backing file page we're waiting for */ - struct page *netfs_page; /* netfs page we're going to fill */ - struct fscache_retrieval *op; /* retrieval op covering this */ - struct list_head op_link; /* link in op's todo list */ -}; - -/* - * backing file write tracking - */ -struct cachefiles_one_write { - struct page *netfs_page; /* netfs page to copy */ - struct cachefiles_object *object; - struct list_head obj_link; /* link in object's lists */ - fscache_rw_complete_t end_io_func; - void *context; -}; +#include <trace/events/cachefiles.h> -/* - * auxiliary data xattr buffer - */ -struct cachefiles_xattr { - uint16_t len; - uint8_t type; - uint8_t data[]; -}; +static inline +struct file *cachefiles_cres_file(struct netfs_cache_resources *cres) +{ + return cres->cache_priv2; +} -#include <trace/events/cachefiles.h> +static inline +struct 
cachefiles_object *cachefiles_cres_object(struct netfs_cache_resources *cres) +{ + return fscache_cres_cookie(cres)->cache_priv; +} /* * note change of state for daemon @@ -132,74 +127,118 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * bind.c + * cache.c */ -extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); -extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); +extern int cachefiles_add_cache(struct cachefiles_cache *cache); +extern void cachefiles_withdraw_cache(struct cachefiles_cache *cache); + +enum cachefiles_has_space_for { + cachefiles_has_space_check, + cachefiles_has_space_for_write, + cachefiles_has_space_for_create, +}; +extern int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason); /* * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; -extern int cachefiles_has_space(struct cachefiles_cache *cache, - unsigned fnr, unsigned bnr); +/* + * error_inject.c + */ +#ifdef CONFIG_CACHEFILES_ERROR_INJECTION +extern unsigned int cachefiles_error_injection_state; +extern int cachefiles_register_error_injection(void); +extern void cachefiles_unregister_error_injection(void); + +#else +#define cachefiles_error_injection_state 0 + +static inline int cachefiles_register_error_injection(void) +{ + return 0; +} + +static inline void cachefiles_unregister_error_injection(void) +{ +} +#endif + + +static inline int cachefiles_inject_read_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : 0; +} + +static inline int cachefiles_inject_write_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : + cachefiles_error_injection_state & 1 ? -ENOSPC : + 0; +} + +static inline int cachefiles_inject_remove_error(void) +{ + return cachefiles_error_injection_state & 2 ? 
-EIO : 0; +} /* * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; +extern void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); -void cachefiles_put_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why); +/* + * io.c + */ +extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state); /* * key.c */ -extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); +extern bool cachefiles_cook_key(struct cachefiles_object *object); + +/* + * main.c + */ +extern struct kmem_cache *cachefiles_object_jar; /* * namei.c */ -extern void cachefiles_mark_object_inactive(struct cachefiles_cache *cache, - struct cachefiles_object *object, - blkcnt_t i_blocks); -extern int cachefiles_delete_object(struct cachefiles_cache *cache, - struct cachefiles_object *object); -extern int cachefiles_walk_to_object(struct cachefiles_object *parent, - struct cachefiles_object *object, - const char *key, - struct cachefiles_xattr *auxdata); +extern void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file); +extern int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why); +extern int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why); +extern bool cachefiles_look_up_object(struct cachefiles_object *object); extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, struct dentry *dir, - const char *name); + const char *name, + bool *_is_new); +extern void cachefiles_put_directory(struct dentry 
*dir); extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, char *filename); extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); - -/* - * rdwr.c - */ -extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, - struct page *, gfp_t); -extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, - gfp_t); -extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, - gfp_t); -extern int cachefiles_allocate_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, gfp_t); -extern int cachefiles_write_page(struct fscache_storage *, struct page *); -extern void cachefiles_uncache_page(struct fscache_object *, struct page *); - -/* - * rdwr2.c - */ -extern int cachefiles_begin_read_operation(struct netfs_read_request *, - struct fscache_retrieval *); +extern struct file *cachefiles_create_tmpfile(struct cachefiles_object *object); +extern bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object); /* * security.c @@ -222,28 +261,32 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* + * volume.c + */ +void cachefiles_acquire_volume(struct fscache_volume *volume); +void cachefiles_free_volume(struct fscache_volume *volume); +void cachefiles_withdraw_volume(struct cachefiles_volume *volume); + +/* * xattr.c */ -extern int cachefiles_check_object_type(struct cachefiles_object *object); -extern int cachefiles_set_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata); -extern int cachefiles_update_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata); -extern int cachefiles_check_auxdata(struct cachefiles_object *object); -extern int cachefiles_check_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata); +extern int cachefiles_set_object_xattr(struct 
cachefiles_object *object); +extern int cachefiles_check_auxdata(struct cachefiles_object *object, + struct file *file); extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct cachefiles_object *object, struct dentry *dentry); - +extern void cachefiles_prepare_to_write(struct fscache_cookie *cookie); +extern bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume); +extern int cachefiles_check_volume_xattr(struct cachefiles_volume *volume); /* - * error handling + * Error handling */ - #define cachefiles_io_error(___cache, FMT, ...) \ do { \ pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ - fscache_io_error(&(___cache)->cache); \ + fscache_io_error((___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ } while (0) @@ -251,15 +294,20 @@ do { \ do { \ struct cachefiles_cache *___cache; \ \ - ___cache = container_of((object)->fscache.cache, \ - struct cachefiles_cache, cache); \ - cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \ + ___cache = (object)->volume->cache; \ + cachefiles_io_error(___cache, FMT " [o=%08x]", ##__VA_ARGS__, \ + (object)->debug_id); \ } while (0) /* - * debug tracing + * Debug tracing */ +extern unsigned cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + #define dbgprintk(FMT, ...) 
\ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index effe37ef8629..60b1eac2ce78 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -9,8 +9,9 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/uio.h> +#include <linux/falloc.h> #include <linux/sched/mm.h> -#include <linux/netfs.h> +#include <trace/events/fscache.h> #include "internal.h" struct cachefiles_kiocb { @@ -21,14 +22,18 @@ struct cachefiles_kiocb { size_t skipped; size_t len; }; + struct cachefiles_object *object; netfs_io_terminated_t term_func; void *term_func_priv; bool was_async; + unsigned int inval_counter; /* Copy of cookie->inval_counter */ + u64 b_writing; }; static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) { if (refcount_dec_and_test(&ki->ki_refcnt)) { + cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq); fput(ki->iocb.ki_filp); kfree(ki); } @@ -40,12 +45,22 @@ static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) static void cachefiles_read_complete(struct kiocb *iocb, long ret) { struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct inode *inode = file_inode(ki->iocb.ki_filp); _enter("%ld", ret); + if (ret < 0) + trace_cachefiles_io_error(ki->object, inode, ret, + cachefiles_trace_read_error); + if (ki->term_func) { - if (ret >= 0) - ret += ki->skipped; + if (ret >= 0) { + if (ki->object->cookie->inval_counter == ki->inval_counter) + ki->skipped += ret; + else + ret = -ESTALE; + } + ki->term_func(ki->term_func_priv, ret, ki->was_async); } @@ -58,16 +73,24 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) static int cachefiles_read(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, - bool seek_data, + enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv) { + struct cachefiles_object *object; struct cachefiles_kiocb *ki; - struct file 
*file = cres->cache_priv2; + struct file *file; unsigned int old_nofs; ssize_t ret = -ENOBUFS; size_t len = iov_iter_count(iter), skipped = 0; + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto presubmission_error; + + fscache_count_read(); + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + _enter("%pD,%li,%llx,%zx/%llx", file, file_inode(file)->i_ino, start_pos, len, i_size_read(file_inode(file))); @@ -75,10 +98,12 @@ static int cachefiles_read(struct netfs_cache_resources *cres, /* If the caller asked us to seek for data before doing the read, then * we should do that now. If we find a gap, we fill it with zeros. */ - if (seek_data) { + if (read_hole != NETFS_READ_HOLE_IGNORE) { loff_t off = start_pos, off2; - off2 = vfs_llseek(file, off, SEEK_DATA); + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_DATA); if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { skipped = 0; ret = off2; @@ -90,6 +115,10 @@ static int cachefiles_read(struct netfs_cache_resources *cres, * in the region, so clear the rest of the buffer and * return success. 
*/ + ret = -ENODATA; + if (read_hole == NETFS_READ_HOLE_FAIL) + goto presubmission_error; + iov_iter_zero(len, iter); skipped = len; ret = 0; @@ -100,7 +129,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, iov_iter_zero(skipped, iter); } - ret = -ENOBUFS; + ret = -ENOMEM; ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) goto presubmission_error; @@ -112,6 +141,8 @@ static int cachefiles_read(struct netfs_cache_resources *cres, ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); ki->iocb.ki_ioprio = get_current_ioprio(); ki->skipped = skipped; + ki->object = object; + ki->inval_counter = cres->inval_counter; ki->term_func = term_func; ki->term_func_priv = term_func_priv; ki->was_async = true; @@ -120,9 +151,13 @@ static int cachefiles_read(struct netfs_cache_resources *cres, ki->iocb.ki_complete = cachefiles_read_complete; get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); old_nofs = memalloc_nofs_save(); - ret = vfs_iocb_iter_read(file, &ki->iocb, iter); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_iocb_iter_read(file, &ki->iocb, iter); memalloc_nofs_restore(old_nofs); switch (ret) { case -EIOCBQUEUED: @@ -162,6 +197,7 @@ presubmission_error: static void cachefiles_write_complete(struct kiocb *iocb, long ret) { struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct cachefiles_object *object = ki->object; struct inode *inode = file_inode(ki->iocb.ki_filp); _enter("%ld", ret); @@ -170,9 +206,14 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); + if (ret < 0) + trace_cachefiles_io_error(object, inode, ret, + cachefiles_trace_write_error); + + atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); + 
set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); if (ki->term_func) ki->term_func(ki->term_func_priv, ret, ki->was_async); - cachefiles_put_kiocb(ki); } @@ -185,17 +226,27 @@ static int cachefiles_write(struct netfs_cache_resources *cres, netfs_io_terminated_t term_func, void *term_func_priv) { + struct cachefiles_object *object; + struct cachefiles_cache *cache; struct cachefiles_kiocb *ki; struct inode *inode; - struct file *file = cres->cache_priv2; + struct file *file; unsigned int old_nofs; ssize_t ret = -ENOBUFS; size_t len = iov_iter_count(iter); + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) + goto presubmission_error; + fscache_count_write(); + object = cachefiles_cres_object(cres); + cache = object->volume->cache; + file = cachefiles_cres_file(cres); + _enter("%pD,%li,%llx,%zx/%llx", file, file_inode(file)->i_ino, start_pos, len, i_size_read(file_inode(file))); + ret = -ENOMEM; ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) goto presubmission_error; @@ -206,14 +257,18 @@ static int cachefiles_write(struct netfs_cache_resources *cres, ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); ki->iocb.ki_ioprio = get_current_ioprio(); + ki->object = object; + ki->inval_counter = cres->inval_counter; ki->start = start_pos; ki->len = len; ki->term_func = term_func; ki->term_func_priv = term_func_priv; ki->was_async = true; + ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift; if (ki->term_func) ki->iocb.ki_complete = cachefiles_write_complete; + atomic_long_add(ki->b_writing, &cache->b_writing); /* Open-code file_start_write here to grab freeze protection, which * will be released by another thread in aio_complete_rw(). 
Fool @@ -225,9 +280,13 @@ static int cachefiles_write(struct netfs_cache_resources *cres, __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len); old_nofs = memalloc_nofs_save(); - ret = vfs_iocb_iter_write(file, &ki->iocb, iter); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_iocb_iter_write(file, &ki->iocb, iter); memalloc_nofs_restore(old_nofs); switch (ret) { case -EIOCBQUEUED: @@ -257,8 +316,8 @@ in_progress: presubmission_error: if (term_func) - term_func(term_func_priv, -ENOMEM, false); - return -ENOMEM; + term_func(term_func_priv, ret, false); + return ret; } /* @@ -268,47 +327,82 @@ presubmission_error: static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, loff_t i_size) { - struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv; + enum cachefiles_prepare_read_trace why; + struct netfs_read_request *rreq = subreq->rreq; + struct netfs_cache_resources *cres = &rreq->cache_resources; struct cachefiles_object *object; struct cachefiles_cache *cache; + struct fscache_cookie *cookie = fscache_cres_cookie(cres); const struct cred *saved_cred; - struct file *file = subreq->rreq->cache_resources.cache_priv2; + struct file *file = cachefiles_cres_file(cres); + enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER; loff_t off, to; + ino_t ino = file ? 
file_inode(file)->i_ino : 0; _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); + if (subreq->start >= i_size) { + ret = NETFS_FILL_WITH_ZEROES; + why = cachefiles_trace_read_after_eof; + goto out_no_object; + } - if (!file) - goto cache_fail_nosec; + if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { + __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); + why = cachefiles_trace_read_no_data; + goto out_no_object; + } - if (subreq->start >= i_size) - return NETFS_FILL_WITH_ZEROES; + /* The object and the file may be being created in the background. */ + if (!file) { + why = cachefiles_trace_read_no_file; + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto out_no_object; + file = cachefiles_cres_file(cres); + if (!file) + goto out_no_object; + ino = file_inode(file)->i_ino; + } + object = cachefiles_cres_object(cres); + cache = object->volume->cache; cachefiles_begin_secure(cache, &saved_cred); - off = vfs_llseek(file, subreq->start, SEEK_DATA); + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, subreq->start, SEEK_DATA); if (off < 0 && off >= (loff_t)-MAX_ERRNO) { - if (off == (loff_t)-ENXIO) + if (off == (loff_t)-ENXIO) { + why = cachefiles_trace_read_seek_nxio; goto download_and_store; - goto cache_fail; + } + trace_cachefiles_io_error(object, file_inode(file), off, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; } - if (off >= subreq->start + subreq->len) + if (off >= subreq->start + subreq->len) { + why = cachefiles_trace_read_found_hole; goto download_and_store; + } if (off > subreq->start) { off = round_up(off, cache->bsize); subreq->len = off - subreq->start; + why = cachefiles_trace_read_found_part; goto download_and_store; } - to = vfs_llseek(file, subreq->start, SEEK_HOLE); - if (to < 0 && to 
>= (loff_t)-MAX_ERRNO) - goto cache_fail; + to = cachefiles_inject_read_error(); + if (to == 0) + to = vfs_llseek(file, subreq->start, SEEK_HOLE); + if (to < 0 && to >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), to, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; + } if (to < subreq->start + subreq->len) { if (subreq->start + subreq->len >= i_size) @@ -318,32 +412,119 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque subreq->len = to - subreq->start; } - cachefiles_end_secure(cache, saved_cred); - return NETFS_READ_FROM_CACHE; + why = cachefiles_trace_read_have_data; + ret = NETFS_READ_FROM_CACHE; + goto out; download_and_store: - if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0) - __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); -cache_fail: + __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); +out: cachefiles_end_secure(cache, saved_cred); -cache_fail_nosec: - return NETFS_DOWNLOAD_FROM_SERVER; +out_no_object: + trace_cachefiles_prep_read(subreq, ret, why, ino); + return ret; } /* * Prepare for a write to occur. 
*/ -static int cachefiles_prepare_write(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size) +static int __cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size, + bool no_space_allocated_yet) { - loff_t start = *_start; + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + struct file *file = cachefiles_cres_file(cres); + loff_t start = *_start, pos; size_t len = *_len, down; + int ret; /* Round to DIO size */ down = start - round_down(start, PAGE_SIZE); *_start = start - down; *_len = round_up(down + len, PAGE_SIZE); - return 0; + + /* We need to work out whether there's sufficient disk space to perform + * the write - but we can skip that check if we have space already + * allocated. + */ + if (no_space_allocated_yet) + goto check_space; + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, *_start, SEEK_DATA); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + if (pos == -ENXIO) + goto check_space; /* Unallocated tail */ + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)*_start + *_len) + goto check_space; /* Unallocated region */ + + /* We have a block that's at least partially filled - if we're low on + * space, we need to see if it's fully allocated. If it's not, we may + * want to cull it. 
+ */ + if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_check) == 0) + return 0; /* Enough space to simply overwrite the whole block */ + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, *_start, SEEK_HOLE); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)*_start + *_len) + return 0; /* Fully allocated */ + + /* Partially allocated, but insufficient space: cull. */ + fscache_count_no_write_space(); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + *_start, *_len); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, + "CacheFiles: fallocate failed (%d)\n", ret); + ret = -EIO; + } + + return ret; + +check_space: + return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_for_write); +} + +static int cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size, + bool no_space_allocated_yet) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + const struct cred *saved_cred; + int ret; + + if (!cachefiles_cres_file(cres)) { + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) + return -ENOBUFS; + if (!cachefiles_cres_file(cres)) + return -ENOBUFS; + } + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(cres, _start, _len, i_size, + no_space_allocated_yet); + cachefiles_end_secure(cache, saved_cred); + return ret; } /* @@ -351,19 +532,11 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres, */ static void cachefiles_end_operation(struct netfs_cache_resources *cres) { - struct fscache_retrieval *op = 
cres->cache_priv; - struct file *file = cres->cache_priv2; - - _enter(""); + struct file *file = cachefiles_cres_file(cres); if (file) fput(file); - if (op) { - fscache_op_complete(&op->op, false); - fscache_put_retrieval(op); - } - - _leave(""); + fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end); } static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { @@ -377,44 +550,25 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { /* * Open the cache file when beginning a cache operation. */ -int cachefiles_begin_read_operation(struct netfs_read_request *rreq, - struct fscache_retrieval *op) +bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state) { - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct path path; - struct file *file; + struct cachefiles_object *object = cachefiles_cres_object(cres); + + if (!cachefiles_cres_file(cres)) { + cres->ops = &cachefiles_netfs_cache_ops; + if (object->file) { + spin_lock(&object->lock); + if (!cres->cache_priv2 && object->file) + cres->cache_priv2 = get_file(object->file); + spin_unlock(&object->lock); + } + } - _enter(""); - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - path.mnt = cache->mnt; - path.dentry = object->backer; - file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, - d_inode(object->backer), cache->cache_cred); - if (IS_ERR(file)) - return PTR_ERR(file); - if (!S_ISREG(file_inode(file)->i_mode)) - goto error_file; - if (unlikely(!file->f_op->read_iter) || - unlikely(!file->f_op->write_iter)) { - pr_notice("Cache does not support read_iter and write_iter\n"); - goto error_file; + if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) { + pr_err("failed to get cres->file\n"); + return false; } - fscache_get_retrieval(op); - 
rreq->cache_resources.cache_priv = op; - rreq->cache_resources.cache_priv2 = file; - rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; - rreq->cache_resources.debug_id = object->fscache.debug_id; - _leave(""); - return 0; - -error_file: - fput(file); - return -EIO; + return true; } diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 7f94efc97e23..bf935e25bdbe 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Key to pathname encoder * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ @@ -22,134 +22,117 @@ static const char cachefiles_filecharmap[256] = { [48 ... 127] = 1, /* '0' -> '~' */ }; +static inline unsigned int how_many_hex_digits(unsigned int x) +{ + return x ? round_up(ilog2(x) + 1, 4) / 4 : 0; +} + /* * turn the raw key into something cooked - * - the raw key should include the length in the two bytes at the front - * - the key may be up to 514 bytes in length (including the length word) + * - the key may be up to NAME_MAX in length (including the length word) * - "base64" encode the strange keys, mapping 3 bytes of raw to four of * cooked * - need to cut the cooked key into 252 char lengths (189 raw bytes) */ -char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +bool cachefiles_cook_key(struct cachefiles_object *object) { - unsigned char csum, ch; - unsigned int acc; - char *key; - int loop, len, max, seg, mark, print; + const u8 *key = fscache_get_key(object->cookie), *kend; + unsigned char ch; + unsigned int acc, i, n, nle, nbe, keylen = object->cookie->key_len; + unsigned int b64len, len, print, pad; + char *name, sep; - _enter(",%d", keylen); + _enter(",%u,%*phN", keylen, keylen, key); - BUG_ON(keylen < 2 || keylen > 514); + BUG_ON(keylen > NAME_MAX - 3); - csum = raw[0] + raw[1]; print = 1; - for (loop = 2; loop < keylen; loop++) { 
- ch = raw[loop]; - csum += ch; + for (i = 0; i < keylen; i++) { + ch = key[i]; print &= cachefiles_filecharmap[ch]; } + /* If the path is usable ASCII, then we render it directly */ if (print) { - /* if the path is usable ASCII, then we render it directly */ - max = keylen - 2; - max += 2; /* two base64'd length chars on the front */ - max += 5; /* @checksum/M */ - max += 3 * 2; /* maximum number of segment dividers (".../M") - * is ((514 + 251) / 252) = 3 - */ - max += 1; /* NUL on end */ - } else { - /* calculate the maximum length of the cooked key */ - keylen = (keylen + 2) / 3; - - max = keylen * 4; - max += 5; /* @checksum/M */ - max += 3 * 2; /* maximum number of segment dividers (".../M") - * is ((514 + 188) / 189) = 3 - */ - max += 1; /* NUL on end */ + len = 1 + keylen; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'D'; /* Data object type, string encoding */ + memcpy(name + 1, key, keylen); + goto success; } - max += 1; /* 2nd NUL on end */ - - _debug("max: %d", max); - - key = kmalloc(max, cachefiles_gfp); - if (!key) - return NULL; - - len = 0; - - /* build the cooked key */ - sprintf(key, "@%02x%c+", (unsigned) csum, 0); - len = 5; - mark = len - 1; - - if (print) { - acc = *(uint16_t *) raw; - raw += 2; - - key[len + 1] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len] = cachefiles_charmap[acc & 63]; - len += 2; - - seg = 250; - for (loop = keylen; loop > 0; loop--) { - if (seg <= 0) { - key[len++] = '\0'; - mark = len; - key[len++] = '+'; - seg = 252; - } - - key[len++] = *raw++; - ASSERT(len < max); - } - - switch (type) { - case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; - case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; - default: type = 'S'; break; - } - } else { - seg = 252; - for (loop = keylen; loop > 0; loop--) { - if (seg <= 0) { - key[len++] = '\0'; - mark = len; - key[len++] = '+'; - seg = 252; - } - - acc = *raw++; - acc |= *raw++ << 8; - acc |= *raw++ << 16; - - _debug("acc: %06x", 
acc); - - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - - ASSERT(len < max); - } + /* See if it makes sense to encode it as "hex,hex,hex" for each 32-bit + * chunk. We rely on the key having been padded out to a whole number + * of 32-bit words. + */ + n = round_up(keylen, 4); + nbe = nle = 0; + for (i = 0; i < n; i += 4) { + u32 be = be32_to_cpu(*(__be32 *)(key + i)); + u32 le = le32_to_cpu(*(__le32 *)(key + i)); + + nbe += 1 + how_many_hex_digits(be); + nle += 1 + how_many_hex_digits(le); + } - switch (type) { - case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; - case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; - default: type = 'T'; break; + b64len = DIV_ROUND_UP(keylen, 3); + pad = b64len * 3 - keylen; + b64len = 2 + b64len * 4; /* Length if we base64-encode it */ + _debug("len=%u nbe=%u nle=%u b64=%u", keylen, nbe, nle, b64len); + if (nbe < b64len || nle < b64len) { + unsigned int nlen = min(nbe, nle) + 1; + name = kmalloc(nlen, GFP_KERNEL); + if (!name) + return false; + sep = (nbe <= nle) ? 
'S' : 'T'; /* Encoding indicator */ + len = 0; + for (i = 0; i < n; i += 4) { + u32 x; + if (nbe <= nle) + x = be32_to_cpu(*(__be32 *)(key + i)); + else + x = le32_to_cpu(*(__le32 *)(key + i)); + name[len++] = sep; + if (x != 0) + len += snprintf(name + len, nlen - len, "%x", x); + sep = ','; } + goto success; } - key[mark] = type; - key[len++] = 0; - key[len] = 0; + /* We need to base64-encode it */ + name = kmalloc(b64len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'E'; + name[1] = '0' + pad; + len = 2; + kend = key + keylen; + do { + acc = *key++; + if (key < kend) { + acc |= *key++ << 8; + if (key < kend) + acc |= *key++ << 16; + } - _leave(" = %s %d", key, len); - return key; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + } while (key < kend); + +success: + name[len] = 0; + object->d_name = name; + object->d_name_len = len; + _leave(" = %s", object->d_name); + return true; } diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 9c8d34c49b12..3f369c6f816d 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -2,7 +2,7 @@ /* Network filesystem caching backend to use cache files on a premounted * filesystem * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ @@ -18,6 +18,8 @@ #include <linux/statfs.h> #include <linux/sysctl.h> #include <linux/miscdevice.h> +#include <linux/netfs.h> +#include <trace/events/netfs.h> #define CREATE_TRACE_POINTS #include "internal.h" @@ -37,14 +39,6 @@ static struct miscdevice cachefiles_dev = { .fops = &cachefiles_daemon_fops, }; -static void cachefiles_object_init_once(void *_object) -{ - struct cachefiles_object *object = _object; - - memset(object, 0, sizeof(*object)); - spin_lock_init(&object->work_lock); -} - /* * initialise the fs caching module */ @@ -52,6 +46,9 @@ static int __init cachefiles_init(void) { int ret; + ret = cachefiles_register_error_injection(); + if (ret < 0) + goto error_einj; ret = misc_register(&cachefiles_dev); if (ret < 0) goto error_dev; @@ -61,9 +58,7 @@ static int __init cachefiles_init(void) cachefiles_object_jar = kmem_cache_create("cachefiles_object_jar", sizeof(struct cachefiles_object), - 0, - SLAB_HWCACHE_ALIGN, - cachefiles_object_init_once); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!cachefiles_object_jar) { pr_notice("Failed to allocate an object jar\n"); goto error_object_jar; @@ -75,6 +70,8 @@ static int __init cachefiles_init(void) error_object_jar: misc_deregister(&cachefiles_dev); error_dev: + cachefiles_unregister_error_injection(); +error_einj: pr_err("failed to register: %d\n", ret); return ret; } @@ -90,6 +87,7 @@ static void __exit cachefiles_exit(void) kmem_cache_destroy(cachefiles_object_jar); misc_deregister(&cachefiles_dev); + cachefiles_unregister_error_injection(); } module_exit(cachefiles_exit); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index a9aca5ab5970..9bd692870617 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -1,295 +1,268 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles path walking and related routines * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/file.h> #include <linux/fs.h> -#include <linux/fsnotify.h> -#include <linux/quotaops.h> -#include <linux/xattr.h> -#include <linux/mount.h> #include <linux/namei.h> -#include <linux/security.h> -#include <linux/slab.h> #include "internal.h" -#define CACHEFILES_KEYBUF_SIZE 512 - /* - * dump debugging info about an object + * Mark the backing file as being a cache file if it's not already in use. The + * mark tells the culling request command that it's not allowed to cull the + * file or directory. The caller must hold the inode lock. */ -static noinline -void __cachefiles_printk_object(struct cachefiles_object *object, - const char *prefix) +static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) { - struct fscache_cookie *cookie; - const u8 *k; - unsigned loop; - - pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id); - pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", - prefix, object->fscache.state->name, - object->fscache.flags, work_busy(&object->fscache.work), - object->fscache.events, object->fscache.event_mask); - pr_err("%sops=%u inp=%u exc=%u\n", - prefix, object->fscache.n_ops, object->fscache.n_in_progress, - object->fscache.n_exclusive); - pr_err("%sparent=%x\n", - prefix, object->fscache.parent ? object->fscache.parent->debug_id : 0); - - spin_lock(&object->fscache.lock); - cookie = object->fscache.cookie; - if (cookie) { - pr_err("%scookie=%x [pr=%x nd=%p fl=%lx]\n", - prefix, - cookie->debug_id, - cookie->parent ? cookie->parent->debug_id : 0, - cookie->netfs_data, - cookie->flags); - pr_err("%skey=[%u] '", prefix, cookie->key_len); - k = (cookie->key_len <= sizeof(cookie->inline_key)) ? 
- cookie->inline_key : cookie->key; - for (loop = 0; loop < cookie->key_len; loop++) - pr_cont("%02x", k[loop]); - pr_cont("'\n"); + struct inode *inode = d_backing_inode(dentry); + bool can_use = false; + + if (!(inode->i_flags & S_KERNEL_FILE)) { + inode->i_flags |= S_KERNEL_FILE; + trace_cachefiles_mark_active(object, inode); + can_use = true; } else { - pr_err("%scookie=NULL\n", prefix); + pr_notice("cachefiles: Inode already in use: %pd\n", dentry); } - spin_unlock(&object->fscache.lock); + + return can_use; } -/* - * dump debugging info about a pair of objects - */ -static noinline void cachefiles_printk_object(struct cachefiles_object *object, - struct cachefiles_object *xobject) +static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) { - if (object) - __cachefiles_printk_object(object, ""); - if (xobject) - __cachefiles_printk_object(xobject, "x"); + struct inode *inode = d_backing_inode(dentry); + bool can_use; + + inode_lock(inode); + can_use = __cachefiles_mark_inode_in_use(object, dentry); + inode_unlock(inode); + return can_use; } /* - * mark the owner of a dentry, if there is one, to indicate that that dentry - * has been preemptively deleted - * - the caller must hold the i_mutex on the dentry's parent as required to - * call vfs_unlink(), vfs_rmdir() or vfs_rename() + * Unmark a backing inode. The caller must hold the inode lock. 
*/ -static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, - struct dentry *dentry, - enum fscache_why_object_killed why) +static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct dentry *dentry) { - struct cachefiles_object *object; - struct rb_node *p; - - _enter(",'%pd'", dentry); + struct inode *inode = d_backing_inode(dentry); - write_lock(&cache->active_lock); + inode->i_flags &= ~S_KERNEL_FILE; + trace_cachefiles_mark_inactive(object, inode); +} - p = cache->active_nodes.rb_node; - while (p) { - object = rb_entry(p, struct cachefiles_object, active_node); - if (object->dentry > dentry) - p = p->rb_left; - else if (object->dentry < dentry) - p = p->rb_right; - else - goto found_dentry; +/* + * Unmark a backing inode and tell cachefilesd that there's something that can + * be culled. + */ +void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); + + if (inode) { + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, file->f_path.dentry); + inode_unlock(inode); + + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + atomic_long_add(inode->i_blocks, &cache->b_released); + if (atomic_inc_return(&cache->f_released)) + cachefiles_state_changed(cache); + } } +} - write_unlock(&cache->active_lock); - trace_cachefiles_mark_buried(NULL, dentry, why); - _leave(" [no owner]"); - return; +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname, + bool *_is_new) +{ + struct dentry *subdir; + struct path path; + int ret; - /* found the dentry for */ -found_dentry: - kdebug("preemptive burial: OBJ%x [%s] %pd", - object->fscache.debug_id, - object->fscache.state->name, - dentry); + _enter(",,%s", dirname); - trace_cachefiles_mark_buried(object, dentry, why); + /* search the 
current directory for the element name */ + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - if (fscache_object_is_live(&object->fscache)) { - pr_err("\n"); - pr_err("Error: Can't preemptively bury live object\n"); - cachefiles_printk_object(object, NULL); - } else { - if (why != FSCACHE_OBJECT_IS_STALE) - fscache_object_mark_killed(&object->fscache, why); +retry: + ret = cachefiles_inject_read_error(); + if (ret == 0) + subdir = lookup_one_len(dirname, dir, strlen(dirname)); + else + subdir = ERR_PTR(ret); + if (IS_ERR(subdir)) { + trace_cachefiles_vfs_error(NULL, d_backing_inode(dir), + PTR_ERR(subdir), + cachefiles_trace_lookup_error); + if (PTR_ERR(subdir) == -ENOMEM) + goto nomem_d_alloc; + goto lookup_error; } - write_unlock(&cache->active_lock); - _leave(" [owner marked]"); -} + _debug("subdir -> %pd %s", + subdir, d_backing_inode(subdir) ? "positive" : "negative"); -/* - * record the fact that an object is now active - */ -static int cachefiles_mark_object_active(struct cachefiles_cache *cache, - struct cachefiles_object *object) -{ - struct cachefiles_object *xobject; - struct rb_node **_p, *_parent = NULL; - struct dentry *dentry; + /* we need to create the subdir if it doesn't exist yet */ + if (d_is_negative(subdir)) { + ret = cachefiles_has_space(cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + goto mkdir_error; - _enter(",%x", object->fscache.debug_id); + _debug("attempt mkdir"); -try_again: - write_lock(&cache->active_lock); + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_mkdir(&path, subdir, 0700); + if (ret < 0) + goto mkdir_error; + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700); + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, + cachefiles_trace_mkdir_error); + goto mkdir_error; + } - dentry = object->dentry; - trace_cachefiles_mark_active(object, dentry); + if (unlikely(d_unhashed(subdir))) { + 
cachefiles_put_directory(subdir); + goto retry; + } + ASSERT(d_backing_inode(subdir)); - if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { - pr_err("Error: Object already active\n"); - cachefiles_printk_object(object, NULL); - BUG(); + _debug("mkdir -> %pd{ino=%lu}", + subdir, d_backing_inode(subdir)->i_ino); + if (_is_new) + *_is_new = true; } - _p = &cache->active_nodes.rb_node; - while (*_p) { - _parent = *_p; - xobject = rb_entry(_parent, - struct cachefiles_object, active_node); + /* Tell rmdir() it's not allowed to delete the subdir */ + inode_lock(d_inode(subdir)); + inode_unlock(d_inode(dir)); - ASSERT(xobject != object); + if (!__cachefiles_mark_inode_in_use(NULL, subdir)) + goto mark_error; - if (xobject->dentry > dentry) - _p = &(*_p)->rb_left; - else if (xobject->dentry < dentry) - _p = &(*_p)->rb_right; - else - goto wait_for_old_object; - } + inode_unlock(d_inode(subdir)); - rb_link_node(&object->active_node, _parent, _p); - rb_insert_color(&object->active_node, &cache->active_nodes); + /* we need to make sure the subdir is a directory */ + ASSERT(d_backing_inode(subdir)); - write_unlock(&cache->active_lock); - _leave(" = 0"); - return 0; + if (!d_can_lookup(subdir)) { + pr_err("%s is not a directory\n", dirname); + ret = -EIO; + goto check_error; + } - /* an old object from a previous incarnation is hogging the slot - we - * need to wait for it to be destroyed */ -wait_for_old_object: - trace_cachefiles_wait_active(object, dentry, xobject); - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); - - if (fscache_object_is_live(&xobject->fscache)) { - pr_err("\n"); - pr_err("Error: Unexpected object collision\n"); - cachefiles_printk_object(object, xobject); - } - atomic_inc(&xobject->usage); - write_unlock(&cache->active_lock); - - if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { - wait_queue_head_t *wq; - - signed long timeout = 60 * HZ; - wait_queue_entry_t wait; - bool requeue; - - /* if the object we're waiting for is 
queued for processing, - * then just put ourselves on the queue behind it */ - if (work_pending(&xobject->fscache.work)) { - _debug("queue OBJ%x behind OBJ%x immediately", - object->fscache.debug_id, - xobject->fscache.debug_id); - goto requeue; - } + ret = -EPERM; + if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) || + !d_backing_inode(subdir)->i_op->lookup || + !d_backing_inode(subdir)->i_op->mkdir || + !d_backing_inode(subdir)->i_op->rename || + !d_backing_inode(subdir)->i_op->rmdir || + !d_backing_inode(subdir)->i_op->unlink) + goto check_error; - /* otherwise we sleep until either the object we're waiting for - * is done, or the fscache_object is congested */ - wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); - init_wait(&wait); - requeue = false; - do { - prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); - if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) - break; - - requeue = fscache_object_sleep_till_congested(&timeout); - } while (timeout > 0 && !requeue); - finish_wait(wq, &wait); - - if (requeue && - test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { - _debug("queue OBJ%x behind OBJ%x after wait", - object->fscache.debug_id, - xobject->fscache.debug_id); - goto requeue; - } + _leave(" = [%lu]", d_backing_inode(subdir)->i_ino); + return subdir; - if (timeout <= 0) { - pr_err("\n"); - pr_err("Error: Overlong wait for old active object to go away\n"); - cachefiles_printk_object(object, xobject); - goto requeue; - } - } +check_error: + cachefiles_put_directory(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); - ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); +mark_error: + inode_unlock(d_inode(subdir)); + dput(subdir); + return ERR_PTR(-EBUSY); - cache->cache.ops->put_object(&xobject->fscache, - (enum fscache_obj_ref_trace)cachefiles_obj_put_wait_retry); - goto try_again; +mkdir_error: + inode_unlock(d_inode(dir)); + dput(subdir); + pr_err("mkdir %s failed with error %d\n", dirname, ret); + return 
ERR_PTR(ret); + +lookup_error: + inode_unlock(d_inode(dir)); + ret = PTR_ERR(subdir); + pr_err("Lookup %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); -requeue: - cache->cache.ops->put_object(&xobject->fscache, - (enum fscache_obj_ref_trace)cachefiles_obj_put_wait_timeo); - _leave(" = -ETIMEDOUT"); - return -ETIMEDOUT; +nomem_d_alloc: + inode_unlock(d_inode(dir)); + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); } /* - * Mark an object as being inactive. + * Put a subdirectory. */ -void cachefiles_mark_object_inactive(struct cachefiles_cache *cache, - struct cachefiles_object *object, - blkcnt_t i_blocks) +void cachefiles_put_directory(struct dentry *dir) { - struct dentry *dentry = object->dentry; - struct inode *inode = d_backing_inode(dentry); - - trace_cachefiles_mark_inactive(object, dentry, inode); + if (dir) { + inode_lock(dir->d_inode); + __cachefiles_unmark_inode_in_use(NULL, dir); + inode_unlock(dir->d_inode); + dput(dir); + } +} - write_lock(&cache->active_lock); - rb_erase(&object->active_node, &cache->active_nodes); - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); - write_unlock(&cache->active_lock); +/* + * Remove a regular file from the cache. + */ +static int cachefiles_unlink(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, struct dentry *dentry, + enum fscache_why_object_killed why) +{ + struct path path = { + .mnt = cache->mnt, + .dentry = dir, + }; + int ret; - wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); + trace_cachefiles_unlink(object, dentry, why); + ret = security_path_unlink(&path, dentry); + if (ret < 0) { + cachefiles_io_error(cache, "Unlink security error"); + return ret; + } - /* This object can now be culled, so we need to let the daemon know - * that there is something it can remove if it needs to. 
- */ - atomic_long_add(i_blocks, &cache->b_released); - if (atomic_inc_return(&cache->f_released)) - cachefiles_state_changed(cache); + ret = cachefiles_inject_remove_error(); + if (ret == 0) { + ret = vfs_unlink(&init_user_ns, d_backing_inode(dir), dentry, NULL); + if (ret == -EIO) + cachefiles_io_error(cache, "Unlink failed"); + } + if (ret != 0) + trace_cachefiles_vfs_error(object, d_backing_inode(dir), ret, + cachefiles_trace_unlink_error); + return ret; } /* - * delete an object representation from the cache - * - file backed objects are unlinked - * - directory backed objects are stuffed into the graveyard for userspace to + * Delete an object representation from the cache + * - File backed objects are unlinked + * - Directory backed objects are stuffed into the graveyard for userspace to * delete - * - unlocks the directory mutex */ -static int cachefiles_bury_object(struct cachefiles_cache *cache, - struct cachefiles_object *object, - struct dentry *dir, - struct dentry *rep, - bool preemptive, - enum fscache_why_object_killed why) +int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why) { struct dentry *grave, *trap; struct path path, path_to_graveyard; @@ -298,29 +271,21 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, _enter(",'%pd','%pd'", dir, rep); + if (rep->d_parent != dir) { + inode_unlock(d_inode(dir)); + _leave(" = -ESTALE"); + return -ESTALE; + } + /* non-directories can just be unlinked */ if (!d_is_dir(rep)) { - _debug("unlink stale object"); - - path.mnt = cache->mnt; - path.dentry = dir; - ret = security_path_unlink(&path, rep); - if (ret < 0) { - cachefiles_io_error(cache, "Unlink security error"); - } else { - trace_cachefiles_unlink(object, rep, why); - ret = vfs_unlink(&init_user_ns, d_inode(dir), rep, - NULL); - - if (preemptive) - cachefiles_mark_object_buried(cache, rep, why); - } + dget(rep); /* 
Stop the dentry being negated if it's only pinned + * by a file struct. + */ + ret = cachefiles_unlink(cache, object, dir, rep, why); + dput(rep); inode_unlock(d_inode(dir)); - - if (ret == -EIO) - cachefiles_io_error(cache, "Unlink failed"); - _leave(" = %d", ret); return ret; } @@ -368,14 +333,16 @@ try_again: grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); if (IS_ERR(grave)) { unlock_rename(cache->graveyard, dir); + trace_cachefiles_vfs_error(object, d_inode(cache->graveyard), + PTR_ERR(grave), + cachefiles_trace_lookup_error); if (PTR_ERR(grave) == -ENOMEM) { _leave(" = -ENOMEM"); return -ENOMEM; } - cachefiles_io_error(cache, "Lookup error %ld", - PTR_ERR(grave)); + cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave)); return -EIO; } @@ -420,15 +387,18 @@ try_again: .new_dentry = grave, }; trace_cachefiles_rename(object, rep, grave, why); - ret = vfs_rename(&rd); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_rename(&rd); + if (ret != 0) + trace_cachefiles_vfs_error(object, d_inode(dir), ret, + cachefiles_trace_rename_error); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); - - if (preemptive) - cachefiles_mark_object_buried(cache, rep, why); } + __cachefiles_unmark_inode_in_use(object, rep); unlock_rename(cache->graveyard, dir); dput(grave); _leave(" = 0"); @@ -436,493 +406,358 @@ try_again: } /* - * delete an object representation from the cache + * Delete a cache file. 
*/ -int cachefiles_delete_object(struct cachefiles_cache *cache, - struct cachefiles_object *object) +int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why) { - struct dentry *dir; + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry = object->file->f_path.dentry; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; int ret; - _enter(",OBJ%x{%pd}", object->fscache.debug_id, object->dentry); - - ASSERT(object->dentry); - ASSERT(d_backing_inode(object->dentry)); - ASSERT(object->dentry->d_parent); + _enter(",OBJ%x{%pD}", object->debug_id, object->file); - dir = dget_parent(object->dentry); + /* Stop the dentry being negated if it's only pinned by a file struct. */ + dget(dentry); - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - - if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) { - /* object allocation for the same key preemptively deleted this - * object's file so that it could create its own file */ - _debug("object preemptively buried"); - inode_unlock(d_inode(dir)); - ret = 0; - } else { - /* we need to check that our parent is _still_ our parent - it - * may have been renamed */ - if (dir == object->dentry->d_parent) { - ret = cachefiles_bury_object(cache, object, dir, - object->dentry, false, - FSCACHE_OBJECT_WAS_RETIRED); - } else { - /* it got moved, presumably by cachefilesd culling it, - * so it's no longer in the key path and we can ignore - * it */ - inode_unlock(d_inode(dir)); - ret = 0; - } - } - - dput(dir); - _leave(" = %d", ret); + inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_unlink(volume->cache, object, fan, dentry, why); + inode_unlock(d_backing_inode(fan)); + dput(dentry); return ret; } /* - * walk from the parent object to the child object through the backing - * filesystem, creating directories as we go + * Create a temporary file and leave it unattached and un-xattr'd until the + * time comes to discard 
the object from memory. */ -int cachefiles_walk_to_object(struct cachefiles_object *parent, - struct cachefiles_object *object, - const char *key, - struct cachefiles_xattr *auxdata) +struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) { - struct cachefiles_cache *cache; - struct dentry *dir, *next = NULL; - struct inode *inode; + struct cachefiles_volume *volume = object->volume; + struct cachefiles_cache *cache = volume->cache; + const struct cred *saved_cred; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; + struct file *file; struct path path; - const char *name; - int ret, nlen; - - _enter("OBJ%x{%pd},OBJ%x,%s,", - parent->fscache.debug_id, parent->dentry, - object->fscache.debug_id, key); - - cache = container_of(parent->fscache.cache, - struct cachefiles_cache, cache); - path.mnt = cache->mnt; - - ASSERT(parent->dentry); - ASSERT(d_backing_inode(parent->dentry)); - - if (!(d_is_dir(parent->dentry))) { - // TODO: convert file to dir - _leave("looking up in none directory"); - return -ENOBUFS; - } - - dir = dget(parent->dentry); - -advance: - /* attempt to transit the first directory component */ - name = key; - nlen = strlen(key); - - /* key ends in a double NUL */ - key = key + nlen + 1; - if (!*key) - key = NULL; - -lookup_again: - /* search the current directory for the element name */ - _debug("lookup '%s'", name); - - inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + uint64_t ni_size = object->cookie->object_size; + long ret; - next = lookup_one_len(name, dir, nlen); - if (IS_ERR(next)) { - trace_cachefiles_lookup(object, next, NULL); - goto lookup_error; - } + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); - inode = d_backing_inode(next); - trace_cachefiles_lookup(object, next, inode); - _debug("next -> %pd %s", next, inode ? 
"positive" : "negative"); - - if (!key) - object->new = !inode; - - /* if this element of the path doesn't exist, then the lookup phase - * failed, and we can release any readers in the certain knowledge that - * there's nothing for them to actually read */ - if (d_is_negative(next)) - fscache_object_lookup_negative(&object->fscache); - - /* we need to create the object if it's negative */ - if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { - /* index objects and intervening tree levels must be subdirs */ - if (d_is_negative(next)) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto no_space_error; - - path.dentry = dir; - ret = security_path_mkdir(&path, next, 0); - if (ret < 0) - goto create_error; - ret = vfs_mkdir(&init_user_ns, d_inode(dir), next, 0); - if (!key) - trace_cachefiles_mkdir(object, next, ret); - if (ret < 0) - goto create_error; - - if (unlikely(d_unhashed(next))) { - dput(next); - inode_unlock(d_inode(dir)); - goto lookup_again; - } - ASSERT(d_backing_inode(next)); - - _debug("mkdir -> %pd{ino=%lu}", - next, d_backing_inode(next)->i_ino); - - } else if (!d_can_lookup(next)) { - pr_err("inode %lu is not a directory\n", - d_backing_inode(next)->i_ino); - ret = -ENOBUFS; - goto error; - } + cachefiles_begin_secure(cache, &saved_cred); - } else { - /* non-index objects start out life as files */ - if (d_is_negative(next)) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto no_space_error; - - path.dentry = dir; - ret = security_path_mknod(&path, next, S_IFREG, 0); - if (ret < 0) - goto create_error; - ret = vfs_create(&init_user_ns, d_inode(dir), next, - S_IFREG, true); - trace_cachefiles_create(object, next, ret); - if (ret < 0) - goto create_error; - - ASSERT(d_backing_inode(next)); - - _debug("create -> %pd{ino=%lu}", - next, d_backing_inode(next)->i_ino); - - } else if (!d_can_lookup(next) && - !d_is_reg(next) - ) { - pr_err("inode %lu is not a file or directory\n", - d_backing_inode(next)->i_ino); - ret = 
-ENOBUFS; - goto error; + path.mnt = cache->mnt; + ret = cachefiles_inject_write_error(); + if (ret == 0) + path.dentry = vfs_tmpfile(&init_user_ns, fan, S_IFREG, O_RDWR); + else + path.dentry = ERR_PTR(ret); + if (IS_ERR(path.dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(path.dentry), + cachefiles_trace_tmpfile_error); + if (PTR_ERR(path.dentry) == -EIO) + cachefiles_io_error_obj(object, "Failed to create tmpfile"); + file = ERR_CAST(path.dentry); + goto out; + } + + trace_cachefiles_tmpfile(object, d_backing_inode(path.dentry)); + + if (!cachefiles_mark_inode_in_use(object, path.dentry)) { + file = ERR_PTR(-EBUSY); + goto out_dput; + } + + if (ni_size > 0) { + trace_cachefiles_trunc(object, d_backing_inode(path.dentry), 0, ni_size, + cachefiles_trunc_expand_tmpfile); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_truncate(&path, ni_size); + if (ret < 0) { + trace_cachefiles_vfs_error( + object, d_backing_inode(path.dentry), ret, + cachefiles_trace_trunc_error); + file = ERR_PTR(ret); + goto out_dput; } } - /* process the next component */ - if (key) { - _debug("advance"); - inode_unlock(d_inode(dir)); - dput(dir); - dir = next; - next = NULL; - goto advance; + file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + d_backing_inode(path.dentry), cache->cache_cred); + if (IS_ERR(file)) { + trace_cachefiles_vfs_error(object, d_backing_inode(path.dentry), + PTR_ERR(file), + cachefiles_trace_open_error); + goto out_dput; + } + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + fput(file); + pr_notice("Cache does not support read_iter and write_iter\n"); + file = ERR_PTR(-EINVAL); } - /* we've found the object we were looking for */ - object->dentry = next; - - /* if we've found that the terminal object exists, then we need to - * check its attributes and delete it if it's out of date */ - if (!object->new) { - _debug("validate '%pd'", next); - - ret = 
cachefiles_check_object_xattr(object, auxdata); - if (ret == -ESTALE) { - /* delete the object (the deleter drops the directory - * mutex) */ - object->dentry = NULL; +out_dput: + dput(path.dentry); +out: + cachefiles_end_secure(cache, saved_cred); + return file; +} - ret = cachefiles_bury_object(cache, object, dir, next, - true, - FSCACHE_OBJECT_IS_STALE); - dput(next); - next = NULL; +/* + * Create a new file. + */ +static bool cachefiles_create_file(struct cachefiles_object *object) +{ + struct file *file; + int ret; - if (ret < 0) - goto delete_error; + ret = cachefiles_has_space(object->volume->cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + return false; - _debug("redo lookup"); - fscache_object_retrying_stale(&object->fscache); - goto lookup_again; - } - } + file = cachefiles_create_tmpfile(object); + if (IS_ERR(file)) + return false; - /* note that we're now using this object */ - ret = cachefiles_mark_object_active(cache, object); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); + set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + _debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino); + object->file = file; + return true; +} - inode_unlock(d_inode(dir)); - dput(dir); - dir = NULL; +/* + * Open an existing file, checking its attributes and replacing it if it is + * stale. 
+ */ +static bool cachefiles_open_file(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct file *file; + struct path path; + int ret; - if (ret == -ETIMEDOUT) - goto mark_active_timed_out; + _enter("%pd", dentry); - _debug("=== OBTAINED_OBJECT ==="); + if (!cachefiles_mark_inode_in_use(object, dentry)) + return false; - if (object->new) { - /* attach data to a newly constructed terminal object */ - ret = cachefiles_set_object_xattr(object, auxdata); - if (ret < 0) - goto check_error; - } else { - /* always update the atime on an object we've just looked up - * (this is used to keep track of culling, and atimes are only - * updated by read, write and readdir but not lookup or - * open) */ - path.dentry = next; - touch_atime(&path); - } - - /* open a file interface onto a data file */ - if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { - if (d_is_reg(object->dentry)) { - const struct address_space_operations *aops; - - ret = -EPERM; - aops = d_backing_inode(object->dentry)->i_mapping->a_ops; - if (!aops->bmap) - goto check_error; - if (object->dentry->d_sb->s_blocksize > PAGE_SIZE) - goto check_error; - - object->backer = object->dentry; - } else { - BUG(); // TODO: open file in data-class subdir - } + /* We need to open a file interface onto a data file now as we can't do + * it on demand because writeback called from do_exit() sees + * current->fs == NULL - which breaks d_path() called from ext4 open. 
+ */ + path.mnt = cache->mnt; + path.dentry = dentry; + file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + d_backing_inode(dentry), cache->cache_cred); + if (IS_ERR(file)) { + trace_cachefiles_vfs_error(object, d_backing_inode(dentry), + PTR_ERR(file), + cachefiles_trace_open_error); + goto error; } - object->new = 0; - fscache_obtained_object(&object->fscache); - - _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino); - return 0; - -no_space_error: - fscache_object_mark_killed(&object->fscache, FSCACHE_OBJECT_NO_SPACE); -create_error: - _debug("create error %d", ret); - if (ret == -EIO) - cachefiles_io_error(cache, "Create/mkdir failed"); - goto error; + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + pr_notice("Cache does not support read_iter and write_iter\n"); + goto error_fput; + } + _debug("file -> %pd positive", dentry); -mark_active_timed_out: - _debug("mark active timed out"); - goto release_dentry; + ret = cachefiles_check_auxdata(object, file); + if (ret < 0) + goto check_failed; -check_error: - _debug("check error %d", ret); - cachefiles_mark_object_inactive( - cache, object, d_backing_inode(object->dentry)->i_blocks); -release_dentry: - dput(object->dentry); - object->dentry = NULL; - goto error_out; - -delete_error: - _debug("delete error %d", ret); - goto error_out2; + object->file = file; -lookup_error: - _debug("lookup error %ld", PTR_ERR(next)); - ret = PTR_ERR(next); - if (ret == -EIO) - cachefiles_io_error(cache, "Lookup failed"); - next = NULL; + /* Always update the atime on an object we've just looked up (this is + * used to keep track of culling, and atimes are only updated by read, + * write and readdir but not lookup or open). 
+ */ + touch_atime(&file->f_path); + dput(dentry); + return true; + +check_failed: + fscache_cookie_lookup_negative(object->cookie); + cachefiles_unmark_inode_in_use(object, file); + if (ret == -ESTALE) { + fput(file); + dput(dentry); + return cachefiles_create_file(object); + } +error_fput: + fput(file); error: - inode_unlock(d_inode(dir)); - dput(next); -error_out2: - dput(dir); -error_out: - _leave(" = error %d", -ret); - return ret; + dput(dentry); + return false; } /* - * get a subdirectory + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go */ -struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, - struct dentry *dir, - const char *dirname) +bool cachefiles_look_up_object(struct cachefiles_object *object) { - struct dentry *subdir; - struct path path; + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; int ret; - _enter(",,%s", dirname); - - /* search the current directory for the element name */ - inode_lock(d_inode(dir)); - -retry: - subdir = lookup_one_len(dirname, dir, strlen(dirname)); - if (IS_ERR(subdir)) { - if (PTR_ERR(subdir) == -ENOMEM) - goto nomem_d_alloc; - goto lookup_error; + _enter("OBJ%x,%s,", object->debug_id, object->d_name); + + /* Look up path "cache/vol/fanout/file". 
*/ + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_positive_unlocked(object->d_name, fan, + object->d_name_len); + else + dentry = ERR_PTR(ret); + trace_cachefiles_lookup(object, dentry); + if (IS_ERR(dentry)) { + if (dentry == ERR_PTR(-ENOENT)) + goto new_file; + if (dentry == ERR_PTR(-EIO)) + cachefiles_io_error_obj(object, "Lookup failed"); + return false; + } + + if (!d_is_reg(dentry)) { + pr_err("%pd is not a file\n", dentry); + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_bury_object(volume->cache, object, fan, dentry, + FSCACHE_OBJECT_IS_WEIRD); + dput(dentry); + if (ret < 0) + return false; + goto new_file; } - _debug("subdir -> %pd %s", - subdir, d_backing_inode(subdir) ? "positive" : "negative"); + if (!cachefiles_open_file(object, dentry)) + return false; - /* we need to create the subdir if it doesn't exist yet */ - if (d_is_negative(subdir)) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto mkdir_error; + _leave(" = t [%lu]", file_inode(object->file)->i_ino); + return true; - _debug("attempt mkdir"); +new_file: + fscache_cookie_lookup_negative(object->cookie); + return cachefiles_create_file(object); +} - path.mnt = cache->mnt; - path.dentry = dir; - ret = security_path_mkdir(&path, subdir, 0700); - if (ret < 0) - goto mkdir_error; - ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700); - if (ret < 0) - goto mkdir_error; +/* + * Attempt to link a temporary file into its rightful place in the cache. 
+ */ +bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; + bool success = false; + int ret; - if (unlikely(d_unhashed(subdir))) { - dput(subdir); - goto retry; + _enter(",%pD", object->file); + + inode_lock_nested(d_inode(fan), I_MUTEX_PARENT); + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_one_len(object->d_name, fan, object->d_name_len); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out_unlock; + } + + if (!d_is_negative(dentry)) { + if (d_backing_inode(dentry) == file_inode(object->file)) { + success = true; + goto out_dput; } - ASSERT(d_backing_inode(subdir)); - _debug("mkdir -> %pd{ino=%lu}", - subdir, d_backing_inode(subdir)->i_ino); - } - - inode_unlock(d_inode(dir)); - - /* we need to make sure the subdir is a directory */ - ASSERT(d_backing_inode(subdir)); + ret = cachefiles_unlink(volume->cache, object, fan, dentry, + FSCACHE_OBJECT_IS_STALE); + if (ret < 0) + goto out_dput; - if (!d_can_lookup(subdir)) { - pr_err("%s is not a directory\n", dirname); - ret = -EIO; - goto check_error; + dput(dentry); + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_one_len(object->d_name, fan, object->d_name_len); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out_unlock; + } } - ret = -EPERM; - if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) || - !d_backing_inode(subdir)->i_op->lookup || - !d_backing_inode(subdir)->i_op->mkdir || - !d_backing_inode(subdir)->i_op->create || - !d_backing_inode(subdir)->i_op->rename 
|| - !d_backing_inode(subdir)->i_op->rmdir || - !d_backing_inode(subdir)->i_op->unlink) - goto check_error; - - _leave(" = [%lu]", d_backing_inode(subdir)->i_ino); - return subdir; - -check_error: - dput(subdir); - _leave(" = %d [check]", ret); - return ERR_PTR(ret); - -mkdir_error: - inode_unlock(d_inode(dir)); - dput(subdir); - pr_err("mkdir %s failed with error %d\n", dirname, ret); - return ERR_PTR(ret); - -lookup_error: - inode_unlock(d_inode(dir)); - ret = PTR_ERR(subdir); - pr_err("Lookup %s failed with error %d\n", dirname, ret); - return ERR_PTR(ret); - -nomem_d_alloc: - inode_unlock(d_inode(dir)); - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_link(object->file->f_path.dentry, &init_user_ns, + d_inode(fan), dentry, NULL); + if (ret < 0) { + trace_cachefiles_vfs_error(object, d_inode(fan), ret, + cachefiles_trace_link_error); + _debug("link fail %d", ret); + } else { + trace_cachefiles_link(object, file_inode(object->file)); + spin_lock(&object->lock); + /* TODO: Do we want to switch the file pointer to the new dentry? */ + clear_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + spin_unlock(&object->lock); + success = true; + } + +out_dput: + dput(dentry); +out_unlock: + inode_unlock(d_inode(fan)); + _leave(" = %u", success); + return success; } /* - * find out if an object is in use or not - * - if finds object and it's not in use: - * - returns a pointer to the object and a reference on it - * - returns with the directory locked + * Look up an inode to be checked or culled. Return -EBUSY if the inode is + * marked in use. 
*/ -static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, - struct dentry *dir, - char *filename) +static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache, + struct dentry *dir, + char *filename) { - struct cachefiles_object *object; - struct rb_node *_n; struct dentry *victim; - int ret; - - //_enter(",%pd/,%s", - // dir, filename); + int ret = -ENOENT; - /* look up the victim */ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); victim = lookup_one_len(filename, dir, strlen(filename)); if (IS_ERR(victim)) goto lookup_error; - - //_debug("victim -> %pd %s", - // victim, d_backing_inode(victim) ? "positive" : "negative"); - - /* if the object is no longer there then we probably retired the object - * at the netfs's request whilst the cull was in progress - */ - if (d_is_negative(victim)) { - inode_unlock(d_inode(dir)); - dput(victim); - _leave(" = -ENOENT [absent]"); - return ERR_PTR(-ENOENT); - } - - /* check to see if we're using this object */ - read_lock(&cache->active_lock); - - _n = cache->active_nodes.rb_node; - - while (_n) { - object = rb_entry(_n, struct cachefiles_object, active_node); - - if (object->dentry > victim) - _n = _n->rb_left; - else if (object->dentry < victim) - _n = _n->rb_right; - else - goto object_in_use; - } - - read_unlock(&cache->active_lock); - - //_leave(" = %pd", victim); + if (d_is_negative(victim)) + goto lookup_put; + if (d_inode(victim)->i_flags & S_KERNEL_FILE) + goto lookup_busy; return victim; -object_in_use: - read_unlock(&cache->active_lock); +lookup_busy: + ret = -EBUSY; +lookup_put: inode_unlock(d_inode(dir)); dput(victim); - //_leave(" = -EBUSY [in use]"); - return ERR_PTR(-EBUSY); + return ERR_PTR(ret); lookup_error: inode_unlock(d_inode(dir)); ret = PTR_ERR(victim); - if (ret == -ENOENT) { - /* file or dir now absent - probably retired by netfs */ - _leave(" = -ESTALE [absent]"); - return ERR_PTR(-ESTALE); - } + if (ret == -ENOENT) + return ERR_PTR(-ESTALE); /* Probably 
got retired by the netfs */ if (ret == -EIO) { cachefiles_io_error(cache, "Lookup failed"); @@ -931,46 +766,46 @@ lookup_error: ret = -EIO; } - _leave(" = %d", ret); return ERR_PTR(ret); } /* - * cull an object if it's not in use + * Cull an object if it's not in use * - called only by cache manager daemon */ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; + struct inode *inode; int ret; _enter(",%pd/,%s", dir, filename); - victim = cachefiles_check_active(cache, dir, filename); + victim = cachefiles_lookup_for_cull(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); - _debug("victim -> %pd %s", - victim, d_backing_inode(victim) ? "positive" : "negative"); - - /* okay... the victim is not being used so we can cull it - * - start by marking it as stale - */ - _debug("victim is cullable"); - - ret = cachefiles_remove_object_xattr(cache, victim); + /* check to see if someone is using this object */ + inode = d_inode(victim); + inode_lock(inode); + if (inode->i_flags & S_KERNEL_FILE) { + ret = -EBUSY; + } else { + /* Stop the cache from picking it back up */ + inode->i_flags |= S_KERNEL_FILE; + ret = 0; + } + inode_unlock(inode); if (ret < 0) goto error_unlock; - /* actually remove the victim (drops the dir mutex) */ - _debug("bury"); - - ret = cachefiles_bury_object(cache, NULL, dir, victim, false, + ret = cachefiles_bury_object(cache, NULL, dir, victim, FSCACHE_OBJECT_WAS_CULLED); if (ret < 0) goto error; + fscache_count_culled(); dput(victim); _leave(" = 0"); return 0; @@ -979,11 +814,8 @@ error_unlock: inode_unlock(d_inode(dir)); error: dput(victim); - if (ret == -ENOENT) { - /* file or dir now absent - probably retired by netfs */ - _leave(" = -ESTALE [absent]"); - return -ESTALE; - } + if (ret == -ENOENT) + return -ESTALE; /* Probably got retired by the netfs */ if (ret != -ENOMEM) { pr_err("Internal error: %d\n", ret); @@ -995,7 +827,7 @@ error: } /* - * find out if an object is in 
use or not + * Find out if an object is in use or not * - called only by cache manager daemon * - returns -EBUSY or 0 to indicate whether an object is in use or not */ @@ -1003,16 +835,13 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; + int ret = 0; - //_enter(",%pd/,%s", - // dir, filename); - - victim = cachefiles_check_active(cache, dir, filename); + victim = cachefiles_lookup_for_cull(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); inode_unlock(d_inode(dir)); dput(victim); - //_leave(" = 0"); - return 0; + return ret; } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c deleted file mode 100644 index fcf4f3b72923..000000000000 --- a/fs/cachefiles/rdwr.c +++ /dev/null @@ -1,972 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Storage object read/write - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#include <linux/mount.h> -#include <linux/slab.h> -#include <linux/file.h> -#include <linux/swap.h> -#include "internal.h" - -/* - * detect wake up events generated by the unlocking of pages in which we're - * interested - * - we use this to detect read completion of backing pages - * - the caller holds the waitqueue lock - */ -static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, - int sync, void *_key) -{ - struct cachefiles_one_read *monitor = - container_of(wait, struct cachefiles_one_read, monitor); - struct cachefiles_object *object; - struct fscache_retrieval *op = monitor->op; - struct wait_page_key *key = _key; - struct folio *folio = wait->private; - - ASSERT(key); - - _enter("{%lu},%u,%d,{%p,%u}", - monitor->netfs_page->index, mode, sync, - key->folio, key->bit_nr); - - if (key->folio != folio || key->bit_nr != PG_locked) - return 0; - - _debug("--- monitor %p %lx ---", folio, folio->flags); - - if (!folio_test_uptodate(folio) && !folio_test_error(folio)) 
{ - /* unlocked, not uptodate and not erronous? */ - _debug("page probably truncated"); - } - - /* remove from the waitqueue */ - list_del(&wait->entry); - - /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(op); - - /* We need to temporarily bump the usage count as we don't own a ref - * here otherwise cachefiles_read_copier() may free the op between the - * monitor being enqueued on the op->to_do list and the op getting - * enqueued on the work queue. - */ - fscache_get_retrieval(op); - - object = container_of(op->op.object, struct cachefiles_object, fscache); - spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &op->to_do); - fscache_enqueue_retrieval(op); - spin_unlock(&object->work_lock); - - fscache_put_retrieval(op); - return 0; -} - -/* - * handle a probably truncated page - * - check to see if the page is still relevant and reissue the read if - * possible - * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we - * must wait again and 0 if successful - */ -static int cachefiles_read_reissue(struct cachefiles_object *object, - struct cachefiles_one_read *monitor) -{ - struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; - struct page *backpage = monitor->back_page, *backpage2; - int ret; - - _enter("{ino=%lx},{%lx,%lx}", - d_backing_inode(object->backer)->i_ino, - backpage->index, backpage->flags); - - /* skip if the page was truncated away completely */ - if (backpage->mapping != bmapping) { - _leave(" = -ENODATA [mapping]"); - return -ENODATA; - } - - backpage2 = find_get_page(bmapping, backpage->index); - if (!backpage2) { - _leave(" = -ENODATA [gone]"); - return -ENODATA; - } - - if (backpage != backpage2) { - put_page(backpage2); - _leave(" = -ENODATA [different]"); - return -ENODATA; - } - - /* the page is still there and we already have a ref on it, so we don't - * need a second */ - put_page(backpage2); - - INIT_LIST_HEAD(&monitor->op_link); - 
folio_add_wait_queue(page_folio(backpage), &monitor->monitor); - - if (trylock_page(backpage)) { - ret = -EIO; - if (PageError(backpage)) - goto unlock_discard; - ret = 0; - if (PageUptodate(backpage)) - goto unlock_discard; - - _debug("reissue read"); - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto discard; - } - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - /* it'll reappear on the todo list */ - _leave(" = -EINPROGRESS"); - return -EINPROGRESS; - -unlock_discard: - unlock_page(backpage); -discard: - spin_lock_irq(&object->work_lock); - list_del(&monitor->op_link); - spin_unlock_irq(&object->work_lock); - _leave(" = %d", ret); - return ret; -} - -/* - * copy data from backing pages to netfs pages to complete a read operation - * - driven by FS-Cache's thread pool - */ -static void cachefiles_read_copier(struct fscache_operation *_op) -{ - struct cachefiles_one_read *monitor; - struct cachefiles_object *object; - struct fscache_retrieval *op; - int error, max; - - op = container_of(_op, struct fscache_retrieval, op); - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino); - - max = 8; - spin_lock_irq(&object->work_lock); - - while (!list_empty(&op->to_do)) { - monitor = list_entry(op->to_do.next, - struct cachefiles_one_read, op_link); - list_del(&monitor->op_link); - - spin_unlock_irq(&object->work_lock); - - _debug("- copy {%lu}", monitor->back_page->index); - - recheck: - if (test_bit(FSCACHE_COOKIE_INVALIDATING, - &object->fscache.cookie->flags)) { - error = -ESTALE; - } else if (PageUptodate(monitor->back_page)) { - copy_highpage(monitor->netfs_page, monitor->back_page); - 
fscache_mark_page_cached(monitor->op, - monitor->netfs_page); - error = 0; - } else if (!PageError(monitor->back_page)) { - /* the page has probably been truncated */ - error = cachefiles_read_reissue(object, monitor); - if (error == -EINPROGRESS) - goto next; - goto recheck; - } else { - cachefiles_io_error_obj( - object, - "Readpage failed on backing file %lx", - (unsigned long) monitor->back_page->flags); - error = -EIO; - } - - put_page(monitor->back_page); - - fscache_end_io(op, monitor->netfs_page, error); - put_page(monitor->netfs_page); - fscache_retrieval_complete(op, 1); - fscache_put_retrieval(op); - kfree(monitor); - - next: - /* let the thread pool have some air occasionally */ - max--; - if (max < 0 || need_resched()) { - if (!list_empty(&op->to_do)) - fscache_enqueue_retrieval(op); - _leave(" [maxed out]"); - return; - } - - spin_lock_irq(&object->work_lock); - } - - spin_unlock_irq(&object->work_lock); - _leave(""); -} - -/* - * read the corresponding page to the given set from the backing file - * - an uncertain page is simply discarded, to be tried again another time - */ -static int cachefiles_read_backing_file_one(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct page *netpage) -{ - struct cachefiles_one_read *monitor; - struct address_space *bmapping; - struct page *newpage, *backpage; - int ret; - - _enter(""); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->netfs_page = netpage; - monitor->op = fscache_get_retrieval(op); - - init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); - - /* attempt to get hold of the backing page */ - bmapping = d_backing_inode(object->backer)->i_mapping; - newpage = NULL; - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = 
__page_cache_alloc(cachefiles_gfp); - if (!newpage) - goto nomem_monitor; - } - - ret = add_to_page_cache_lru(newpage, bmapping, - netpage->index, cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem_page; - } - - /* we've installed a new backing page, so now we need to start - * it reading */ -installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - -read_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* set the monitor to transfer the data across */ -monitor_backing_page: - _debug("- monitor add"); - - /* install the monitor */ - get_page(monitor->netfs_page); - get_page(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - folio_add_wait_queue(page_folio(backpage), &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - goto success; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ -backing_page_already_present: - _debug("- present"); - - if (newpage) { - put_page(newpage); - newpage = NULL; - } - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - if (!trylock_page(backpage)) - goto monitor_backing_page; - _debug("read %p {%lx}", backpage, backpage->flags); - goto read_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ -backing_page_already_uptodate: - _debug("- uptodate"); - - fscache_mark_page_cached(op, netpage); - - copy_highpage(netpage, backpage); - fscache_end_io(op, 
netpage, 0); - fscache_retrieval_complete(op, 1); - -success: - _debug("success"); - ret = 0; - -out: - if (backpage) - put_page(backpage); - if (monitor) { - fscache_put_retrieval(monitor->op); - kfree(monitor); - } - _leave(" = %d", ret); - return ret; - -read_error: - _debug("read error %d", ret); - if (ret == -ENOMEM) { - fscache_retrieval_complete(op, 1); - goto out; - } -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - fscache_retrieval_complete(op, 1); - ret = -ENOBUFS; - goto out; - -nomem_page: - put_page(newpage); -nomem_monitor: - fscache_put_retrieval(monitor->op); - kfree(monitor); -nomem: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * read a page from the cache or allocate a block in which to store it - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - if the page is backed by a block in the cache: - * - a read will be started which will call the callback on completion - * - 0 will be returned - * - else if the page is unbacked: - * - the metadata will be retained - * - -ENODATA will be returned - */ -int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct inode *inode; - sector_t block; - unsigned shift; - int ret, ret2; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{%p},{%lx},,,", object, page->index); - - if (!object->backer) - goto enobufs; - - inode = d_backing_inode(object->backer); - ASSERT(S_ISREG(inode->i_mode)); - - /* calculate the shift required to use bmap */ - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - op->op.flags &= 
FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - /* we assume the absence or presence of the first block is a good - * enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually good - * enough for this as it doesn't indicate errors, but it's all we've - * got for the moment - */ - block = page->index; - block <<= shift; - - ret2 = bmap(inode, &block); - ASSERT(ret2 == 0); - - _debug("%llx -> %llx", - (unsigned long long) (page->index << shift), - (unsigned long long) block); - - if (block) { - /* submit the apparently valid page to the backing fs to be - * read from disk */ - ret = cachefiles_read_backing_file_one(object, op, page); - } else if (cachefiles_has_space(cache, 0, 1) == 0) { - /* there's space in the cache we can use */ - fscache_mark_page_cached(op, page); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - goto enobufs; - } - - _leave(" = %d", ret); - return ret; - -enobufs: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} - -/* - * read the corresponding pages to the given set from the backing file - * - any uncertain pages are simply discarded, to be tried again another time - */ -static int cachefiles_read_backing_file(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct list_head *list) -{ - struct cachefiles_one_read *monitor = NULL; - struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; - struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; - int ret = 0; - - _enter(""); - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - if (!monitor) { - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->op = fscache_get_retrieval(op); - init_waitqueue_func_entry(&monitor->monitor, - 
cachefiles_read_waiter); - } - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = __page_cache_alloc(cachefiles_gfp); - if (!newpage) - goto nomem; - } - - ret = add_to_page_cache_lru(newpage, bmapping, - netpage->index, - cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem; - } - - /* we've installed a new backing page, so now we need - * to start it reading */ - installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - - reread_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* add the netfs page to the pagecache and LRU, and set the - * monitor to transfer the data across */ - monitor_backing_page: - _debug("- monitor add"); - - ret = add_to_page_cache_lru(netpage, op->mapping, - netpage->index, cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - put_page(backpage); - backpage = NULL; - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - /* install a monitor */ - get_page(netpage); - monitor->netfs_page = netpage; - - get_page(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - folio_add_wait_queue(page_folio(backpage), &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was - * installed, so the monitor may miss the event - so we have to - * ensure that we do get one in such a case */ - if (trylock_page(backpage)) { - _debug("2unlock %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - put_page(backpage); - backpage = NULL; - - put_page(netpage); - netpage = NULL; - continue; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ - backing_page_already_present: - _debug("- 
present %p", backpage); - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - _debug("- not ready %p{%lx}", backpage, backpage->flags); - - if (!trylock_page(backpage)) - goto monitor_backing_page; - - if (PageError(backpage)) { - _debug("error %lx", backpage->flags); - unlock_page(backpage); - goto io_error; - } - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate_unlock; - - /* we've locked a page that's neither up to date nor erroneous, - * so we need to attempt to read it again */ - goto reread_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ - backing_page_already_uptodate_unlock: - _debug("uptodate %lx", backpage->flags); - unlock_page(backpage); - backing_page_already_uptodate: - _debug("- uptodate"); - - ret = add_to_page_cache_lru(netpage, op->mapping, - netpage->index, cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - put_page(backpage); - backpage = NULL; - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - copy_highpage(netpage, backpage); - - put_page(backpage); - backpage = NULL; - - fscache_mark_page_cached(op, netpage); - - /* the netpage is unlocked and marked up to date here */ - fscache_end_io(op, netpage, 0); - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - - netpage = NULL; - - _debug("out"); - -out: - /* tidy up */ - if (newpage) - put_page(newpage); - if (netpage) - put_page(netpage); - if (backpage) - put_page(backpage); - if (monitor) { - fscache_put_retrieval(op); - kfree(monitor); - } - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - put_page(netpage); - fscache_retrieval_complete(op, 1); - } - - _leave(" = %d", ret); - return ret; - -nomem: - _debug("nomem"); - ret = -ENOMEM; - goto record_page_complete; - -read_error: - 
_debug("read error %d", ret); - if (ret == -ENOMEM) - goto record_page_complete; -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - ret = -ENOBUFS; -record_page_complete: - fscache_retrieval_complete(op, 1); - goto out; -} - -/* - * read a list of pages from the cache or allocate blocks in which to store - * them - */ -int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct list_head backpages; - struct pagevec pagevec; - struct inode *inode; - struct page *page, *_n; - unsigned shift, nrbackpages; - int ret, ret2, space; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{OBJ%x,%d},,%d,,", - object->fscache.debug_id, atomic_read(&op->op.usage), - *nr_pages); - - if (!object->backer) - goto all_enobufs; - - space = 1; - if (cachefiles_has_space(cache, 0, *nr_pages) < 0) - space = 0; - - inode = d_backing_inode(object->backer); - ASSERT(S_ISREG(inode->i_mode)); - - /* calculate the shift required to use bmap */ - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - pagevec_init(&pagevec); - - op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - INIT_LIST_HEAD(&backpages); - nrbackpages = 0; - - ret = space ? 
-ENODATA : -ENOBUFS; - list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block; - - /* we assume the absence or presence of the first block is a - * good enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually - * good enough for this as it doesn't indicate errors, but - * it's all we've got for the moment - */ - block = page->index; - block <<= shift; - - ret2 = bmap(inode, &block); - ASSERT(ret2 == 0); - - _debug("%llx -> %llx", - (unsigned long long) (page->index << shift), - (unsigned long long) block); - - if (block) { - /* we have data - add it to the list to give to the - * backing fs */ - list_move(&page->lru, &backpages); - (*nr_pages)--; - nrbackpages++; - } else if (space && pagevec_add(&pagevec, page) == 0) { - fscache_mark_pages_cached(op, &pagevec); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - fscache_retrieval_complete(op, 1); - } - } - - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - - if (list_empty(pages)) - ret = 0; - - /* submit the apparently valid pages to the backing fs to be read from - * disk */ - if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages); - if (ret2 == -ENOMEM || ret2 == -EINTR) - ret = ret2; - } - - _leave(" = %d [nr=%u%s]", - ret, *nr_pages, list_empty(pages) ? 
" empty" : ""); - return ret; - -all_enobufs: - fscache_retrieval_complete(op, *nr_pages); - return -ENOBUFS; -} - -/* - * allocate a block in the cache in which to store a page - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - otherwise: - * - the metadata will be retained - * - 0 will be returned - */ -int cachefiles_allocate_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,{%lx},", object, page->index); - - ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) - fscache_mark_page_cached(op, page); - else - ret = -ENOBUFS; - - fscache_retrieval_complete(op, 1); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate blocks in the cache in which to store a set of pages - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if some buffers couldn't be made available - * - returns -ENOBUFS if some pages are beyond EOF - * - otherwise: - * - -ENODATA will be returned - * - metadata will be retained for any page marked - */ -int cachefiles_allocate_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct pagevec pagevec; - struct page *page; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,,,%d,", object, *nr_pages); - - ret = 
cachefiles_has_space(cache, 0, *nr_pages); - if (ret == 0) { - pagevec_init(&pagevec); - - list_for_each_entry(page, pages, lru) { - if (pagevec_add(&pagevec, page) == 0) - fscache_mark_pages_cached(op, &pagevec); - } - - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - ret = -ENODATA; - } else { - ret = -ENOBUFS; - } - - fscache_retrieval_complete(op, *nr_pages); - _leave(" = %d", ret); - return ret; -} - -/* - * request a page be stored in the cache - * - cache withdrawal is prevented by the caller - * - this request may be ignored if there's no cache block available, in which - * case -ENOBUFS will be returned - * - if the op is in progress, 0 will be returned - */ -int cachefiles_write_page(struct fscache_storage *op, struct page *page) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct file *file; - struct path path; - loff_t pos, eof; - size_t len; - void *data; - int ret = -ENOBUFS; - - ASSERT(op != NULL); - ASSERT(page != NULL); - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("%p,%p{%lx},,,", object, page, page->index); - - if (!object->backer) { - _leave(" = -ENOBUFS"); - return -ENOBUFS; - } - - ASSERT(d_is_reg(object->backer)); - - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - pos = (loff_t)page->index << PAGE_SHIFT; - - /* We mustn't write more data than we have, so we have to beware of a - * partial page at EOF. 
- */ - eof = object->fscache.store_limit_l; - if (pos >= eof) - goto error; - - /* write the page to the backing filesystem and let it store it in its - * own time */ - path.mnt = cache->mnt; - path.dentry = object->backer; - file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto error_2; - } - - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - ret = kernel_write(file, data, len, &pos); - kunmap(page); - fput(file); - if (ret != len) - goto error_eio; - - _leave(" = 0"); - return 0; - -error_eio: - ret = -EIO; -error_2: - if (ret == -EIO) - cachefiles_io_error_obj(object, - "Write page to backing file failed"); -error: - _leave(" = -ENOBUFS [%d]", ret); - return -ENOBUFS; -} - -/* - * detach a backing block from a page - * - cache withdrawal is prevented by the caller - */ -void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) - __releases(&object->fscache.cookie->lock) -{ - struct cachefiles_object *object; - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("%p,{%lu}", object, page->index); - - spin_unlock(&object->fscache.cookie->lock); -} diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index aec13fd94692..fe777164f1d8 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles security management * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c new file mode 100644 index 000000000000..89df0ba8ba5e --- /dev/null +++ b/fs/cachefiles/volume.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Volume handling. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include "internal.h" +#include <trace/events/fscache.h> + +/* + * Allocate and set up a volume representation. We make sure all the fanout + * directories are created and pinned. + */ +void cachefiles_acquire_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume; + struct cachefiles_cache *cache = vcookie->cache->cache_priv; + const struct cred *saved_cred; + struct dentry *vdentry, *fan; + size_t len; + char *name; + bool is_new = false; + int ret, n_accesses, i; + + _enter(""); + + volume = kzalloc(sizeof(struct cachefiles_volume), GFP_KERNEL); + if (!volume) + return; + volume->vcookie = vcookie; + volume->cache = cache; + INIT_LIST_HEAD(&volume->cache_link); + + cachefiles_begin_secure(cache, &saved_cred); + + len = vcookie->key[0]; + name = kmalloc(len + 3, GFP_NOFS); + if (!name) + goto error_vol; + name[0] = 'I'; + memcpy(name + 1, vcookie->key + 1, len); + name[len + 1] = 0; + +retry: + vdentry = cachefiles_get_directory(cache, cache->store, name, &is_new); + if (IS_ERR(vdentry)) + goto error_name; + volume->dentry = vdentry; + + if (is_new) { + if (!cachefiles_set_volume_xattr(volume)) + goto error_dir; + } else { + ret = cachefiles_check_volume_xattr(volume); + if (ret < 0) { + if (ret != -ESTALE) + goto error_dir; + inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT); + cachefiles_bury_object(cache, NULL, cache->store, vdentry, + FSCACHE_VOLUME_IS_WEIRD); + cachefiles_put_directory(volume->dentry); + cond_resched(); + goto retry; + } + } + + for (i = 0; i < 256; i++) { + 
sprintf(name, "@%02x", i); + fan = cachefiles_get_directory(cache, vdentry, name, NULL); + if (IS_ERR(fan)) + goto error_fan; + volume->fanout[i] = fan; + } + + cachefiles_end_secure(cache, saved_cred); + + vcookie->cache_priv = volume; + n_accesses = atomic_inc_return(&vcookie->n_accesses); /* Stop wakeups on dec-to-0 */ + trace_fscache_access_volume(vcookie->debug_id, 0, + refcount_read(&vcookie->ref), + n_accesses, fscache_access_cache_pin); + + spin_lock(&cache->object_list_lock); + list_add(&volume->cache_link, &volume->cache->volumes); + spin_unlock(&cache->object_list_lock); + + kfree(name); + return; + +error_fan: + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); +error_dir: + cachefiles_put_directory(volume->dentry); +error_name: + kfree(name); +error_vol: + kfree(volume); + cachefiles_end_secure(cache, saved_cred); +} + +/* + * Release a volume representation. + */ +static void __cachefiles_free_volume(struct cachefiles_volume *volume) +{ + int i; + + _enter(""); + + volume->vcookie->cache_priv = NULL; + + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); + cachefiles_put_directory(volume->dentry); + kfree(volume); +} + +void cachefiles_free_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume = vcookie->cache_priv; + + if (volume) { + spin_lock(&volume->cache->object_list_lock); + list_del_init(&volume->cache_link); + spin_unlock(&volume->cache->object_list_lock); + __cachefiles_free_volume(volume); + } +} + +void cachefiles_withdraw_volume(struct cachefiles_volume *volume) +{ + fscache_withdraw_volume(volume->vcookie); + cachefiles_set_volume_xattr(volume); + __cachefiles_free_volume(volume); +} diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 9e82de668595..83f41bd0c3a9 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles extended attribute management * - * Copyright (C) 2007 Red 
Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ @@ -15,310 +15,245 @@ #include <linux/slab.h> #include "internal.h" +#define CACHEFILES_COOKIE_TYPE_DATA 1 + +struct cachefiles_xattr { + __be64 object_size; /* Actual size of the object */ + __be64 zero_point; /* Size after which server has no data not written by us */ + __u8 type; /* Type of object */ + __u8 content; /* Content presence (enum cachefiles_content) */ + __u8 data[]; /* netfs coherency data */ +} __packed; + static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; /* - * check the type label on an object - * - done using xattrs + * set the state xattr on a cache file */ -int cachefiles_check_object_type(struct cachefiles_object *object) +int cachefiles_set_object_xattr(struct cachefiles_object *object) { - struct dentry *dentry = object->dentry; - char type[3], xtype[3]; + struct cachefiles_xattr *buf; + struct dentry *dentry; + struct file *file = object->file; + unsigned int len = object->cookie->aux_len; int ret; - ASSERT(dentry); - ASSERT(d_backing_inode(dentry)); - - if (!object->fscache.cookie) - strcpy(type, "C3"); - else - snprintf(type, 3, "%02x", object->fscache.cookie->def->type); - - _enter("%x{%s}", object->fscache.debug_id, type); + if (!file) + return -ESTALE; + dentry = file->f_path.dentry; - /* attempt to install a type label directly */ - ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, type, - 2, XATTR_CREATE); - if (ret == 0) { - _debug("SET"); /* we succeeded */ - goto error; - } + _enter("%x,#%d", object->debug_id, len); - if (ret != -EEXIST) { - pr_err("Can't set xattr on %pd [%lu] (err %d)\n", - dentry, d_backing_inode(dentry)->i_ino, - -ret); - goto error; - } + buf = kmalloc(sizeof(struct cachefiles_xattr) + len, GFP_KERNEL); + if (!buf) + return -ENOMEM; - /* read the current type label */ - ret = vfs_getxattr(&init_user_ns, dentry, 
cachefiles_xattr_cache, xtype, - 3); + buf->object_size = cpu_to_be64(object->cookie->object_size); + buf->zero_point = 0; + buf->type = CACHEFILES_COOKIE_TYPE_DATA; + buf->content = object->content_info; + if (test_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags)) + buf->content = CACHEFILES_CONTENT_DIRTY; + if (len > 0) + memcpy(buf->data, fscache_get_aux(object->cookie), len); + + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, + buf, sizeof(struct cachefiles_xattr) + len, 0); if (ret < 0) { - if (ret == -ERANGE) - goto bad_type_length; - - pr_err("Can't read xattr on %pd [%lu] (err %d)\n", - dentry, d_backing_inode(dentry)->i_ino, - -ret); - goto error; + trace_cachefiles_vfs_error(object, file_inode(file), ret, + cachefiles_trace_setxattr_error); + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + buf->content, + cachefiles_coherency_set_fail); + if (ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to set xattr with error %d", ret); + } else { + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + buf->content, + cachefiles_coherency_set_ok); } - /* check the type is what we're expecting */ - if (ret != 2) - goto bad_type_length; - - if (xtype[0] != type[0] || xtype[1] != type[1]) - goto bad_type; - - ret = 0; - -error: + kfree(buf); _leave(" = %d", ret); return ret; - -bad_type_length: - pr_err("Cache object %lu type xattr length incorrect\n", - d_backing_inode(dentry)->i_ino); - ret = -EIO; - goto error; - -bad_type: - xtype[2] = 0; - pr_err("Cache object %pd [%lu] type %s not %s\n", - dentry, d_backing_inode(dentry)->i_ino, - xtype, type); - ret = -EIO; - goto error; } /* - * set the state xattr on a cache file + * check the consistency between the backing cache and the FS-Cache cookie */ -int cachefiles_set_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata) +int cachefiles_check_auxdata(struct 
cachefiles_object *object, struct file *file) { - struct dentry *dentry = object->dentry; - int ret; - - ASSERT(dentry); - - _enter("%p,#%d", object, auxdata->len); + struct cachefiles_xattr *buf; + struct dentry *dentry = file->f_path.dentry; + unsigned int len = object->cookie->aux_len, tlen; + const void *p = fscache_get_aux(object->cookie); + enum cachefiles_coherency_trace why; + ssize_t xlen; + int ret = -ESTALE; - /* attempt to install the cache metadata directly */ - _debug("SET #%u", auxdata->len); + tlen = sizeof(struct cachefiles_xattr) + len; + buf = kmalloc(tlen, GFP_KERNEL); + if (!buf) + return -ENOMEM; - clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags); - ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, - &auxdata->type, auxdata->len, XATTR_CREATE); - if (ret < 0 && ret != -ENOMEM) - cachefiles_io_error_obj( - object, - "Failed to set xattr with error %d", ret); + xlen = cachefiles_inject_read_error(); + if (xlen == 0) + xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen); + if (xlen != tlen) { + if (xlen < 0) + trace_cachefiles_vfs_error(object, file_inode(file), xlen, + cachefiles_trace_getxattr_error); + if (xlen == -EIO) + cachefiles_io_error_obj( + object, + "Failed to read aux with error %zd", xlen); + why = cachefiles_coherency_check_xattr; + } else if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) { + why = cachefiles_coherency_check_type; + } else if (memcmp(buf->data, p, len) != 0) { + why = cachefiles_coherency_check_aux; + } else if (be64_to_cpu(buf->object_size) != object->cookie->object_size) { + why = cachefiles_coherency_check_objsize; + } else if (buf->content == CACHEFILES_CONTENT_DIRTY) { + // TODO: Begin conflict resolution + pr_warn("Dirty object in cache\n"); + why = cachefiles_coherency_check_dirty; + } else { + why = cachefiles_coherency_check_ok; + ret = 0; + } - _leave(" = %d", ret); + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + buf->content, 
why); + kfree(buf); return ret; } /* - * update the state xattr on a cache file + * remove the object's xattr to mark it stale */ -int cachefiles_update_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata) +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dentry) { - struct dentry *dentry = object->dentry; int ret; - if (!dentry) - return -ESTALE; - - _enter("%x,#%d", object->fscache.debug_id, auxdata->len); - - /* attempt to install the cache metadata directly */ - _debug("SET #%u", auxdata->len); - - clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags); - ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, - &auxdata->type, auxdata->len, XATTR_REPLACE); - if (ret < 0 && ret != -ENOMEM) - cachefiles_io_error_obj( - object, - "Failed to update xattr with error %d", ret); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache); + if (ret < 0) { + trace_cachefiles_vfs_error(object, d_inode(dentry), ret, + cachefiles_trace_remxattr_error); + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + d_backing_inode(dentry)->i_ino, -ret); + } _leave(" = %d", ret); return ret; } /* - * check the consistency between the backing cache and the FS-Cache cookie + * Stick a marker on the cache object to indicate that it's dirty. 
*/
/*
 * The marker is written by refreshing the object's state xattr under the
 * cache's security context; nothing is done for an object flagged
 * CACHEFILES_OBJECT_USING_TMPFILE.
 */
void cachefiles_prepare_to_write(struct fscache_cookie *cookie)
{
	struct cachefiles_object *object = cookie->cache_priv;
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;

	_enter("c=%08x", object->cookie->debug_id);

	if (test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags))
		return;

	/* The result of the xattr update is deliberately not checked here. */
	cachefiles_begin_secure(cache, &saved_cred);
	cachefiles_set_object_xattr(object);
	cachefiles_end_secure(cache, saved_cred);
}

/*
 * Set the state xattr on a volume directory.
*/
/*
 * The volume cookie's coherency data is stored as the raw xattr value, with
 * no header in front of it.
 *
 * Returns true if the xattr was written, false on any failure.  Failures
 * other than -ENOMEM are also reported as a cache I/O error.
 */
bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume)
{
	struct dentry *dentry = volume->dentry;
	const void *coherency = volume->vcookie->coherency;
	unsigned int len = volume->vcookie->coherency_len;
	int ret;

	_enter("%x,#%d", volume->vcookie->debug_id, len);

	/* An injected error replaces the real write. */
	ret = cachefiles_inject_write_error();
	if (ret == 0)
		ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
				   coherency, len, 0);

	if (ret >= 0) {
		trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
					       cachefiles_coherency_vol_set_ok);
	} else {
		trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret,
					   cachefiles_trace_setxattr_error);
		trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino,
					       cachefiles_coherency_vol_set_fail);
		if (ret != -ENOMEM)
			cachefiles_io_error(
				volume->cache, "Failed to set xattr with error %d", ret);
	}

	_leave(" = %d", ret);
	return ret == 0;
}

/*
mark it stale + * Check the consistency between the backing cache and the volume cookie. */ -int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, - struct dentry *dentry) +int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) { - int ret; + struct cachefiles_xattr *buf; + struct dentry *dentry = volume->dentry; + unsigned int len = volume->vcookie->coherency_len; + const void *p = volume->vcookie->coherency; + enum cachefiles_coherency_trace why; + ssize_t xlen; + int ret = -ESTALE; - ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache); - if (ret < 0) { - if (ret == -ENOENT || ret == -ENODATA) - ret = 0; - else if (ret != -ENOMEM) - cachefiles_io_error(cache, - "Can't remove xattr from %lu" - " (error %d)", - d_backing_inode(dentry)->i_ino, -ret); + _enter(""); + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + xlen = cachefiles_inject_read_error(); + if (xlen == 0) + xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len); + if (xlen != len) { + if (xlen < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, + cachefiles_trace_getxattr_error); + if (xlen == -EIO) + cachefiles_io_error( + volume->cache, + "Failed to read xattr with error %zd", xlen); + } + why = cachefiles_coherency_vol_check_xattr; + } else if (memcmp(buf->data, p, len) != 0) { + why = cachefiles_coherency_vol_check_cmp; + } else { + why = cachefiles_coherency_vol_check_ok; + ret = 0; } + trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, why); + kfree(buf); _leave(" = %d", ret); return ret; } |