diff options
author | David Howells <dhowells@redhat.com> | 2018-04-06 16:17:25 +0300 |
---|---|---|
committer | David Howells <dhowells@redhat.com> | 2018-04-09 23:54:48 +0300 |
commit | f3ddee8dc4e2cff37936afbeed2fdaa95b7fb7c6 (patch) | |
tree | e077005245794c5cacfc36eb6100cb3c819a8954 /fs/afs/dir.c | |
parent | 66c7e1d319a5b3a57de688a36200e463ec87e88e (diff) | |
download | linux-f3ddee8dc4e2cff37936afbeed2fdaa95b7fb7c6.tar.xz |
afs: Fix directory handling
AFS directories are structured blobs that are downloaded just like files
and then parsed by the lookup and readdir code and, as such, are currently
handled in the pagecache like any other file, with the entire directory
content being thrown away each time the directory changes.
However, since the blob is a known structure and since the data version
counter on a directory increases by exactly one for each change committed
to that directory, we can actually edit the directory locally rather than
fetching it from the server after each locally-induced change.
What we can't do, though, is mix data from the server and data from the
client since the server is technically at liberty to rearrange or compress
a directory if it sees fit, provided it updates the data version number
when it does so and breaks the callback (ie. sends a notification).
Further, lookup with lookup-ahead, readdir and, when it arrives, local
editing are likely want to scan the whole of a directory.
So directory handling needs to be improved to maintain the coherency of the
directory blob prior to permitting local directory editing.
To this end:
(1) If any directory page gets discarded, invalidate and reread the entire
directory.
(2) If readpage notes that if when it fetches a single page that the
version number has changed, the entire directory is flagged for
invalidation.
(3) Read as much of the directory in one go as we can.
Note that this removes local caching of directories in fscache for the
moment as we can't pass the pages to fscache_read_or_alloc_pages() since
page->lru is in use by the LRU.
Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/dir.c')
-rw-r--r-- | fs/afs/dir.c | 294 |
1 files changed, 226 insertions, 68 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 405ebd609b87..8beecbcd9679 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1,6 +1,6 @@ /* dir.c: AFS filesystem directory handling * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -13,8 +13,10 @@ #include <linux/fs.h> #include <linux/namei.h> #include <linux/pagemap.h> +#include <linux/swap.h> #include <linux/ctype.h> #include <linux/sched.h> +#include <linux/task_io_accounting_ops.h> #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, @@ -39,6 +41,14 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags); +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); + +static int afs_dir_set_page_dirty(struct page *page) +{ + BUG(); /* This should never happen. */ +} const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, @@ -63,6 +73,12 @@ const struct inode_operations afs_dir_inode_operations = { .listxattr = afs_listxattr, }; +const struct address_space_operations afs_dir_aops = { + .set_page_dirty = afs_dir_set_page_dirty, + .releasepage = afs_dir_releasepage, + .invalidatepage = afs_dir_invalidatepage, +}; + const struct dentry_operations afs_fs_dentry_operations = { .d_revalidate = afs_d_revalidate, .d_delete = afs_d_delete, @@ -140,32 +156,17 @@ struct afs_lookup_cookie { /* * check that a directory page is valid */ -bool afs_dir_check_page(struct inode *dir, struct page *page) +static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, + loff_t i_size) { struct afs_dir_page *dbuf; - struct afs_vnode *vnode = AFS_FS_I(dir); - loff_t latter, i_size, off; + loff_t latter, off; int tmp, qty; -#if 0 - /* check the page count */ - qty = desc.size / sizeof(dbuf->blocks[0]); - if (qty == 0) - goto error; - - if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { - printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", - __func__, dir->i_ino, qty, - ntohs(dbuf->blocks[0].pagehdr.npages)); - goto error; - } -#endif - /* Determine how many magic numbers there should be in this page, but * we must take care because the directory may change size under us. */ off = page_offset(page); - i_size = i_size_read(dir); if (i_size <= off) goto checked; @@ -181,60 +182,22 @@ bool afs_dir_check_page(struct inode *dir, struct page *page) for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", - __func__, dir->i_ino, tmp, qty, + __func__, dvnode->vfs_inode.i_ino, tmp, qty, ntohs(dbuf->blocks[tmp].pagehdr.magic)); - trace_afs_dir_check_failed(vnode, off, i_size); + trace_afs_dir_check_failed(dvnode, off, i_size); goto error; } } checked: - afs_stat_v(vnode, n_read_dir); - SetPageChecked(page); + afs_stat_v(dvnode, n_read_dir); return true; error: - SetPageError(page); return false; } /* - * discard a page cached in the pagecache - */ -static inline void afs_dir_put_page(struct page *page) -{ - kunmap(page); - unlock_page(page); - put_page(page); -} - -/* - * get a page into the pagecache - */ -static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, - struct key *key) -{ - struct page *page; - _enter("{%lu},%lu", dir->i_ino, index); - - page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); - if (!IS_ERR(page)) { - lock_page(page); - kmap(page); - if (unlikely(!PageChecked(page))) { - if (PageError(page)) - goto fail; - } - } - return page; - -fail: - afs_dir_put_page(page); - _leave(" = -EIO"); - return ERR_PTR(-EIO); -} - -/* * open an AFS directory file */ static int afs_dir_open(struct inode *inode, struct file *file) @@ -251,6 +214,147 @@ static int afs_dir_open(struct inode *inode, struct file *file) } /* + * Read the directory into the pagecache in one go, scrubbing the previous + * contents. The list of pages is returned, pinning them so that they don't + * get reclaimed during the iteration. + */ +static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) +{ + struct afs_read *req; + loff_t i_size; + int nr_pages, nr_inline, i, n; + int ret = -ENOMEM; + +retry: + i_size = i_size_read(&dvnode->vfs_inode); + if (i_size < 2048) + return ERR_PTR(-EIO); + if (i_size > 2048 * 1024) + return ERR_PTR(-EFBIG); + + _enter("%llu", i_size); + + /* Get a request record to hold the page list. We want to hold it + * inline if we can, but we don't want to make an order 1 allocation. + */ + nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; + nr_inline = nr_pages; + if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) + nr_inline = 0; + + req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline, + GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + refcount_set(&req->usage, 1); + req->nr_pages = nr_pages; + req->actual_len = i_size; /* May change */ + req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ + req->data_version = dvnode->status.data_version; /* May change */ + if (nr_inline > 0) { + req->pages = req->array; + } else { + req->pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!req->pages) + goto error; + } + + /* Get a list of all the pages that hold or will hold the directory + * content. We need to fill in any gaps that we might find where the + * memory reclaimer has been at work. If there are any gaps, we will + * need to reread the entire directory contents. + */ + i = 0; + do { + n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i, + req->nr_pages - i, + req->pages + i); + _debug("find %u at %u/%u", n, i, req->nr_pages); + if (n == 0) { + gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask; + + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + ret = -ENOMEM; + req->pages[i] = __page_cache_alloc(gfp); + if (!req->pages[i]) + goto error; + ret = add_to_page_cache_lru(req->pages[i], + dvnode->vfs_inode.i_mapping, + i, gfp); + if (ret < 0) + goto error; + + set_page_private(req->pages[i], 1); + SetPagePrivate(req->pages[i]); + unlock_page(req->pages[i]); + i++; + } else { + i += n; + } + } while (i < req->nr_pages); + + /* If we're going to reload, we need to lock all the pages to prevent + * races. + */ + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + ret = -ERESTARTSYS; + for (i = 0; i < req->nr_pages; i++) + if (lock_page_killable(req->pages[i]) < 0) + goto error_unlock; + + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + ret = afs_fetch_data(dvnode, key, req); + if (ret < 0) + goto error_unlock_all; + + task_io_account_read(PAGE_SIZE * req->nr_pages); + + if (req->len < req->file_size) + goto content_has_grown; + + /* Validate the data we just read. */ + ret = -EIO; + for (i = 0; i < req->nr_pages; i++) + if (!afs_dir_check_page(dvnode, req->pages[i], + req->actual_len)) + goto error_unlock_all; + + // TODO: Trim excess pages + + set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + } + +success: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + return req; + +error_unlock_all: + i = req->nr_pages; +error_unlock: + while (i > 0) + unlock_page(req->pages[--i]); +error: + afs_put_read(req); + _leave(" = %d", ret); + return ERR_PTR(ret); + +content_has_grown: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + afs_put_read(req); + goto retry; +} + +/* * deal with one block in an AFS directory */ static int afs_dir_iterate_block(struct dir_context *ctx, @@ -347,8 +451,10 @@ static int afs_dir_iterate_block(struct dir_context *ctx, static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, struct key *key) { + struct afs_vnode *dvnode = AFS_FS_I(dir); union afs_dir_block *dblock; struct afs_dir_page *dbuf; + struct afs_read *req; struct page *page; unsigned blkoff, limit; int ret; @@ -360,25 +466,32 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, return -ESTALE; } + req = afs_read_dir(dvnode, key); + if (IS_ERR(req)) + return PTR_ERR(req); + /* round the file position up to the next entry boundary */ ctx->pos += sizeof(union afs_dirent) - 1; ctx->pos &= ~(sizeof(union afs_dirent) - 1); /* walk through the blocks in sequence */ ret = 0; - while (ctx->pos < dir->i_size) { + while (ctx->pos < req->actual_len) { blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1); - /* fetch the appropriate page from the directory */ - page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + /* Fetch the appropriate page from the directory and re-add it + * to the LRU. + */ + page = req->pages[blkoff / PAGE_SIZE]; + if (!page) { + ret = -EIO; break; } + mark_page_accessed(page); limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap(page); /* deal with the individual blocks stashed on this page */ do { @@ -386,7 +499,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, sizeof(union afs_dir_block)]; ret = afs_dir_iterate_block(ctx, dblock, blkoff); if (ret != 1) { - afs_dir_put_page(page); + kunmap(page); goto out; } @@ -394,11 +507,12 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, } while (ctx->pos < dir->i_size && blkoff < limit); - afs_dir_put_page(page); + kunmap(page); ret = 0; } out: + afs_put_read(req); _leave(" = %d", ret); return ret; } @@ -1491,3 +1605,47 @@ error: _leave(" = %d", ret); return ret; } + +/* + * Release a directory page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not + */ +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + + set_page_private(page, 0); + ClearPagePrivate(page); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_relpg); + return 1; +} + +/* + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{%lu},%u,%u", page->index, offset, length); + + BUG_ON(!PageLocked(page)); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == PAGE_SIZE) { + set_page_private(page, 0); + ClearPagePrivate(page); + } +} |