diff options
Diffstat (limited to 'fs')
269 files changed, 6101 insertions, 3837 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 45b7fc405fa6..532acae25453 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -12,6 +12,8 @@ kafs-objs := \ cell.o \ cmservice.o \ dir.o \ + dir_edit.o \ + dynroot.o \ file.o \ flock.o \ fsclient.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index fd9f28b8a933..3bedfed608a2 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -243,9 +243,9 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) xport == a->sin6_port) return; if (xdr == a->sin6_addr.s6_addr32[3] && - xport < a->sin6_port) + (u16 __force)xport < (u16 __force)a->sin6_port) break; - if (xdr < a->sin6_addr.s6_addr32[3]) + if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3]) break; } @@ -280,7 +280,7 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) xport == a->sin6_port) return; if (diff == 0 && - xport < a->sin6_port) + (u16 __force)xport < (u16 __force)a->sin6_port) break; if (diff < 0) break; diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b94d0edc2b78..b4ff1f7ae4ab 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -67,10 +67,14 @@ typedef enum { } afs_callback_type_t; struct afs_callback { - struct afs_fid fid; /* file identifier */ - unsigned version; /* callback version */ - unsigned expiry; /* time at which expires */ - afs_callback_type_t type; /* type of callback */ + unsigned version; /* Callback version */ + unsigned expiry; /* Time at which expires */ + afs_callback_type_t type; /* Type of callback */ +}; + +struct afs_callback_break { + struct afs_fid fid; /* File identifier */ + struct afs_callback cb; /* Callback details */ }; #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ @@ -123,21 +127,20 @@ typedef u32 afs_access_t; * AFS file status information */ struct afs_file_status { - unsigned if_version; /* interface version */ -#define AFS_FSTATUS_VERSION 1 + u64 size; /* file size */ + afs_dataversion_t data_version; /* current data version */ + time_t mtime_client; /* last time client changed data */ + time_t mtime_server; /* last time server changed data */ + unsigned abort_code; /* Abort if bulk-fetching this failed */ afs_file_type_t type; /* file type */ unsigned nlink; /* link count */ - u64 size; /* file size */ - afs_dataversion_t data_version; /* current data version */ u32 author; /* author ID */ - kuid_t owner; /* owner ID */ - kgid_t group; /* group ID */ + u32 owner; /* owner ID */ + u32 group; /* group ID */ afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ - time_t mtime_client; /* last time client changed data */ - time_t mtime_server; /* last time server changed data */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ }; diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index d47b6d01e4c0..ddfa88a7a9c0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -31,10 +31,12 @@ enum AFS_FS_Operations { FSGETVOLUMEINFO = 148, /* AFS Get information about a volume */ FSGETVOLUMESTATUS = 149, /* AFS Get volume status information */ FSGETROOTVOLUME = 151, /* AFS Get root volume name */ + FSBULKSTATUS = 155, /* AFS Fetch multiple file statuses */ FSSETLOCK = 156, /* AFS Request a file lock */ FSEXTENDLOCK = 157, /* AFS Extend a file lock */ FSRELEASELOCK = 158, /* AFS Release a file lock */ FSLOOKUP = 161, /* AFS lookup file in directory */ + FSINLINEBULKSTATUS = 65536, /* AFS Fetch multiple file statuses with inline errors */ FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index f4291b576054..abd9a84f4e88 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -97,26 +97,6 @@ again: } /* - * Set a vnode's interest on a server. - */ -void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi) -{ - struct afs_cb_interest *old_cbi = NULL; - - if (vnode->cb_interest == cbi) - return; - - write_seqlock(&vnode->cb_lock); - if (vnode->cb_interest != cbi) { - afs_get_cb_interest(cbi); - old_cbi = vnode->cb_interest; - vnode->cb_interest = cbi; - } - write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), cbi); -} - -/* * Remove an interest on a server. */ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) @@ -150,6 +130,7 @@ void afs_break_callback(struct afs_vnode *vnode) write_seqlock(&vnode->cb_lock); + clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; afs_clear_permits(vnode); @@ -207,7 +188,7 @@ static void afs_break_one_callback(struct afs_server *server, * allow the fileserver to break callback promises */ void afs_break_callbacks(struct afs_server *server, size_t count, - struct afs_callback callbacks[]) + struct afs_callback_break *callbacks) { _enter("%p,%zu,", server, count); @@ -219,9 +200,9 @@ void afs_break_callbacks(struct afs_server *server, size_t count, callbacks->fid.vid, callbacks->fid.vnode, callbacks->fid.unique, - callbacks->version, - callbacks->expiry, - callbacks->type + callbacks->cb.version, + callbacks->cb.expiry, + callbacks->cb.type ); afs_break_one_callback(server, &callbacks->fid); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 4235a05afc76..fdf4c36cff79 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -18,7 +18,7 @@ #include <keys/rxrpc-type.h> #include "internal.h" -unsigned __read_mostly afs_cell_gc_delay = 10; +static unsigned __read_mostly afs_cell_gc_delay = 10; static void afs_manage_cell(struct work_struct *); @@ -75,7 +75,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, cell = rcu_dereference_raw(net->ws_cell); if (cell) { afs_get_cell(cell); - continue; + break; } ret = -EDESTADDRREQ; continue; @@ -130,6 +130,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } + if (namelen == 5 && memcmp(name, "@cell", 5) == 0) + return ERR_PTR(-EINVAL); _enter("%*.*s,%s", namelen, namelen, name, vllist); @@ -334,8 +336,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) return PTR_ERR(new_root); } - set_bit(AFS_CELL_FL_NO_GC, &new_root->flags); - afs_get_cell(new_root); + if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) + afs_get_cell(new_root); /* install the new cell */ write_seqlock(&net->cells_lock); @@ -411,7 +413,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_addrlist(cell->vl_addrs); + afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 41e277f57b20..357de908df3a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -178,8 +178,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work) */ static int afs_deliver_cb_callback(struct afs_call *call) { + struct afs_callback_break *cb; struct sockaddr_rxrpc srx; - struct afs_callback *cb; struct afs_server *server; __be32 *bp; int ret, loop; @@ -201,7 +201,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL); if (!call->buffer) @@ -218,7 +218,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) _debug("unmarshall FID array"); call->request = kcalloc(call->count, - sizeof(struct afs_callback), + sizeof(struct afs_callback_break), GFP_KERNEL); if (!call->request) return -ENOMEM; @@ -229,7 +229,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->fid.vid = ntohl(*bp++); cb->fid.vnode = ntohl(*bp++); cb->fid.unique = ntohl(*bp++); - cb->type = AFSCM_CB_UNTYPED; + cb->cb.type = AFSCM_CB_UNTYPED; } call->offset = 0; @@ -245,7 +245,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -260,9 +260,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb = call->request; bp = call->buffer; for (loop = call->count2; loop > 0; loop--, cb++) { - cb->version = ntohl(*bp++); - cb->expiry = ntohl(*bp++); - cb->type = ntohl(*bp++); + cb->cb.version = ntohl(*bp++); + cb->cb.expiry = ntohl(*bp++); + cb->cb.type = ntohl(*bp++); } call->offset = 0; @@ -500,9 +500,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) b = call->buffer; r = call->request; - r->time_low = ntohl(b[0]); - r->time_mid = ntohl(b[1]); - r->time_hi_and_version = ntohl(b[2]); + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ba2b458b36d1..5889f70d4d27 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1,6 +1,6 @@ /* dir.c: AFS filesystem directory handling * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -10,27 +10,26 @@ */ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/pagemap.h> +#include <linux/swap.h> #include <linux/ctype.h> #include <linux/sched.h> -#include <linux/dns_resolver.h> +#include <linux/task_io_accounting_ops.h> #include "internal.h" +#include "xdr_fs.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); -static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); -static void afs_d_release(struct dentry *dentry); -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, + loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); @@ -43,6 +42,14 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags); +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); + +static int afs_dir_set_page_dirty(struct page *page) +{ + BUG(); /* This should never happen. */ +} const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, @@ -67,15 +74,10 @@ const struct inode_operations afs_dir_inode_operations = { .listxattr = afs_listxattr, }; -const struct file_operations afs_dynroot_file_operations = { - .open = dcache_dir_open, - .release = dcache_dir_close, - .iterate_shared = dcache_readdir, - .llseek = dcache_dir_lseek, -}; - -const struct inode_operations afs_dynroot_inode_operations = { - .lookup = afs_dynroot_lookup, +const struct address_space_operations afs_dir_aops = { + .set_page_dirty = afs_dir_set_page_dirty, + .releasepage = afs_dir_releasepage, + .invalidatepage = afs_dir_invalidatepage, }; const struct dentry_operations afs_fs_dentry_operations = { @@ -85,91 +87,38 @@ const struct dentry_operations afs_fs_dentry_operations = { .d_automount = afs_d_automount, }; -#define AFS_DIR_HASHTBL_SIZE 128 -#define AFS_DIR_DIRENT_SIZE 32 -#define AFS_DIRENT_PER_BLOCK 64 - -union afs_dirent { - struct { - uint8_t valid; - uint8_t unused[1]; - __be16 hash_next; - __be32 vnode; - __be32 unique; - uint8_t name[16]; - uint8_t overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ - } u; - uint8_t extended_name[32]; -}; - -/* AFS directory page header (one at the beginning of every 2048-byte chunk) */ -struct afs_dir_pagehdr { - __be16 npages; - __be16 magic; -#define AFS_DIR_MAGIC htons(1234) - uint8_t nentries; - uint8_t bitmap[8]; - uint8_t pad[19]; -}; - -/* directory block layout */ -union afs_dir_block { - - struct afs_dir_pagehdr pagehdr; - - struct { - struct afs_dir_pagehdr pagehdr; - uint8_t alloc_ctrs[128]; - /* dir hash table */ - uint16_t hashtable[AFS_DIR_HASHTBL_SIZE]; - } hdr; - - union afs_dirent dirents[AFS_DIRENT_PER_BLOCK]; -}; - -/* layout on a linux VM page */ -struct afs_dir_page { - union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)]; +struct afs_lookup_one_cookie { + struct dir_context ctx; + struct qstr name; + bool found; + struct afs_fid fid; }; struct afs_lookup_cookie { - struct dir_context ctx; - struct afs_fid fid; - struct qstr name; - int found; + struct dir_context ctx; + struct qstr name; + bool found; + bool one_only; + unsigned short nr_fids; + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_fid fids[50]; }; /* * check that a directory page is valid */ -bool afs_dir_check_page(struct inode *dir, struct page *page) +static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, + loff_t i_size) { - struct afs_dir_page *dbuf; - struct afs_vnode *vnode = AFS_FS_I(dir); - loff_t latter, i_size, off; + struct afs_xdr_dir_page *dbuf; + loff_t latter, off; int tmp, qty; -#if 0 - /* check the page count */ - qty = desc.size / sizeof(dbuf->blocks[0]); - if (qty == 0) - goto error; - - if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { - printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", - __func__, dir->i_ino, qty, - ntohs(dbuf->blocks[0].pagehdr.npages)); - goto error; - } -#endif - /* Determine how many magic numbers there should be in this page, but * we must take care because the directory may change size under us. */ off = page_offset(page); - i_size = i_size_read(dir); if (i_size <= off) goto checked; @@ -178,112 +127,225 @@ bool afs_dir_check_page(struct inode *dir, struct page *page) qty = PAGE_SIZE; else qty = latter; - qty /= sizeof(union afs_dir_block); + qty /= sizeof(union afs_xdr_dir_block); /* check them */ - dbuf = page_address(page); + dbuf = kmap(page); for (tmp = 0; tmp < qty; tmp++) { - if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { + if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) { printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", - __func__, dir->i_ino, tmp, qty, - ntohs(dbuf->blocks[tmp].pagehdr.magic)); - trace_afs_dir_check_failed(vnode, off, i_size); + __func__, dvnode->vfs_inode.i_ino, tmp, qty, + ntohs(dbuf->blocks[tmp].hdr.magic)); + trace_afs_dir_check_failed(dvnode, off, i_size); + kunmap(page); goto error; } + + /* Make sure each block is NUL terminated so we can reasonably + * use string functions on it. The filenames in the page + * *should* be NUL-terminated anyway. + */ + ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0; } + kunmap(page); + checked: - SetPageChecked(page); + afs_stat_v(dvnode, n_read_dir); return true; error: - SetPageError(page); return false; } /* - * discard a page cached in the pagecache + * open an AFS directory file */ -static inline void afs_dir_put_page(struct page *page) +static int afs_dir_open(struct inode *inode, struct file *file) { - kunmap(page); - unlock_page(page); - put_page(page); + _enter("{%lu}", inode->i_ino); + + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags)) + return -ENOENT; + + return afs_open(inode, file); } /* - * get a page into the pagecache + * Read the directory into the pagecache in one go, scrubbing the previous + * contents. The list of pages is returned, pinning them so that they don't + * get reclaimed during the iteration. */ -static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, - struct key *key) +static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) { - struct page *page; - _enter("{%lu},%lu", dir->i_ino, index); - - page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); - if (!IS_ERR(page)) { - lock_page(page); - kmap(page); - if (unlikely(!PageChecked(page))) { - if (PageError(page)) - goto fail; - } + struct afs_read *req; + loff_t i_size; + int nr_pages, nr_inline, i, n; + int ret = -ENOMEM; + +retry: + i_size = i_size_read(&dvnode->vfs_inode); + if (i_size < 2048) + return ERR_PTR(-EIO); + if (i_size > 2048 * 1024) + return ERR_PTR(-EFBIG); + + _enter("%llu", i_size); + + /* Get a request record to hold the page list. We want to hold it + * inline if we can, but we don't want to make an order 1 allocation. + */ + nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; + nr_inline = nr_pages; + if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) + nr_inline = 0; + + req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline, + GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + refcount_set(&req->usage, 1); + req->nr_pages = nr_pages; + req->actual_len = i_size; /* May change */ + req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ + req->data_version = dvnode->status.data_version; /* May change */ + if (nr_inline > 0) { + req->pages = req->array; + } else { + req->pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!req->pages) + goto error; } - return page; -fail: - afs_dir_put_page(page); - _leave(" = -EIO"); - return ERR_PTR(-EIO); -} + /* Get a list of all the pages that hold or will hold the directory + * content. We need to fill in any gaps that we might find where the + * memory reclaimer has been at work. If there are any gaps, we will + * need to reread the entire directory contents. + */ + i = 0; + do { + n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i, + req->nr_pages - i, + req->pages + i); + _debug("find %u at %u/%u", n, i, req->nr_pages); + if (n == 0) { + gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask; + + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + ret = -ENOMEM; + req->pages[i] = __page_cache_alloc(gfp); + if (!req->pages[i]) + goto error; + ret = add_to_page_cache_lru(req->pages[i], + dvnode->vfs_inode.i_mapping, + i, gfp); + if (ret < 0) + goto error; + + set_page_private(req->pages[i], 1); + SetPagePrivate(req->pages[i]); + unlock_page(req->pages[i]); + i++; + } else { + i += n; + } + } while (i < req->nr_pages); -/* - * open an AFS directory file - */ -static int afs_dir_open(struct inode *inode, struct file *file) -{ - _enter("{%lu}", inode->i_ino); + /* If we're going to reload, we need to lock all the pages to prevent + * races. + */ + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + ret = -ERESTARTSYS; + for (i = 0; i < req->nr_pages; i++) + if (lock_page_killable(req->pages[i]) < 0) + goto error_unlock; - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; - if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags)) - return -ENOENT; + ret = afs_fetch_data(dvnode, key, req); + if (ret < 0) + goto error_unlock_all; - return afs_open(inode, file); + task_io_account_read(PAGE_SIZE * req->nr_pages); + + if (req->len < req->file_size) + goto content_has_grown; + + /* Validate the data we just read. */ + ret = -EIO; + for (i = 0; i < req->nr_pages; i++) + if (!afs_dir_check_page(dvnode, req->pages[i], + req->actual_len)) + goto error_unlock_all; + + // TODO: Trim excess pages + + set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); + } + +success: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + return req; + +error_unlock_all: + i = req->nr_pages; +error_unlock: + while (i > 0) + unlock_page(req->pages[--i]); +error: + afs_put_read(req); + _leave(" = %d", ret); + return ERR_PTR(ret); + +content_has_grown: + i = req->nr_pages; + while (i > 0) + unlock_page(req->pages[--i]); + afs_put_read(req); + goto retry; } /* * deal with one block in an AFS directory */ static int afs_dir_iterate_block(struct dir_context *ctx, - union afs_dir_block *block, + union afs_xdr_dir_block *block, unsigned blkoff) { - union afs_dirent *dire; + union afs_xdr_dirent *dire; unsigned offset, next, curr; size_t nlen; int tmp; _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); - curr = (ctx->pos - blkoff) / sizeof(union afs_dirent); + curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent); /* walk through the block, an entry at a time */ - for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; - offset < AFS_DIRENT_PER_BLOCK; + for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + offset < AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { next = offset + 1; /* skip entries marked unused in the bitmap */ - if (!(block->pagehdr.bitmap[offset / 8] & + if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", - blkoff / sizeof(union afs_dir_block), offset); + blkoff / sizeof(union afs_xdr_dir_block), offset); if (offset >= curr) ctx->pos = blkoff + - next * sizeof(union afs_dirent); + next * sizeof(union afs_xdr_dirent); continue; } @@ -291,34 +353,34 @@ static int afs_dir_iterate_block(struct dir_context *ctx, dire = &block->dirents[offset]; nlen = strnlen(dire->u.name, sizeof(*block) - - offset * sizeof(union afs_dirent)); + offset * sizeof(union afs_xdr_dirent)); _debug("ENT[%zu.%u]: %s %zu \"%s\"", - blkoff / sizeof(union afs_dir_block), offset, + blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) { - if (next >= AFS_DIRENT_PER_BLOCK) { + for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { + if (next >= AFS_DIR_SLOTS_PER_BLOCK) { _debug("ENT[%zu.%u]:" " %u travelled beyond end dir block" " (len %u/%zu)", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); return -EIO; } - if (!(block->pagehdr.bitmap[next / 8] & + if (!(block->hdr.bitmap[next / 8] & (1 << (next % 8)))) { _debug("ENT[%zu.%u]:" " %u unmarked extension (len %u/%zu)", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); return -EIO; } _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_dir_block), + blkoff / sizeof(union afs_xdr_dir_block), next, tmp, nlen); next++; } @@ -330,13 +392,14 @@ static int afs_dir_iterate_block(struct dir_context *ctx, /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), - ctx->actor == afs_lookup_filldir ? + (ctx->actor == afs_lookup_filldir || + ctx->actor == afs_lookup_one_filldir)? ntohl(dire->u.unique) : DT_UNKNOWN)) { _leave(" = 0 [full]"); return 0; } - ctx->pos = blkoff + next * sizeof(union afs_dirent); + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); } _leave(" = 1 [more]"); @@ -349,8 +412,10 @@ static int afs_dir_iterate_block(struct dir_context *ctx, static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, struct key *key) { - union afs_dir_block *dblock; - struct afs_dir_page *dbuf; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_xdr_dir_page *dbuf; + union afs_xdr_dir_block *dblock; + struct afs_read *req; struct page *page; unsigned blkoff, limit; int ret; @@ -362,45 +427,53 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, return -ESTALE; } + req = afs_read_dir(dvnode, key); + if (IS_ERR(req)) + return PTR_ERR(req); + /* round the file position up to the next entry boundary */ - ctx->pos += sizeof(union afs_dirent) - 1; - ctx->pos &= ~(sizeof(union afs_dirent) - 1); + ctx->pos += sizeof(union afs_xdr_dirent) - 1; + ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1); /* walk through the blocks in sequence */ ret = 0; - while (ctx->pos < dir->i_size) { - blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1); + while (ctx->pos < req->actual_len) { + blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1); - /* fetch the appropriate page from the directory */ - page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + /* Fetch the appropriate page from the directory and re-add it + * to the LRU. + */ + page = req->pages[blkoff / PAGE_SIZE]; + if (!page) { + ret = -EIO; break; } + mark_page_accessed(page); limit = blkoff & ~(PAGE_SIZE - 1); - dbuf = page_address(page); + dbuf = kmap(page); /* deal with the individual blocks stashed on this page */ do { dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / - sizeof(union afs_dir_block)]; + sizeof(union afs_xdr_dir_block)]; ret = afs_dir_iterate_block(ctx, dblock, blkoff); if (ret != 1) { - afs_dir_put_page(page); + kunmap(page); goto out; } - blkoff += sizeof(union afs_dir_block); + blkoff += sizeof(union afs_xdr_dir_block); } while (ctx->pos < dir->i_size && blkoff < limit); - afs_dir_put_page(page); + kunmap(page); ret = 0; } out: + afs_put_read(req); _leave(" = %d", ret); return ret; } @@ -414,23 +487,23 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) } /* - * search the directory for a name + * Search the directory for a single name * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, - int nlen, loff_t fpos, u64 ino, unsigned dtype) +static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_lookup_cookie *cookie = - container_of(ctx, struct afs_lookup_cookie, ctx); + struct afs_lookup_one_cookie *cookie = + container_of(ctx, struct afs_lookup_one_cookie, ctx); _enter("{%s,%u},%s,%u,,%llu,%u", cookie->name.name, cookie->name.len, name, nlen, (unsigned long long) ino, dtype); /* insanity checks first */ - BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); - BUILD_BUG_ON(sizeof(union afs_dirent) != 32); + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); if (cookie->name.len != nlen || memcmp(cookie->name.name, name, nlen) != 0) { @@ -447,15 +520,15 @@ static int afs_lookup_filldir(struct dir_context *ctx, const char *name, } /* - * do a lookup in a directory + * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup(struct inode *dir, struct dentry *dentry, - struct afs_fid *fid, struct key *key) +static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, + struct afs_fid *fid, struct key *key) { struct afs_super_info *as = dir->i_sb->s_fs_info; - struct afs_lookup_cookie cookie = { - .ctx.actor = afs_lookup_filldir, + struct afs_lookup_one_cookie cookie = { + .ctx.actor = afs_lookup_one_filldir, .name = dentry->d_name, .fid.vid = as->volume->vid }; @@ -482,70 +555,265 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, } /* - * Probe to see if a cell may exist. This prevents positive dentries from - * being created unnecessarily. + * search the directory for a name + * - if afs_dir_iterate_block() spots this function, it'll pass the FID + * uniquifier through dtype */ -static int afs_probe_cell_name(struct dentry *dentry) +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_cell *cell; - const char *name = dentry->d_name.name; - size_t len = dentry->d_name.len; + struct afs_lookup_cookie *cookie = + container_of(ctx, struct afs_lookup_cookie, ctx); int ret; - /* Names prefixed with a dot are R/W mounts. */ - if (name[0] == '.') { - if (len == 1) - return -EINVAL; - name++; - len--; - } + _enter("{%s,%u},%s,%u,,%llu,%u", + cookie->name.name, cookie->name.len, name, nlen, + (unsigned long long) ino, dtype); - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); - if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); - return 0; + /* insanity checks first */ + BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048); + BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32); + + if (cookie->found) { + if (cookie->nr_fids < 50) { + cookie->fids[cookie->nr_fids].vnode = ino; + cookie->fids[cookie->nr_fids].unique = dtype; + cookie->nr_fids++; + } + } else if (cookie->name.len == nlen && + memcmp(cookie->name.name, name, nlen) == 0) { + cookie->fids[0].vnode = ino; + cookie->fids[0].unique = dtype; + cookie->found = 1; + if (cookie->one_only) + return -1; } - ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); - if (ret == -ENODATA) - ret = -EDESTADDRREQ; + ret = cookie->nr_fids >= 50 ? -1 : 0; + _leave(" = %d", ret); return ret; } /* - * Try to auto mount the mountpoint with pseudo directory, if the autocell - * operation is setted. + * Do a lookup in a directory. We make use of bulk lookup to query a slew of + * files in one go and create inodes for them. The inode of the file we were + * asked for is returned. */ -static struct inode *afs_try_auto_mntpt(struct dentry *dentry, - struct inode *dir, struct afs_fid *fid) +static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, + struct key *key) { - struct afs_vnode *vnode = AFS_FS_I(dir); - struct inode *inode; - int ret = -ENOENT; + struct afs_lookup_cookie *cookie; + struct afs_cb_interest *cbi = NULL; + struct afs_super_info *as = dir->i_sb->s_fs_info; + struct afs_iget_data data; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct inode *inode = NULL; + int ret, i; - _enter("%p{%pd}, {%x:%u}", - dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + + cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL); + if (!cookie) + return ERR_PTR(-ENOMEM); + + cookie->ctx.actor = afs_lookup_filldir; + cookie->name = dentry->d_name; + cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ + + read_seqlock_excl(&dvnode->cb_lock); + if (dvnode->cb_interest && + dvnode->cb_interest->server && + test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags)) + cookie->one_only = true; + read_sequnlock_excl(&dvnode->cb_lock); + + for (i = 0; i < 50; i++) + cookie->fids[i].vid = as->volume->vid; + + /* search the directory */ + ret = afs_dir_iterate(dir, &cookie->ctx, key); + if (ret < 0) { + inode = ERR_PTR(ret); + goto out; + } - if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + inode = ERR_PTR(-ENOENT); + if (!cookie->found) goto out; - ret = afs_probe_cell_name(dentry); - if (ret < 0) + /* Check to see if we already have an inode for the primary fid. */ + data.volume = dvnode->volume; + data.fid = cookie->fids[0]; + inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data); + if (inode) goto out; - inode = afs_iget_pseudo_dir(dir->i_sb, false); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + /* Need space for examining all the selected files */ + inode = ERR_PTR(-ENOMEM); + cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status), + GFP_KERNEL); + if (!cookie->statuses) goto out; + + cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback), + GFP_KERNEL); + if (!cookie->callbacks) + goto out_s; + + /* Try FS.InlineBulkStatus first. Abort codes for the individual + * lookups contained therein are stored in the reply without aborting + * the whole operation. + */ + if (cookie->one_only) + goto no_inline_bulk_status; + + inode = ERR_PTR(-ERESTARTSYS); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + if (test_bit(AFS_SERVER_FL_NO_IBULK, + &fc.cbi->server->flags)) { + fc.ac.abort_code = RX_INVALID_OPERATION; + fc.ac.error = -ECONNABORTED; + break; + } + afs_fs_inline_bulk_status(&fc, + afs_v2net(dvnode), + cookie->fids, + cookie->statuses, + cookie->callbacks, + cookie->nr_fids, NULL); + } + + if (fc.ac.error == 0) + cbi = afs_get_cb_interest(fc.cbi); + if (fc.ac.abort_code == RX_INVALID_OPERATION) + set_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags); + inode = ERR_PTR(afs_end_vnode_operation(&fc)); } - *fid = AFS_FS_I(inode)->fid; - _leave("= %p", inode); - return inode; + if (!IS_ERR(inode)) + goto success; + if (fc.ac.abort_code != RX_INVALID_OPERATION) + goto out_c; + +no_inline_bulk_status: + /* We could try FS.BulkStatus next, but this aborts the entire op if + * any of the lookups fails - so, for the moment, revert to + * FS.FetchStatus for just the primary fid. + */ + cookie->nr_fids = 1; + inode = ERR_PTR(-ERESTARTSYS); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + afs_fs_fetch_status(&fc, + afs_v2net(dvnode), + cookie->fids, + cookie->statuses, + cookie->callbacks, + NULL); + } + + if (fc.ac.error == 0) + cbi = afs_get_cb_interest(fc.cbi); + inode = ERR_PTR(afs_end_vnode_operation(&fc)); + } + if (IS_ERR(inode)) + goto out_c; + + for (i = 0; i < cookie->nr_fids; i++) + cookie->statuses[i].abort_code = 0; + +success: + /* Turn all the files into inodes and save the first one - which is the + * one we actually want. + */ + if (cookie->statuses[0].abort_code != 0) + inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code)); + + for (i = 0; i < cookie->nr_fids; i++) { + struct inode *ti; + + if (cookie->statuses[i].abort_code != 0) + continue; + + ti = afs_iget(dir->i_sb, key, &cookie->fids[i], + &cookie->statuses[i], + &cookie->callbacks[i], + cbi); + if (i == 0) { + inode = ti; + } else { + if (!IS_ERR(ti)) + iput(ti); + } + } + +out_c: + afs_put_cb_interest(afs_v2net(dvnode), cbi); + kfree(cookie->callbacks); +out_s: + kfree(cookie->statuses); out: - _leave("= %d", ret); - return ERR_PTR(ret); + kfree(cookie); + return inode; +} + +/* + * Look up an entry in a directory with @sys substitution. + */ +static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry, + struct key *key) +{ + struct afs_sysnames *subs; + struct afs_net *net = afs_i2net(dir); + struct dentry *ret; + char *buf, *p, *name; + int len, i; + + _enter(""); + + ret = ERR_PTR(-ENOMEM); + p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL); + if (!buf) + goto out_p; + if (dentry->d_name.len > 4) { + memcpy(p, dentry->d_name.name, dentry->d_name.len - 4); + p += dentry->d_name.len - 4; + } + + /* There is an ordered list of substitutes that we have to try. */ + read_lock(&net->sysnames_lock); + subs = net->sysnames; + refcount_inc(&subs->usage); + read_unlock(&net->sysnames_lock); + + for (i = 0; i < subs->nr; i++) { + name = subs->subs[i]; + len = dentry->d_name.len - 4 + strlen(name); + if (len >= AFSNAMEMAX) { + ret = ERR_PTR(-ENAMETOOLONG); + goto out_s; + } + + strcpy(p, name); + ret = lookup_one_len(buf, dentry->d_parent, len); + if (IS_ERR(ret) || d_is_positive(ret)) + goto out_s; + dput(ret); + } + + /* We don't want to d_add() the @sys dentry here as we don't want to + * the cached dentry to hide changes to the sysnames list. + */ + ret = NULL; +out_s: + afs_put_sysnames(subs); + kfree(buf); +out_p: + key_put(key); + return ret; } /* @@ -554,16 +822,13 @@ out: static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode; - struct afs_fid fid; + struct afs_vnode *dvnode = AFS_FS_I(dir); struct inode *inode; struct key *key; int ret; - vnode = AFS_FS_I(dir); - _enter("{%x:%u},%p{%pd},", - vnode->fid.vid, vnode->fid.vnode, dentry, dentry); + dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -572,28 +837,37 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); } - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) { _leave(" = -ESTALE"); return ERR_PTR(-ESTALE); } - key = afs_request_key(vnode->volume->cell); + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { _leave(" = %ld [key]", PTR_ERR(key)); return ERR_CAST(key); } - ret = afs_validate(vnode, key); + ret = afs_validate(dvnode, key); if (ret < 0) { key_put(key); _leave(" = %d [val]", ret); return ERR_PTR(ret); } - ret = afs_do_lookup(dir, dentry, &fid, key); - if (ret < 0) { + if (dentry->d_name.len >= 4 && + dentry->d_name.name[dentry->d_name.len - 4] == '@' && + dentry->d_name.name[dentry->d_name.len - 3] == 's' && + dentry->d_name.name[dentry->d_name.len - 2] == 'y' && + dentry->d_name.name[dentry->d_name.len - 1] == 's') + return afs_lookup_atsys(dir, dentry, key); + + afs_stat_v(dvnode, n_lookup); + inode = afs_do_lookup(dir, dentry, key); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); if (ret == -ENOENT) { - inode = afs_try_auto_mntpt(dentry, dir, &fid); + inode = afs_try_auto_mntpt(dentry, dir); if (!IS_ERR(inode)) { key_put(key); goto success; @@ -611,10 +885,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, _leave(" = %d [do]", ret); return ERR_PTR(ret); } - dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; + dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version; /* instantiate the dentry */ - inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL); key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); @@ -623,9 +896,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, success: d_add(dentry, inode); - _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }", - fid.vnode, - fid.unique, + _leave(" = 0 { ino=%lu v=%u }", d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); @@ -633,67 +904,23 @@ success: } /* - * Look up an entry in a dynroot directory. - */ -static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - struct afs_vnode *vnode; - struct afs_fid fid; - struct inode *inode; - int ret; - - vnode = AFS_FS_I(dir); - - _enter("%pd", dentry); - - ASSERTCMP(d_inode(dentry), ==, NULL); - - if (dentry->d_name.len >= AFSNAMEMAX) { - _leave(" = -ENAMETOOLONG"); - return ERR_PTR(-ENAMETOOLONG); - } - - inode = afs_try_auto_mntpt(dentry, dir, &fid); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); - return NULL; -} - -/* * check that a dentry lookup hit has found a valid entry * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct afs_super_info *as = dentry->d_sb->s_fs_info; struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; struct inode *inode; struct key *key; - void *dir_version; + long dir_version, de_version; int ret; if (flags & LOOKUP_RCU) return -ECHILD; - if (as->dyn_root) - return 1; - if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", @@ -729,14 +956,25 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out_bad_parent; } - dir_version = (void *) (unsigned long) dir->status.data_version; - if (dentry->d_fsdata == dir_version) - goto out_valid; /* the dir contents are unchanged */ + /* We only need to invalidate a dentry if the server's copy changed + * behind our back. If we made the change, it's no problem. Note that + * on a 32-bit system, we only have 32 bits in the dentry to store the + * version. + */ + dir_version = (long)dir->status.data_version; + de_version = (long)dentry->d_fsdata; + if (de_version == dir_version) + goto out_valid; + + dir_version = (long)dir->invalid_before; + if (de_version - dir_version >= 0) + goto out_valid; _debug("dir modified"); + afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key); + ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key); switch (ret) { case 0: /* the filename maps to something */ @@ -789,7 +1027,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) } out_valid: - dentry->d_fsdata = dir_version; + dentry->d_fsdata = (void *)dir_version; dput(parent); key_put(key); _leave(" = 1 [valid]"); @@ -840,7 +1078,7 @@ zap: /* * handle dentry release */ -static void afs_d_release(struct dentry *dentry) +void afs_d_release(struct dentry *dentry) { _enter("%pd", dentry); } @@ -854,6 +1092,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, struct afs_file_status *newstatus, struct afs_callback *newcb) { + struct afs_vnode *vnode; struct inode *inode; if (fc->ac.error < 0) @@ -871,6 +1110,8 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, return; } + vnode = AFS_FS_I(inode); + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); d_add(new_dentry, inode); } @@ -885,6 +1126,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFDIR; @@ -902,7 +1144,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_create(&fc, dentry->d_name.name, mode, + afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -916,6 +1158,11 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error_key; } + if (ret == 0 && + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_create); + key_put(key); _leave(" = 0"); return 0; @@ -939,6 +1186,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) clear_nlink(&vnode->vfs_inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); } } @@ -950,6 +1198,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd}", @@ -965,13 +1214,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_remove(&fc, dentry->d_name.name, true); + afs_fs_remove(&fc, dentry->d_name.name, true, + data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); - if (ret == 0) + if (ret == 0) { afs_dir_remove_subdir(dentry); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_rmdir); + } } key_put(key); @@ -1036,6 +1290,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; unsigned long d_version = (unsigned long)dentry->d_fsdata; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd}", @@ -1062,7 +1317,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_remove(&fc, dentry->d_name.name, false); + afs_fs_remove(&fc, dentry->d_name.name, false, + data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); @@ -1071,6 +1327,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = afs_dir_remove_link( dentry, key, d_version, (unsigned long)dvnode->status.data_version); + if (ret == 0 && + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); } error_key: @@ -1092,6 +1352,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; mode |= S_IFREG; @@ -1113,7 +1374,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_create(&fc, dentry->d_name.name, mode, + afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1127,6 +1388,10 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_create); + key_put(key); _leave(" = 0"); return 0; @@ -1148,10 +1413,12 @@ static int afs_link(struct dentry *from, struct inode *dir, struct afs_fs_cursor fc; struct afs_vnode *dvnode, *vnode; struct key *key; + u64 data_version; int ret; vnode = AFS_FS_I(d_inode(from)); dvnode = AFS_FS_I(dir); + data_version = dvnode->status.data_version; _enter("{%x:%u},{%x:%u},{%pd}", vnode->fid.vid, vnode->fid.vnode, @@ -1178,7 +1445,7 @@ static int afs_link(struct dentry *from, struct inode *dir, while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; - afs_fs_link(&fc, vnode, dentry->d_name.name); + afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); } afs_vnode_commit_status(&fc, dvnode, fc.cb_break); @@ -1194,6 +1461,10 @@ static int afs_link(struct dentry *from, struct inode *dir, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, + afs_edit_dir_for_link); + key_put(key); _leave(" = 0"); return 0; @@ -1217,6 +1488,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_fid newfid; struct key *key; + u64 data_version = dvnode->status.data_version; int ret; _enter("{%x:%u},{%pd},%s", @@ -1241,7 +1513,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - afs_fs_symlink(&fc, dentry->d_name.name, content, + afs_fs_symlink(&fc, dentry->d_name.name, + content, data_version, &newfid, &newstatus); } @@ -1255,6 +1528,10 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error_key; } + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &dentry->d_name, &newfid, + afs_edit_dir_for_symlink); + key_put(key); _leave(" = 0"); return 0; @@ -1277,6 +1554,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct key *key; + u64 orig_data_version, new_data_version; + bool new_negative = d_is_negative(new_dentry); int ret; if (flags) @@ -1285,6 +1564,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, vnode = AFS_FS_I(d_inode(old_dentry)); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); + orig_data_version = orig_dvnode->status.data_version; + new_data_version = new_dvnode->status.data_version; _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, @@ -1310,7 +1591,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; afs_fs_rename(&fc, old_dentry->d_name.name, - new_dvnode, new_dentry->d_name.name); + new_dvnode, new_dentry->d_name.name, + orig_data_version, new_data_version); } afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); @@ -1322,9 +1604,68 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error_key; } + if (ret == 0) { + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename); + + if (!new_negative && + test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename); + + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) + afs_edit_dir_add(new_dvnode, &new_dentry->d_name, + &vnode->fid, afs_edit_dir_for_rename); + } + error_key: key_put(key); error: _leave(" = %d", ret); return ret; } + +/* + * Release a directory page and clean up its private state if it's not busy + * - return true if the page can now be released, false if not + */ +static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + + set_page_private(page, 0); + ClearPagePrivate(page); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_relpg); + return 1; +} + +/* + * invalidate part or all of a page + * - release a page and clean up its private data if offset is 0 (indicating + * the entire page) + */ +static void afs_dir_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + + _enter("{%lu},%u,%u", page->index, offset, length); + + BUG_ON(!PageLocked(page)); + + /* The directory will need reloading. */ + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_stat_v(dvnode, n_inval); + + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == PAGE_SIZE) { + set_page_private(page, 0); + ClearPagePrivate(page); + } +} diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c new file mode 100644 index 000000000000..8b400f5aead5 --- /dev/null +++ b/fs/afs/dir_edit.c @@ -0,0 +1,505 @@ +/* AFS filesystem directory editing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/iversion.h> +#include "internal.h" +#include "xdr_fs.h" + +/* + * Find a number of contiguous clear bits in a directory block bitmask. + * + * There are 64 slots, which means we can load the entire bitmap into a + * variable. The first bit doesn't count as it corresponds to the block header + * slot. nr_slots is between 1 and 9. + */ +static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots) +{ + u64 bitmap; + u32 mask; + int bit, n; + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + bitmap >>= 1; /* The first entry is metadata */ + bit = 1; + mask = (1 << nr_slots) - 1; + + do { + if (sizeof(unsigned long) == 8) + n = ffz(bitmap); + else + n = ((u32)bitmap) != 0 ? + ffz((u32)bitmap) : + ffz((u32)(bitmap >> 32)) + 32; + bitmap >>= n; + bit += n; + + if ((bitmap & mask) == 0) { + if (bit > 64 - nr_slots) + return -1; + return bit; + } + + n = __ffs(bitmap); + bitmap >>= n; + bit += n; + } while (bitmap); + + return -1; +} + +/* + * Set a number of contiguous bits in the directory block bitmap. + */ +static void afs_set_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask, before, after; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + before = *(u64 *)block->hdr.bitmap; + + block->hdr.bitmap[0] |= (u8)(mask >> 0 * 8); + block->hdr.bitmap[1] |= (u8)(mask >> 1 * 8); + block->hdr.bitmap[2] |= (u8)(mask >> 2 * 8); + block->hdr.bitmap[3] |= (u8)(mask >> 3 * 8); + block->hdr.bitmap[4] |= (u8)(mask >> 4 * 8); + block->hdr.bitmap[5] |= (u8)(mask >> 5 * 8); + block->hdr.bitmap[6] |= (u8)(mask >> 6 * 8); + block->hdr.bitmap[7] |= (u8)(mask >> 7 * 8); + + after = *(u64 *)block->hdr.bitmap; +} + +/* + * Clear a number of contiguous bits in the directory block bitmap. + */ +static void afs_clear_contig_bits(union afs_xdr_dir_block *block, + int bit, unsigned int nr_slots) +{ + u64 mask, before, after; + + mask = (1 << nr_slots) - 1; + mask <<= bit; + + before = *(u64 *)block->hdr.bitmap; + + block->hdr.bitmap[0] &= ~(u8)(mask >> 0 * 8); + block->hdr.bitmap[1] &= ~(u8)(mask >> 1 * 8); + block->hdr.bitmap[2] &= ~(u8)(mask >> 2 * 8); + block->hdr.bitmap[3] &= ~(u8)(mask >> 3 * 8); + block->hdr.bitmap[4] &= ~(u8)(mask >> 4 * 8); + block->hdr.bitmap[5] &= ~(u8)(mask >> 5 * 8); + block->hdr.bitmap[6] &= ~(u8)(mask >> 6 * 8); + block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8); + + after = *(u64 *)block->hdr.bitmap; +} + +/* + * Scan a directory block looking for a dirent of the right name. + */ +static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name, + unsigned int blocknum) +{ + union afs_xdr_dirent *de; + u64 bitmap; + int d, len, n; + + _enter(""); + + bitmap = (u64)block->hdr.bitmap[0] << 0 * 8; + bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8; + bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8; + bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8; + bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8; + bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8; + bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8; + bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8; + + for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS); + d < AFS_DIR_SLOTS_PER_BLOCK; + d++) { + if (!((bitmap >> d) & 1)) + continue; + de = &block->dirents[d]; + if (de->u.valid != 1) + continue; + + /* The block was NUL-terminated by afs_dir_check_page(). */ + len = strlen(de->u.name); + if (len == name->len && + memcmp(de->u.name, name->name, name->len) == 0) + return d; + + n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE); + n /= AFS_DIR_DIRENT_SIZE; + d += n - 1; + } + + return -1; +} + +/* + * Initialise a new directory block. Note that block 0 is special and contains + * some extra metadata. + */ +static void afs_edit_init_block(union afs_xdr_dir_block *meta, + union afs_xdr_dir_block *block, int block_num) +{ + memset(block, 0, sizeof(*block)); + block->hdr.npages = htons(1); + block->hdr.magic = AFS_DIR_MAGIC; + block->hdr.bitmap[0] = 1; + + if (block_num == 0) { + block->hdr.bitmap[0] = 0xff; + block->hdr.bitmap[1] = 0x1f; + memset(block->meta.alloc_ctrs, + AFS_DIR_SLOTS_PER_BLOCK, + sizeof(block->meta.alloc_ctrs)); + meta->meta.alloc_ctrs[0] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0; + } + + if (block_num < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[block_num] = + AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS; +} + +/* + * Edit a directory's file data to add a new directory entry. Doing this after + * create, mkdir, symlink, link or rename if the data version number is + * incremented by exactly one avoids the need to re-download the entire + * directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_add(struct afs_vnode *vnode, + struct qstr *name, struct afs_fid *new_fid, + enum afs_edit_dir_reason why) +{ + union afs_xdr_dir_block *meta, *block; + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + gfp_t gfp; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to need. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + if (i_size == 0) + goto new_directory; + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + /* Find a block that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks + 1; b++) { + /* If the directory extended into a new page, then we need to + * tack a new page on the end. + */ + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index == 0) { + page = page0; + dir_page = meta_page; + } else { + if (nr_blocks >= AFS_DIR_MAX_BLOCKS) + goto error; + gfp = vnode->vfs_inode.i_mapping->gfp_mask; + page = find_or_create_page(vnode->vfs_inode.i_mapping, + index, gfp); + if (!page) + goto error; + if (!PagePrivate(page)) { + set_page_private(page, 1); + SetPagePrivate(page); + } + dir_page = kmap(page); + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + _debug("block %u: %2u %3u %u", + b, + (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99, + ntohs(block->hdr.npages), + ntohs(block->hdr.magic)); + + /* Initialise the block if necessary. */ + if (b == nr_blocks) { + _debug("init %u", b); + afs_edit_init_block(meta, block, b); + i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE); + } + + /* Only lower dir pages have a counter in the header. */ + if (b >= AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] >= need_slots) { + /* We need to try and find one or more consecutive + * slots to hold the entry. + */ + slot = afs_find_contig_bits(block, need_slots); + if (slot >= 0) { + _debug("slot %u", slot); + goto found_space; + } + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* There are no spare slots of sufficient size, yet the operation + * succeeded. Download the directory again. + */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +new_directory: + afs_edit_init_block(meta, meta, 0); + i_size = AFS_DIR_BLOCK_SIZE; + i_size_write(&vnode->vfs_inode, i_size); + slot = AFS_DIR_RESV_BLOCKS0; + page = page0; + block = meta; + nr_blocks = 1; + b = 0; + +found_space: + /* Set the dirent slot. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot, + new_fid->vnode, new_fid->unique, name->name); + de = &block->dirents[slot]; + de->u.valid = 1; + de->u.unused[0] = 0; + de->u.hash_next = 0; // TODO: Really need to maintain this + de->u.vnode = htonl(new_fid->vnode); + de->u.unique = htonl(new_fid->unique); + memcpy(de->u.name, name->name, name->len + 1); + de->u.name[name->len] = 0; + + /* Adjust the bitmap. */ + afs_set_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] -= need_slots; + + inode_inc_iversion_raw(&vnode->vfs_inode); + afs_stat_v(vnode, n_dir_cr); + _debug("Insert %s in %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} + +/* + * Edit a directory's file data to remove a new directory entry. Doing this + * after unlink, rmdir or rename if the data version number is incremented by + * exactly one avoids the need to re-download the entire directory contents. + * + * The caller must hold the inode locked. + */ +void afs_edit_dir_remove(struct afs_vnode *vnode, + struct qstr *name, enum afs_edit_dir_reason why) +{ + struct afs_xdr_dir_page *meta_page, *dir_page; + union afs_xdr_dir_block *meta, *block; + union afs_xdr_dirent *de; + struct page *page0, *page; + unsigned int need_slots, nr_blocks, b; + pgoff_t index; + loff_t i_size; + int slot; + + _enter(",,{%d,%s},", name->len, name->name); + + i_size = i_size_read(&vnode->vfs_inode); + if (i_size < AFS_DIR_BLOCK_SIZE || + i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || + (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + return; + } + nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; + + page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0); + if (!page0) { + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + _leave(" [fgp]"); + return; + } + + /* Work out how many slots we're going to discard. */ + need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); + need_slots /= AFS_DIR_DIRENT_SIZE; + + meta_page = kmap(page0); + meta = &meta_page->blocks[0]; + + /* Find a page that has sufficient slots available. Each VM page + * contains two or more directory blocks. + */ + for (b = 0; b < nr_blocks; b++) { + index = b / AFS_DIR_BLOCKS_PER_PAGE; + if (index != 0) { + page = find_lock_page(vnode->vfs_inode.i_mapping, index); + if (!page) + goto error; + dir_page = kmap(page); + } else { + page = page0; + dir_page = meta_page; + } + + /* Abandon the edit if we got a callback break. */ + if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + goto invalidated; + + block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; + + if (b > AFS_DIR_BLOCKS_WITH_CTR || + meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) { + slot = afs_dir_scan_block(block, name, b); + if (slot >= 0) + goto found_dirent; + } + + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + } + + /* Didn't find the dirent to clobber. Download the directory again. */ + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; + +found_dirent: + de = &block->dirents[slot]; + + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot, + ntohl(de->u.vnode), ntohl(de->u.unique), + name->name); + + memset(de, 0, sizeof(*de) * need_slots); + + /* Adjust the bitmap. */ + afs_clear_contig_bits(block, slot, need_slots); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + + /* Adjust the allocation counter. */ + if (b < AFS_DIR_BLOCKS_WITH_CTR) + meta->meta.alloc_ctrs[b] += need_slots; + + inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version); + afs_stat_v(vnode, n_dir_rm); + _debug("Remove %s from %u[%u]", name->name, b, slot); + +out_unmap: + unlock_page(page0); + kunmap(page0); + put_page(page0); + _leave(""); + return; + +invalidated: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + if (page != page0) { + unlock_page(page); + kunmap(page); + put_page(page); + } + goto out_unmap; + +error: + trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error, + 0, 0, 0, 0, name->name); + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + goto out_unmap; +} diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c new file mode 100644 index 000000000000..983f3946ab57 --- /dev/null +++ b/fs/afs/dynroot.c @@ -0,0 +1,209 @@ +/* dir.c: AFS dynamic root handling + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/dns_resolver.h> +#include "internal.h" + +const struct file_operations afs_dynroot_file_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .iterate_shared = dcache_readdir, + .llseek = dcache_dir_lseek, +}; + +/* + * Probe to see if a cell may exist. This prevents positive dentries from + * being created unnecessarily. + */ +static int afs_probe_cell_name(struct dentry *dentry) +{ + struct afs_cell *cell; + const char *name = dentry->d_name.name; + size_t len = dentry->d_name.len; + int ret; + + /* Names prefixed with a dot are R/W mounts. */ + if (name[0] == '.') { + if (len == 1) + return -EINVAL; + name++; + len--; + } + + cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + if (!IS_ERR(cell)) { + afs_put_cell(afs_d2net(dentry), cell); + return 0; + } + + ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + if (ret == -ENODATA) + ret = -EDESTADDRREQ; + return ret; +} + +/* + * Try to auto mount the mountpoint with pseudo directory, if the autocell + * operation is setted. + */ +struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) +{ + struct afs_vnode *vnode = AFS_FS_I(dir); + struct inode *inode; + int ret = -ENOENT; + + _enter("%p{%pd}, {%x:%u}", + dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + + if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + goto out; + + ret = afs_probe_cell_name(dentry); + if (ret < 0) + goto out; + + inode = afs_iget_pseudo_dir(dir->i_sb, false); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + _leave("= %p", inode); + return inode; + +out: + _leave("= %d", ret); + return ERR_PTR(ret); +} + +/* + * Look up @cell in a dynroot directory. This is a substitution for the + * local cell name for the net namespace. + */ +static struct dentry *afs_lookup_atcell(struct dentry *dentry) +{ + struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); + struct dentry *ret; + unsigned int seq = 0; + char *name; + int len; + + if (!net->ws_cell) + return ERR_PTR(-ENOENT); + + ret = ERR_PTR(-ENOMEM); + name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL); + if (!name) + goto out_p; + + rcu_read_lock(); + do { + read_seqbegin_or_lock(&net->cells_lock, &seq); + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len + 1); + } + } while (need_seqretry(&net->cells_lock, seq)); + done_seqretry(&net->cells_lock, seq); + rcu_read_unlock(); + + ret = ERR_PTR(-ENOENT); + if (!cell) + goto out_n; + + ret = lookup_one_len(name, dentry->d_parent, len); + + /* We don't want to d_add() the @cell dentry here as we don't want to + * the cached dentry to hide changes to the local cell name. + */ + +out_n: + kfree(name); +out_p: + return ret; +} + +/* + * Look up an entry in a dynroot directory. + */ +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct afs_vnode *vnode; + struct inode *inode; + int ret; + + vnode = AFS_FS_I(dir); + + _enter("%pd", dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + if (dentry->d_name.len == 5 && + memcmp(dentry->d_name.name, "@cell", 5) == 0) + return afs_lookup_atcell(dentry); + + inode = afs_try_auto_mntpt(dentry, dir); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + if (ret == -ENOENT) { + d_add(dentry, NULL); + _leave(" = NULL [negative]"); + return NULL; + } + _leave(" = %d [do]", ret); + return ERR_PTR(ret); + } + + d_add(dentry, inode); + _leave(" = 0 { ino=%lu v=%u }", + d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); + return NULL; +} + +const struct inode_operations afs_dynroot_inode_operations = { + .lookup = afs_dynroot_lookup, +}; + +/* + * Dirs in the dynamic root don't need revalidation. + */ +static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +/* + * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't + * sleep) + * - called from dput() when d_count is going to 0. + * - return 1 to request dentry be unhashed, 0 otherwise + */ +static int afs_dynroot_d_delete(const struct dentry *dentry) +{ + return d_really_is_positive(dentry); +} + +const struct dentry_operations afs_dynroot_dentry_operations = { + .d_revalidate = afs_dynroot_d_revalidate, + .d_delete = afs_dynroot_d_delete, + .d_release = afs_d_release, + .d_automount = afs_d_automount, +}; diff --git a/fs/afs/file.c b/fs/afs/file.c index 79e665a35fea..c24c08016dd9 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,7 +30,6 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, - .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -146,6 +145,9 @@ int afs_open(struct inode *inode, struct file *file) if (ret < 0) goto error_af; } + + if (file->f_flags & O_TRUNC) + set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); file->private_data = af; _leave(" = 0"); @@ -170,6 +172,9 @@ int afs_release(struct inode *inode, struct file *file) _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + if ((file->f_mode & FMODE_WRITE)) + return vfs_fsync(file, 0); + file->private_data = NULL; if (af->wb) afs_put_wb_key(af->wb); @@ -187,10 +192,12 @@ void afs_put_read(struct afs_read *req) { int i; - if (atomic_dec_and_test(&req->usage)) { + if (refcount_dec_and_test(&req->usage)) { for (i = 0; i < req->nr_pages; i++) if (req->pages[i]) put_page(req->pages[i]); + if (req->pages != req->array) + kfree(req->pages); kfree(req); } } @@ -240,6 +247,12 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = afs_end_vnode_operation(&fc); } + if (ret == 0) { + afs_stat_v(vnode, n_fetches); + atomic_long_add(desc->actual_len, + &afs_v2net(vnode)->n_fetch_bytes); + } + _leave(" = %d", ret); return ret; } @@ -297,10 +310,11 @@ int afs_page_filler(void *data, struct page *page) * end of the file, the server will return a short read and the * unmarshalling code will clear the unfilled space. */ - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; req->len = PAGE_SIZE; req->nr_pages = 1; + req->pages = req->array; req->pages[0] = page; get_page(page); @@ -309,10 +323,6 @@ int afs_page_filler(void *data, struct page *page) ret = afs_fetch_data(vnode, key, req); afs_put_read(req); - if (ret >= 0 && S_ISDIR(inode->i_mode) && - !afs_dir_check_page(inode, page)) - ret = -EIO; - if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" @@ -447,10 +457,11 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, if (!req) return -ENOMEM; - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->page_done = afs_readpages_page_done; req->pos = first->index; req->pos <<= PAGE_SHIFT; + req->pages = req->array; /* Transfer the pages to the request. We add them in until one fails * to add to the LRU and then we stop (as that'll make a hole in the diff --git a/fs/afs/flock.c b/fs/afs/flock.c index c40ba2fe3cbe..7a0e017070ec 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -613,7 +613,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) goto error; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 88ec38c2d83c..efacdb7c1dee 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,7 @@ #include <linux/iversion.h> #include "internal.h" #include "afs_fs.h" +#include "xdr_fs.h" static const struct afs_fid afs_zero_fid; @@ -44,109 +45,194 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) } /* - * decode an AFSFetchStatus block + * Dump a bad file status record. */ -static void xdr_decode_AFSFetchStatus(const __be32 **_bp, - struct afs_file_status *status, - struct afs_vnode *vnode, - afs_dataversion_t *store_version) +static void xdr_dump_bad(const __be32 *bp) { - afs_dataversion_t expected_version; - const __be32 *bp = *_bp; + __be32 x[4]; + int i; + + pr_notice("AFS XDR: Bad status record\n"); + for (i = 0; i < 5 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 4); + pr_notice("0x50: %08x\n", ntohl(x[0])); +} + +/* + * Update the core inode struct from a returned status record. + */ +void afs_update_inode_from_status(struct afs_vnode *vnode, + struct afs_file_status *status, + const afs_dataversion_t *expected_version, + u8 flags) +{ + struct timespec t; umode_t mode; + + t.tv_sec = status->mtime_client; + t.tv_nsec = 0; + vnode->vfs_inode.i_ctime = t; + vnode->vfs_inode.i_mtime = t; + vnode->vfs_inode.i_atime = t; + + if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) { + vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); + vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); + set_nlink(&vnode->vfs_inode, status->nlink); + + mode = vnode->vfs_inode.i_mode; + mode &= ~S_IALLUGO; + mode |= status->mode; + barrier(); + vnode->vfs_inode.i_mode = mode; + } + + if (!(flags & AFS_VNODE_NOT_YET_SET)) { + if (expected_version && + *expected_version != status->data_version) { + _debug("vnode modified %llx on {%x:%u} [exp %llx]", + (unsigned long long) status->data_version, + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long) *expected_version); + vnode->invalid_before = status->data_version; + if (vnode->status.type == AFS_FTYPE_DIR) { + if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + afs_stat_v(vnode, n_inval); + } else { + set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); + } + } else if (vnode->status.type == AFS_FTYPE_DIR) { + /* Expected directory change is handled elsewhere so + * that we can locally edit the directory and save on a + * download. + */ + if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) + flags &= ~AFS_VNODE_DATA_CHANGED; + } + } + + if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) { + inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + i_size_write(&vnode->vfs_inode, status->size); + } +} + +/* + * decode an AFSFetchStatus block + */ +static int xdr_decode_AFSFetchStatus(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; u64 data_version, size; - bool changed = false; - kuid_t owner; - kgid_t group; + u32 type, abort_code; + u8 flags = 0; + int ret; if (vnode) write_seqlock(&vnode->cb_lock); -#define EXTRACT(DST) \ - do { \ - u32 x = ntohl(*bp++); \ - if (DST != x) \ - changed |= true; \ - DST = x; \ - } while (0) - - status->if_version = ntohl(*bp++); - EXTRACT(status->type); - EXTRACT(status->nlink); - size = ntohl(*bp++); - data_version = ntohl(*bp++); - EXTRACT(status->author); - owner = make_kuid(&init_user_ns, ntohl(*bp++)); - changed |= !uid_eq(owner, status->owner); - status->owner = owner; - EXTRACT(status->caller_access); /* call ticket dependent */ - EXTRACT(status->anon_access); - EXTRACT(status->mode); - bp++; /* parent.vnode */ - bp++; /* parent.unique */ - bp++; /* seg size */ - status->mtime_client = ntohl(*bp++); - status->mtime_server = ntohl(*bp++); - group = make_kgid(&init_user_ns, ntohl(*bp++)); - changed |= !gid_eq(group, status->group); - status->group = group; - bp++; /* sync counter */ - data_version |= (u64) ntohl(*bp++) << 32; - EXTRACT(status->lock_count); - size |= (u64) ntohl(*bp++) << 32; - bp++; /* spare 4 */ - *_bp = bp; + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); + goto bad; + } - if (size != status->size) { - status->size = size; - changed |= true; + type = ntohl(xdr->type); + abort_code = ntohl(xdr->abort_code); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + if (type != status->type && + vnode && + !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { + pr_warning("Vnode %x:%x:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, type); + goto bad; + } + status->type = type; + break; + case AFS_FTYPE_INVALID: + if (abort_code != 0) { + status->abort_code = abort_code; + ret = 0; + goto out; + } + /* Fall through */ + default: + goto bad; } - status->mode &= S_IALLUGO; - _debug("vnode time %lx, %lx", - status->mtime_client, status->mtime_server); +#define EXTRACT_M(FIELD) \ + do { \ + u32 x = ntohl(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) - if (vnode) { - if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - _debug("vnode changed"); - i_size_write(&vnode->vfs_inode, size); - vnode->vfs_inode.i_uid = status->owner; - vnode->vfs_inode.i_gid = status->group; - vnode->vfs_inode.i_generation = vnode->fid.unique; - set_nlink(&vnode->vfs_inode, status->nlink); - - mode = vnode->vfs_inode.i_mode; - mode &= ~S_IALLUGO; - mode |= status->mode; - barrier(); - vnode->vfs_inode.i_mode = mode; - } + EXTRACT_M(nlink); + EXTRACT_M(author); + EXTRACT_M(owner); + EXTRACT_M(caller_access); /* call ticket dependent */ + EXTRACT_M(anon_access); + EXTRACT_M(mode); + EXTRACT_M(group); + + status->mtime_client = ntohl(xdr->mtime_client); + status->mtime_server = ntohl(xdr->mtime_server); + status->lock_count = ntohl(xdr->lock_count); + + size = (u64)ntohl(xdr->size_lo); + size |= (u64)ntohl(xdr->size_hi) << 32; + status->size = size; + + data_version = (u64)ntohl(xdr->data_version_lo); + data_version |= (u64)ntohl(xdr->data_version_hi) << 32; + if (data_version != status->data_version) { + status->data_version = data_version; + flags |= AFS_VNODE_DATA_CHANGED; + } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; - vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; - vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; - inode_set_iversion_raw(&vnode->vfs_inode, data_version); + if (read_req) { + read_req->data_version = data_version; + read_req->file_size = size; } - expected_version = status->data_version; - if (store_version) - expected_version = *store_version; + *_bp = (const void *)*_bp + sizeof(*xdr); - if (expected_version != data_version) { - status->data_version = data_version; - if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - _debug("vnode modified %llx on {%x:%u}", - (unsigned long long) data_version, - vnode->fid.vid, vnode->fid.vnode); - set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); - set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); - } - } else if (store_version) { - status->data_version = data_version; + if (vnode) { + if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) + flags |= AFS_VNODE_NOT_YET_SET; + afs_update_inode_from_status(vnode, status, expected_version, + flags); } + ret = 0; + +out: if (vnode) write_sequnlock(&vnode->cb_lock); + return ret; + +bad: + xdr_dump_bad(*_bp); + ret = afs_protocol_error(call, -EBADMSG); + goto out; } /* @@ -274,7 +360,7 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call) +static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; @@ -288,7 +374,9 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -300,17 +388,18 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* * FS.FetchStatus operation type */ -static const struct afs_call_type afs_RXFSFetchStatus = { - .name = "FS.FetchStatus", +static const struct afs_call_type afs_RXFSFetchStatus_vnode = { + .name = "FS.FetchStatus(vnode)", .op = afs_FS_FetchStatus, - .deliver = afs_deliver_fs_fetch_status, + .deliver = afs_deliver_fs_fetch_status_vnode, .destructor = afs_flat_call_destructor, }; /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, + bool new_inode) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -320,7 +409,8 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy _enter(",%x,{%x:%u},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode, + 16, (21 + 3 + 6) * 4); if (!call) { fc->ac.error = -ENOMEM; return -ENOMEM; @@ -329,6 +419,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->key = fc->key; call->reply[0] = vnode; call->reply[1] = volsync; + call->expected_version = new_inode ? 1 : vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -464,7 +555,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -534,6 +627,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -546,7 +640,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) bp[6] = 0; bp[7] = htonl(lower_32_bits(req->len)); - atomic_inc(&req->usage); + refcount_inc(&req->usage); call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); @@ -578,6 +672,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -588,7 +683,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) bp[4] = htonl(lower_32_bits(req->pos)); bp[5] = htonl(lower_32_bits(req->len)); - atomic_inc(&req->usage); + refcount_inc(&req->usage); call->cb_break = fc->cb_break; afs_use_fs_server(call, fc->cbi); trace_afs_make_fs_call(call, &vnode->fid); @@ -613,8 +708,10 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || + xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -645,6 +742,7 @@ static const struct afs_call_type afs_RXFSMakeDir = { int afs_fs_create(struct afs_fs_cursor *fc, const char *name, umode_t mode, + u64 current_data_version, struct afs_fid *newfid, struct afs_file_status *newstatus, struct afs_callback *newcb) @@ -672,6 +770,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, call->reply[1] = newfid; call->reply[2] = newstatus; call->reply[3] = newcb; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -715,7 +814,9 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -742,7 +843,8 @@ static const struct afs_call_type afs_RXFSRemoveDir = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) +int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, + u64 current_data_version) { struct afs_vnode *vnode = fc->vnode; struct afs_call *call; @@ -764,6 +866,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) call->key = fc->key; call->reply[0] = vnode; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -801,8 +904,10 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || + xdr_decode_AFSFetchStatus(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -823,7 +928,7 @@ static const struct afs_call_type afs_RXFSLink = { * make a hard link */ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name) + const char *name, u64 current_data_version) { struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; @@ -844,6 +949,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, call->key = fc->key; call->reply[0] = dvnode; call->reply[1] = vnode; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -885,8 +991,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) || + xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -909,6 +1017,7 @@ static const struct afs_call_type afs_RXFSSymlink = { int afs_fs_symlink(struct afs_fs_cursor *fc, const char *name, const char *contents, + u64 current_data_version, struct afs_fid *newfid, struct afs_file_status *newstatus) { @@ -937,6 +1046,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; + call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -987,10 +1097,13 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL); - if (new_dvnode != orig_dvnode) - xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, - NULL); + if (xdr_decode_AFSFetchStatus(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); + if (new_dvnode != orig_dvnode && + xdr_decode_AFSFetchStatus(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1013,7 +1126,9 @@ static const struct afs_call_type afs_RXFSRename = { int afs_fs_rename(struct afs_fs_cursor *fc, const char *orig_name, struct afs_vnode *new_dvnode, - const char *new_name) + const char *new_name, + u64 current_orig_data_version, + u64 current_new_data_version) { struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; @@ -1041,6 +1156,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, call->key = fc->key; call->reply[0] = orig_dvnode; call->reply[1] = new_dvnode; + call->expected_version = current_orig_data_version + 1; + call->expected_version_2 = current_new_data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1089,8 +1206,9 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, - &call->store_version); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1147,7 +1265,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1222,7 +1340,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, call->first_offset = offset; call->last_to = to; call->send_pages = true; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1252,7 +1370,6 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, */ static int afs_deliver_fs_store_status(struct afs_call *call) { - afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1264,12 +1381,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ - store_version = NULL; - if (call->operation_ID == FSSTOREDATA) - store_version = &call->store_version; - bp = call->buffer; - xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); + if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1324,7 +1439,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1373,7 +1488,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; - call->store_version = vnode->status.data_version + 1; + call->expected_version = vnode->status.data_version + 1; /* marshall the parameters */ bp = call->request; @@ -1418,6 +1533,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) call->key = fc->key; call->reply[0] = vnode; + call->expected_version = vnode->status.data_version; /* marshall the parameters */ bp = call->request; @@ -1471,7 +1587,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1518,7 +1634,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1565,7 +1681,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->offset = 0; call->unmarshall++; @@ -1947,3 +2063,265 @@ int afs_fs_get_capabilities(struct afs_net *net, trace_afs_make_fs_call(call, NULL); return afs_make_call(ac, call, GFP_NOFS, false); } + +/* + * Deliver reply data to an FS.FetchStatus with no vnode. + */ +static int afs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_file_status *status = call->reply[1]; + struct afs_callback *callback = call->reply[2]; + struct afs_volsync *volsync = call->reply[3]; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_AFSFetchStatus(call, &bp, status, vnode, + &call->expected_version, NULL); + callback[call->count].version = ntohl(bp[0]); + callback[call->count].expiry = ntohl(bp[1]); + callback[call->count].type = ntohl(bp[2]); + if (vnode) + xdr_decode_AFSCallBack(call, vnode, &bp); + else + bp += 3; + if (volsync) + xdr_decode_AFSVolSync(&bp, volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.FetchStatus operation type + */ +static const struct afs_call_type afs_RXFSFetchStatus = { + .name = "FS.FetchStatus", + .op = afs_FS_FetchStatus, + .deliver = afs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a fid without needing a vnode handle. + */ +int afs_fs_fetch_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fid, + struct afs_file_status *status, + struct afs_callback *callback, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%x:%u},,", + key_serial(fc->key), fid->vid, fid->vnode); + + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = status; + call->reply[2] = callback; + call->reply[3] = volsync; + call->expected_version = 1; /* vnode->status.data_version */ + + /* marshall the parameters */ + bp = call->request; + bp[0] = htonl(FSFETCHSTATUS); + bp[1] = htonl(fid->vid); + bp[2] = htonl(fid->vnode); + bp[3] = htonl(fid->unique); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.InlineBulkStatus call + */ +static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, &call->tmp, 4, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, call->count2); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG); + + call->count = 0; + call->unmarshall++; + more_counts: + call->offset = 0; + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, call->buffer, 21 * 4, true); + if (ret < 0) + return ret; + + bp = call->buffer; + statuses = call->reply[1]; + if (xdr_decode_AFSFetchStatus(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL) < 0) + return afs_protocol_error(call, -EBADMSG); + + call->count++; + if (call->count < call->count2) + goto more_counts; + + call->count = 0; + call->unmarshall++; + call->offset = 0; + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, &call->tmp, 4, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG); + call->count = 0; + call->unmarshall++; + more_cbs: + call->offset = 0; + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, call->buffer, 3 * 4, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + bp = call->buffer; + callbacks = call->reply[2]; + callbacks[call->count].version = ntohl(bp[0]); + callbacks[call->count].expiry = ntohl(bp[1]); + callbacks[call->count].type = ntohl(bp[2]); + statuses = call->reply[1]; + if (call->count == 0 && vnode && statuses[0].abort_code == 0) + xdr_decode_AFSCallBack(call, vnode, &bp); + call->count++; + if (call->count < call->count2) + goto more_cbs; + + call->offset = 0; + call->unmarshall++; + + case 5: + ret = afs_extract_data(call, call->buffer, 6 * 4, false); + if (ret < 0) + return ret; + + bp = call->buffer; + if (call->reply[3]) + xdr_decode_AFSVolSync(&bp, call->reply[3]); + + call->offset = 0; + call->unmarshall++; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type afs_RXFSInlineBulkStatus = { + .name = "FS.InlineBulkStatus", + .op = afs_FS_InlineBulkStatus, + .deliver = afs_deliver_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 50 files + */ +int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fids, + struct afs_file_status *statuses, + struct afs_callback *callbacks, + unsigned int nr_fids, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + int i; + + _enter(",%x,{%x:%u},%u", + key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + + call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus, + (2 + nr_fids * 3) * 4, + 21 * 4); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = statuses; + call->reply[2] = callbacks; + call->reply[3] = volsync; + call->count2 = nr_fids; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSINLINEBULKSTATUS); + *bp++ = htonl(nr_fids); + for (i = 0; i < nr_fids; i++) { + *bp++ = htonl(fids[i].vid); + *bp++ = htonl(fids[i].vnode); + *bp++ = htonl(fids[i].unique); + } + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &fids[0]); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 65c5b1edd338..06194cfe9724 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -30,12 +30,11 @@ static const struct inode_operations afs_symlink_inode_operations = { }; /* - * map the AFS file status to the inode member variables + * Initialise an inode from the vnode status. */ -static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) +static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key) { struct inode *inode = AFS_VNODE_TO_I(vnode); - bool changed; _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", vnode->status.type, @@ -46,16 +45,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) read_seqlock_excl(&vnode->cb_lock); + afs_update_inode_from_status(vnode, &vnode->status, NULL, + AFS_VNODE_NOT_YET_SET); + switch (vnode->status.type) { case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | vnode->status.mode; inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; + inode->i_mapping->a_ops = &afs_dir_aops; break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. */ @@ -67,45 +71,31 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; + inode->i_mapping->a_ops = &afs_fs_aops; } else { inode->i_mode = S_IFLNK | vnode->status.mode; inode->i_op = &afs_symlink_inode_operations; + inode->i_mapping->a_ops = &afs_fs_aops; } inode_nohighmem(inode); break; default: printk("kAFS: AFS vnode with undefined type\n"); read_sequnlock_excl(&vnode->cb_lock); - return -EBADMSG; + return afs_protocol_error(NULL, -EBADMSG); } - changed = (vnode->status.size != inode->i_size); - - set_nlink(inode, vnode->status.nlink); - inode->i_uid = vnode->status.owner; - inode->i_gid = vnode->status.group; - inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_client; - inode->i_ctime.tv_nsec = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; - inode->i_generation = vnode->fid.unique; - inode_set_iversion_raw(inode, vnode->status.data_version); - inode->i_mapping->a_ops = &afs_fs_aops; + vnode->invalid_before = vnode->status.data_version; read_sequnlock_excl(&vnode->cb_lock); - -#ifdef CONFIG_AFS_FSCACHE - if (changed) - fscache_attr_changed(vnode->cache); -#endif return 0; } /* * Fetch file status from the volume. */ -int afs_fetch_status(struct afs_vnode *vnode, struct key *key) +int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) { struct afs_fs_cursor fc; int ret; @@ -119,7 +109,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key) if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = vnode->cb_break + vnode->cb_s_break; - afs_fs_fetch_file_status(&fc, NULL); + afs_fs_fetch_file_status(&fc, NULL, new_inode); } afs_check_for_remote_deletion(&fc, fc.vnode); @@ -255,6 +245,11 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) } __packed key; struct afs_vnode_cache_aux aux; + if (vnode->status.type == AFS_FTYPE_DIR) { + vnode->cache = NULL; + return; + } + key.vnode_id = vnode->fid.vnode; key.unique = vnode->fid.unique; key.vnode_id_ext[0] = 0; @@ -307,7 +302,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, true); if (ret < 0) goto bad_inode; } else { @@ -331,15 +326,12 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_expires_at += ktime_get_real_seconds(); } - /* set up caching before mapping the status, as map-status reads the - * first page of symlinks to see if they're really mountpoints */ - inode->i_size = vnode->status.size; - afs_get_inode_cache(vnode); - - ret = afs_inode_map_status(vnode, key); + ret = afs_inode_init_from_status(vnode, key); if (ret < 0) goto bad_inode; + afs_get_inode_cache(vnode); + /* success */ clear_bit(AFS_VNODE_UNSET, &vnode->flags); inode->i_flags |= S_NOATIME; @@ -349,10 +341,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vnode->cache, NULL, ret == -ENOENT); - vnode->cache = NULL; -#endif iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); @@ -407,8 +395,11 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; - } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) && - !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && + } else if (vnode->status.type == AFS_FTYPE_DIR && + test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) && + vnode->cb_expires_at - 10 > now) { + valid = true; + } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && vnode->cb_expires_at - 10 > now) { valid = true; } @@ -432,7 +423,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -453,8 +444,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - - clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); valid: _leave(" = 0"); @@ -544,7 +533,7 @@ void afs_evict_inode(struct inode *inode) } #endif - afs_put_permits(vnode->permit_cache); + afs_put_permits(rcu_access_pointer(vnode->permit_cache)); _leave(""); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a6a1d75eee41..f8086ec95e24 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -122,7 +122,8 @@ struct afs_call { u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ - afs_dataversion_t store_version; /* updated version expected from store */ + afs_dataversion_t expected_version; /* Updated version expected from store */ + afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ }; struct afs_call_type { @@ -173,11 +174,14 @@ struct afs_read { loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ loff_t remain; /* Amount remaining */ - atomic_t usage; + loff_t file_size; /* File size returned by server */ + afs_dataversion_t data_version; /* Version number returned by server */ + refcount_t usage; unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); - struct page *pages[]; + struct page **pages; + struct page *array[]; }; /* @@ -199,6 +203,18 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) extern struct file_system_type afs_fs_type; /* + * Set of substitutes for @sys. + */ +struct afs_sysnames { +#define AFS_NR_SYSNAME 16 + char *subs[AFS_NR_SYSNAME]; + refcount_t usage; + unsigned short nr; + short error; + char blank[1]; +}; + +/* * AFS network namespace record. */ struct afs_net { @@ -245,9 +261,25 @@ struct afs_net { struct mutex lock_manager_mutex; /* Misc */ - struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ + struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ + struct afs_sysnames *sysnames; + rwlock_t sysnames_lock; + + /* Statistics counters */ + atomic_t n_lookup; /* Number of lookups done */ + atomic_t n_reval; /* Number of dentries needing revalidation */ + atomic_t n_inval; /* Number of invalidations by the server */ + atomic_t n_relpg; /* Number of invalidations by releasepage */ + atomic_t n_read_dir; /* Number of directory pages read */ + atomic_t n_dir_cr; /* Number of directory entry creation edits */ + atomic_t n_dir_rm; /* Number of directory entry removal edits */ + atomic_t n_stores; /* Number of store ops */ + atomic_long_t n_store_bytes; /* Number of bytes stored */ + atomic_long_t n_fetch_bytes; /* Number of bytes fetched */ + atomic_t n_fetches; /* Number of data fetch ops */ }; +extern const char afs_init_sysname[]; extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns enum afs_cell_state { @@ -363,6 +395,7 @@ struct afs_server { #define AFS_SERVER_FL_UPDATING 4 #define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ +#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ atomic_t usage; u32 addr_version; /* Address list version */ @@ -455,10 +488,11 @@ struct afs_vnode { struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ + afs_dataversion_t invalid_before; /* Child dentries are invalid before this */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - struct afs_permits *permit_cache; /* cache of permits so far obtained */ + struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ @@ -466,12 +500,13 @@ struct afs_vnode { unsigned long flags; #define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ -#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */ +#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ #define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ +#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ @@ -611,7 +646,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; */ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); -extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); +extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); @@ -646,11 +681,26 @@ extern bool afs_cm_incoming_call(struct afs_call *); */ extern const struct file_operations afs_dir_file_operations; extern const struct inode_operations afs_dir_inode_operations; +extern const struct address_space_operations afs_dir_aops; +extern const struct dentry_operations afs_fs_dentry_operations; + +extern void afs_d_release(struct dentry *); + +/* + * dir_edit.c + */ +extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *, + enum afs_edit_dir_reason); +extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason); + +/* + * dynroot.c + */ extern const struct file_operations afs_dynroot_file_operations; extern const struct inode_operations afs_dynroot_inode_operations; -extern const struct dentry_operations afs_fs_dentry_operations; +extern const struct dentry_operations afs_dynroot_dentry_operations; -extern bool afs_dir_check_page(struct inode *, struct page *); +extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); /* * file.c @@ -680,17 +730,23 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *); +#define AFS_VNODE_NOT_YET_SET 0x01 +#define AFS_VNODE_META_CHANGED 0x02 +#define AFS_VNODE_DATA_CHANGED 0x04 +extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *, + const afs_dataversion_t *, u8); + +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); -extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64, struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool); -extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *); -extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, +extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, struct afs_fid *, struct afs_file_status *); extern int afs_fs_rename(struct afs_fs_cursor *, const char *, - struct afs_vnode *, const char *); + struct afs_vnode *, const char *, u64, u64); extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, pgoff_t, pgoff_t, unsigned, unsigned); extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); @@ -702,11 +758,18 @@ extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); +extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, unsigned int, + struct afs_volsync *); +extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, struct afs_volsync *); /* * inode.c */ -extern int afs_fetch_status(struct afs_vnode *, struct key *); +extern int afs_fetch_status(struct afs_vnode *, struct key *, bool); extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct super_block *, struct key *, @@ -754,6 +817,13 @@ static inline void afs_put_net(struct afs_net *net) { } +static inline void __afs_stat(atomic_t *s) +{ + atomic_inc(s); +} + +#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n) + /* * misc.c */ @@ -781,6 +851,7 @@ extern int __net_init afs_proc_init(struct afs_net *); extern void __net_exit afs_proc_cleanup(struct afs_net *); extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +extern void afs_put_sysnames(struct afs_sysnames *); /* * rotate.c @@ -809,6 +880,7 @@ extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, void *, size_t, bool); +extern int afs_protocol_error(struct afs_call *, int); static inline int afs_transfer_reply(struct afs_call *call) { @@ -955,7 +1027,6 @@ extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern int afs_page_mkwrite(struct vm_fault *); extern void afs_prune_wb_keys(struct afs_vnode *); diff --git a/fs/afs/main.c b/fs/afs/main.c index 15a02a05ff40..d7560168b3bf 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -34,11 +34,42 @@ MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); struct workqueue_struct *afs_wq; struct afs_net __afs_net; +#if defined(CONFIG_ALPHA) +const char afs_init_sysname[] = "alpha_linux26"; +#elif defined(CONFIG_X86_64) +const char afs_init_sysname[] = "amd64_linux26"; +#elif defined(CONFIG_ARM) +const char afs_init_sysname[] = "arm_linux26"; +#elif defined(CONFIG_ARM64) +const char afs_init_sysname[] = "aarch64_linux26"; +#elif defined(CONFIG_X86_32) +const char afs_init_sysname[] = "i386_linux26"; +#elif defined(CONFIG_IA64) +const char afs_init_sysname[] = "ia64_linux26"; +#elif defined(CONFIG_PPC64) +const char afs_init_sysname[] = "ppc64_linux26"; +#elif defined(CONFIG_PPC32) +const char afs_init_sysname[] = "ppc_linux26"; +#elif defined(CONFIG_S390) +#ifdef CONFIG_64BIT +const char afs_init_sysname[] = "s390x_linux26"; +#else +const char afs_init_sysname[] = "s390_linux26"; +#endif +#elif defined(CONFIG_SPARC64) +const char afs_init_sysname[] = "sparc64_linux26"; +#elif defined(CONFIG_SPARC32) +const char afs_init_sysname[] = "sparc_linux26"; +#else +const char afs_init_sysname[] = "unknown_linux26"; +#endif + /* * Initialise an AFS network namespace record. */ static int __net_init afs_net_init(struct afs_net *net) { + struct afs_sysnames *sysnames; int ret; net->live = true; @@ -67,6 +98,16 @@ static int __net_init afs_net_init(struct afs_net *net) INIT_WORK(&net->fs_manager, afs_manage_servers); timer_setup(&net->fs_timer, afs_servers_timer, 0); + ret = -ENOMEM; + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) + goto error_sysnames; + sysnames->subs[0] = (char *)&afs_init_sysname; + sysnames->nr = 1; + refcount_set(&sysnames->usage, 1); + net->sysnames = sysnames; + rwlock_init(&net->sysnames_lock); + /* Register the /proc stuff */ ret = afs_proc_init(net); if (ret < 0) @@ -92,6 +133,8 @@ error_cell_init: net->live = false; afs_proc_cleanup(net); error_proc: + afs_put_sysnames(net->sysnames); +error_sysnames: net->live = false; return ret; } @@ -106,6 +149,7 @@ static void __net_exit afs_net_exit(struct afs_net *net) afs_purge_servers(net); afs_close_socket(net); afs_proc_cleanup(net); + afs_put_sysnames(net->sysnames); } /* diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 4508dd54f789..839a22280606 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -126,6 +126,34 @@ static const struct file_operations afs_proc_servers_fops = { .release = seq_release, }; +static int afs_proc_sysname_open(struct inode *inode, struct file *file); +static int afs_proc_sysname_release(struct inode *inode, struct file *file); +static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); +static void *afs_proc_sysname_next(struct seq_file *p, void *v, + loff_t *pos); +static void afs_proc_sysname_stop(struct seq_file *p, void *v); +static int afs_proc_sysname_show(struct seq_file *m, void *v); +static ssize_t afs_proc_sysname_write(struct file *file, + const char __user *buf, + size_t size, loff_t *_pos); + +static const struct seq_operations afs_proc_sysname_ops = { + .start = afs_proc_sysname_start, + .next = afs_proc_sysname_next, + .stop = afs_proc_sysname_stop, + .show = afs_proc_sysname_show, +}; + +static const struct file_operations afs_proc_sysname_fops = { + .open = afs_proc_sysname_open, + .read = seq_read, + .llseek = seq_lseek, + .release = afs_proc_sysname_release, + .write = afs_proc_sysname_write, +}; + +static const struct file_operations afs_proc_stats_fops; + /* * initialise the /proc/fs/afs/ directory */ @@ -139,7 +167,9 @@ int afs_proc_init(struct afs_net *net) if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops)) + !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) || + !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) || + !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) goto error_tree; _leave(" = 0"); @@ -183,6 +213,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_net *net = afs_seq2net(m); @@ -204,6 +235,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) * clean up after reading from the cells list */ static void afs_proc_cells_stop(struct seq_file *m, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -282,7 +314,8 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto done; } - set_bit(AFS_CELL_FL_NO_GC, &cell->flags); + if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + afs_put_cell(net, cell); printk("kAFS: Added new cell '%s'\n", name); } else { goto inval; @@ -304,7 +337,40 @@ inval: static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, size_t size, loff_t *_pos) { - return 0; + struct afs_cell *cell; + struct afs_net *net = afs_proc2net(file); + unsigned int seq = 0; + char name[AFS_MAXCELLNAME + 1]; + int len; + + if (*_pos > 0) + return 0; + if (!net->ws_cell) + return 0; + + rcu_read_lock(); + do { + read_seqbegin_or_lock(&net->cells_lock, &seq); + len = 0; + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len); + } + } while (need_seqretry(&net->cells_lock, seq)); + done_seqretry(&net->cells_lock, seq); + rcu_read_unlock(); + + if (!len) + return 0; + + name[len++] = '\n'; + if (len > size) + len = size; + if (copy_to_user(buf, name, len) != 0) + return -EFAULT; + *_pos = 1; + return len; } /* @@ -327,6 +393,12 @@ static ssize_t afs_proc_rootcell_write(struct file *file, if (IS_ERR(kbuf)) return PTR_ERR(kbuf); + ret = -EINVAL; + if (kbuf[0] == '.') + goto out; + if (memchr(kbuf, '/', size)) + goto out; + /* trim to first NL */ s = memchr(kbuf, '\n', size); if (s) @@ -339,6 +411,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, if (ret >= 0) ret = size; /* consume everything, always */ +out: kfree(kbuf); _leave(" = %d", ret); return ret; @@ -413,6 +486,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) + __acquires(cell->proc_lock) { struct afs_cell *cell = m->private; @@ -438,6 +512,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) + __releases(cell->proc_lock) { struct afs_cell *cell = p->private; @@ -500,6 +575,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) * first item */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_addr_list *alist; struct afs_cell *cell = m->private; @@ -544,6 +620,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -580,6 +657,7 @@ static int afs_proc_servers_open(struct inode *inode, struct file *file) * first item. */ static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(rcu) { struct afs_net *net = afs_seq2net(m); @@ -601,6 +679,7 @@ static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) * clean up after reading from the cells list */ static void afs_proc_servers_stop(struct seq_file *p, void *v) + __releases(rcu) { rcu_read_unlock(); } @@ -626,3 +705,244 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) &alist->addrs[alist->index].transport); return 0; } + +void afs_put_sysnames(struct afs_sysnames *sysnames) +{ + int i; + + if (sysnames && refcount_dec_and_test(&sysnames->usage)) { + for (i = 0; i < sysnames->nr; i++) + if (sysnames->subs[i] != afs_init_sysname && + sysnames->subs[i] != sysnames->blank) + kfree(sysnames->subs[i]); + } +} + +/* + * Handle opening of /proc/fs/afs/sysname. If it is opened for writing, we + * assume the caller wants to change the substitution list and we allocate a + * buffer to hold the list. + */ +static int afs_proc_sysname_open(struct inode *inode, struct file *file) +{ + struct afs_sysnames *sysnames; + struct seq_file *m; + int ret; + + ret = seq_open(file, &afs_proc_sysname_ops); + if (ret < 0) + return ret; + + if (file->f_mode & FMODE_WRITE) { + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); + if (!sysnames) { + seq_release(inode, file); + return -ENOMEM; + } + + refcount_set(&sysnames->usage, 1); + m = file->private_data; + m->private = sysnames; + } + + return 0; +} + +/* + * Handle writes to /proc/fs/afs/sysname to set the @sys substitution. + */ +static ssize_t afs_proc_sysname_write(struct file *file, + const char __user *buf, + size_t size, loff_t *_pos) +{ + struct afs_sysnames *sysnames; + struct seq_file *m = file->private_data; + char *kbuf = NULL, *s, *p, *sub; + int ret, len; + + sysnames = m->private; + if (!sysnames) + return -EINVAL; + if (sysnames->error) + return sysnames->error; + + if (size >= PAGE_SIZE - 1) { + sysnames->error = -EINVAL; + return -EINVAL; + } + if (size == 0) + return 0; + + kbuf = memdup_user_nul(buf, size); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + inode_lock(file_inode(file)); + + p = kbuf; + while ((s = strsep(&p, " \t\n"))) { + len = strlen(s); + if (len == 0) + continue; + ret = -ENAMETOOLONG; + if (len >= AFSNAMEMAX) + goto error; + + if (len >= 4 && + s[len - 4] == '@' && + s[len - 3] == 's' && + s[len - 2] == 'y' && + s[len - 1] == 's') + /* Protect against recursion */ + goto invalid; + + if (s[0] == '.' && + (len < 2 || (len == 2 && s[1] == '.'))) + goto invalid; + + if (memchr(s, '/', len)) + goto invalid; + + ret = -EFBIG; + if (sysnames->nr >= AFS_NR_SYSNAME) + goto out; + + if (strcmp(s, afs_init_sysname) == 0) { + sub = (char *)afs_init_sysname; + } else { + ret = -ENOMEM; + sub = kmemdup(s, len + 1, GFP_KERNEL); + if (!sub) + goto out; + } + + sysnames->subs[sysnames->nr] = sub; + sysnames->nr++; + } + + ret = size; /* consume everything, always */ +out: + inode_unlock(file_inode(file)); + kfree(kbuf); + return ret; + +invalid: + ret = -EINVAL; +error: + sysnames->error = ret; + goto out; +} + +static int afs_proc_sysname_release(struct inode *inode, struct file *file) +{ + struct afs_sysnames *sysnames, *kill = NULL; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + + sysnames = m->private; + if (sysnames) { + if (!sysnames->error) { + kill = sysnames; + if (sysnames->nr == 0) { + sysnames->subs[0] = sysnames->blank; + sysnames->nr++; + } + write_lock(&net->sysnames_lock); + kill = net->sysnames; + net->sysnames = sysnames; + write_unlock(&net->sysnames_lock); + } + afs_put_sysnames(kill); + } + + return seq_release(inode, file); +} + +static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) + __acquires(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; + + read_lock(&net->sysnames_lock); + + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; + + *pos += 1; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} + +static void afs_proc_sysname_stop(struct seq_file *m, void *v) + __releases(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); + + read_unlock(&net->sysnames_lock); +} + +static int afs_proc_sysname_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *sysnames = net->sysnames; + unsigned int i = (unsigned long)v - 1; + + if (i < sysnames->nr) + seq_printf(m, "%s\n", sysnames->subs[i]); + return 0; +} + +/* + * Display general per-net namespace statistics + */ +static int afs_proc_stats_show(struct seq_file *m, void *v) +{ + struct afs_net *net = afs_seq2net(m); + + seq_puts(m, "kAFS statistics\n"); + + seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n", + atomic_read(&net->n_lookup), + atomic_read(&net->n_reval), + atomic_read(&net->n_inval), + atomic_read(&net->n_relpg)); + + seq_printf(m, "dir-data: rdpg=%u\n", + atomic_read(&net->n_read_dir)); + + seq_printf(m, "dir-edit: cr=%u rm=%u\n", + atomic_read(&net->n_dir_cr), + atomic_read(&net->n_dir_rm)); + + seq_printf(m, "file-rd : n=%u nb=%lu\n", + atomic_read(&net->n_fetches), + atomic_long_read(&net->n_fetch_bytes)); + seq_printf(m, "file-wr : n=%u nb=%lu\n", + atomic_read(&net->n_stores), + atomic_long_read(&net->n_store_bytes)); + return 0; +} + +/* + * Open "/proc/fs/afs/stats" to allow reading of the stat counters. + */ +static int afs_proc_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, afs_proc_stats_show, NULL); +} + +static const struct file_operations afs_proc_stats_fops = { + .open = afs_proc_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ad1328d85526..ac0feac9d746 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -21,7 +21,7 @@ /* * Initialise a filesystem server cursor for iterating over FS servers. */ -void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) { memset(fc, 0, sizeof(*fc)); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index f7ae54b6a393..5c6263972ec9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -926,3 +926,12 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, afs_set_call_complete(call, ret, remote_abort); return ret; } + +/* + * Log protocol error production. + */ +noinline int afs_protocol_error(struct afs_call *call, int error) +{ + trace_afs_protocol_error(call, error, __builtin_return_address(0)); + return error; +} diff --git a/fs/afs/security.c b/fs/afs/security.c index b88b7d45fdaa..cea2fff313dc 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -178,18 +178,14 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) { - rcu_read_unlock(); + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) goto someone_else_changed_it; - } /* We need a ref on any permits list we want to copy as we'll have to * drop the lock to do memory allocation. */ - if (permits && !refcount_inc_not_zero(&permits->usage)) { - rcu_read_unlock(); + if (permits && !refcount_inc_not_zero(&permits->usage)) goto someone_else_changed_it; - } rcu_read_unlock(); @@ -278,6 +274,7 @@ someone_else_changed_it: /* Someone else changed the cache under us - don't recheck at this * time. */ + rcu_read_unlock(); return; } @@ -296,8 +293,6 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, _enter("{%x:%u},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); - permits = vnode->permit_cache; - /* check the permits to see if we've got one yet */ if (key == vnode->volume->cell->anonymous_key) { _debug("anon"); @@ -327,7 +322,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, */ _debug("no valid permit"); - ret = afs_fetch_status(vnode, key); + ret = afs_fetch_status(vnode, key, false); if (ret < 0) { *_access = 0; _leave(" = %d", ret); diff --git a/fs/afs/server.c b/fs/afs/server.c index a43ef77dabae..e23be63998a8 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -59,7 +59,8 @@ struct afs_server *afs_find_server(struct afs_net *net, alist = rcu_dereference(server->addresses); for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { b = &alist->addrs[i].transport.sin6; - diff = (u16)a->sin6_port - (u16)b->sin6_port; + diff = ((u16 __force)a->sin6_port - + (u16 __force)b->sin6_port); if (diff == 0) diff = memcmp(&a->sin6_addr, &b->sin6_addr, @@ -79,10 +80,11 @@ struct afs_server *afs_find_server(struct afs_net *net, alist = rcu_dereference(server->addresses); for (i = 0; i < alist->nr_ipv4; i++) { b = &alist->addrs[i].transport.sin6; - diff = (u16)a->sin6_port - (u16)b->sin6_port; + diff = ((u16 __force)a->sin6_port - + (u16 __force)b->sin6_port); if (diff == 0) - diff = ((u32)a->sin6_addr.s6_addr32[3] - - (u32)b->sin6_addr.s6_addr32[3]); + diff = ((u32 __force)a->sin6_addr.s6_addr32[3] - + (u32 __force)b->sin6_addr.s6_addr32[3]); if (diff == 0) goto found; if (diff < 0) { @@ -381,7 +383,7 @@ static void afs_server_rcu(struct rcu_head *rcu) { struct afs_server *server = container_of(rcu, struct afs_server, rcu); - afs_put_addrlist(server->addresses); + afs_put_addrlist(rcu_access_pointer(server->addresses)); kfree(server); } @@ -390,7 +392,7 @@ static void afs_server_rcu(struct rcu_head *rcu) */ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { - struct afs_addr_list *alist = server->addresses; + struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, .addr = &alist->addrs[0], diff --git a/fs/afs/super.c b/fs/afs/super.c index 3623c952b6ff..65081ec3c36e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -154,7 +154,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) seq_puts(m, "none"); return 0; } - + switch (volume->type) { case AFSVL_RWVOL: break; @@ -269,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, int cellnamesz; _enter(",%s", name); - + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; @@ -418,7 +418,10 @@ static int afs_fill_super(struct super_block *sb, if (!sb->s_root) goto error; - sb->s_d_op = &afs_fs_dentry_operations; + if (params->dyn_root) + sb->s_d_op = &afs_dynroot_dentry_operations; + else + sb->s_d_op = &afs_fs_dentry_operations; _leave(" = 0"); return 0; @@ -676,7 +679,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = 0; return 0; } - + key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) return PTR_ERR(key); diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 5d8562f1ad4a..1ed7e2fd2f35 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -303,7 +303,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); r->uuid.clock_seq_low = htonl(u->clock_seq_low); for (i = 0; i < 6; i++) - r->uuid.node[i] = ntohl(u->node[i]); + r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); @@ -450,7 +450,7 @@ again: call->count2 = ntohl(*bp); /* Type or next count */ if (call->count > YFS_MAXENDPOINTS) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); if (!alist) @@ -474,7 +474,7 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } size += sizeof(__be32); @@ -487,24 +487,24 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); bp += 6; break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } /* Got either the type of the next entry or the count of * volEndpoints if no more fsEndpoints. */ - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); call->offset = 0; call->count--; @@ -517,7 +517,7 @@ again: if (!call->count) goto end; if (call->count > YFS_MAXENDPOINTS) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); call->unmarshall = 3; @@ -531,7 +531,7 @@ again: return ret; bp = call->buffer; - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); call->offset = 0; call->unmarshall = 4; @@ -545,7 +545,7 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } if (call->count > 1) @@ -558,16 +558,16 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); bp += 6; break; default: - return -EBADMSG; + return afs_protocol_error(call, -EBADMSG); } /* Got either the type of the next entry or the count of @@ -576,7 +576,7 @@ again: call->offset = 0; call->count--; if (call->count > 0) { - call->count2 = htonl(*bp++); + call->count2 = ntohl(*bp++); goto again; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 9370e2feb999..c164698dc304 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -42,10 +42,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, if (!req) return -ENOMEM; - atomic_set(&req->usage, 1); + refcount_set(&req->usage, 1); req->pos = pos; req->len = len; req->nr_pages = 1; + req->pages = req->array; req->pages[0] = page; get_page(page); @@ -124,7 +125,12 @@ try_again: page->index, priv); goto flush_conflicting_write; } - if (to < f || from > t) + /* If the file is being filled locally, allow inter-write + * spaces to be merged into writes. If it's not, only write + * back what the user gives us. + */ + if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && + (to < f || from > t)) goto flush_conflicting_write; if (from < f) f = from; @@ -355,6 +361,12 @@ found_key: } switch (ret) { + case 0: + afs_stat_v(vnode, n_stores); + atomic_long_add((last * PAGE_SIZE + to) - + (first * PAGE_SIZE + offset), + &afs_v2net(vnode)->n_store_bytes); + break; case -EACCES: case -EPERM: case -ENOKEY: @@ -412,7 +424,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, trace_afs_page_dirty(vnode, tracepoint_string("WARN"), primary_page->index, priv); - if (start >= final_page || to < PAGE_SIZE) + if (start >= final_page || + (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))) goto no_more; start++; @@ -433,9 +446,10 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, } for (loop = 0; loop < n; loop++) { - if (to != PAGE_SIZE) - break; page = pages[loop]; + if (to != PAGE_SIZE && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) + break; if (page->index > final_page) break; if (!trylock_page(page)) @@ -448,7 +462,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, priv = page_private(page); f = priv & AFS_PRIV_MAX; t = priv >> AFS_PRIV_SHIFT; - if (f != 0) { + if (f != 0 && + !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { unlock_page(page); break; } @@ -570,10 +585,11 @@ static int afs_writepages_region(struct address_space *mapping, _debug("wback %lx", page->index); - /* at this point we hold neither mapping->tree_lock nor lock on - * the page itself: the page may be truncated or invalidated - * (changing page->mapping to NULL), or even swizzled back from - * swapper_space to tmpfs file mapping + /* + * at this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping */ ret = lock_page_killable(page); if (ret < 0) { @@ -734,20 +750,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) } /* - * Flush out all outstanding writes on a file opened for writing when it is - * closed. - */ -int afs_flush(struct file *file, fl_owner_t id) -{ - _enter(""); - - if ((file->f_mode & FMODE_WRITE) == 0) - return 0; - - return vfs_fsync(file, 0); -} - -/* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h new file mode 100644 index 000000000000..aa21f3068d52 --- /dev/null +++ b/fs/afs/xdr_fs.h @@ -0,0 +1,103 @@ +/* AFS fileserver XDR types + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef XDR_FS_H +#define XDR_FS_H + +struct afs_xdr_AFSFetchStatus { + __be32 if_version; +#define AFS_FSTATUS_VERSION 1 + __be32 type; + __be32 nlink; + __be32 size_lo; + __be32 data_version_lo; + __be32 author; + __be32 owner; + __be32 caller_access; + __be32 anon_access; + __be32 mode; + __be32 parent_vnode; + __be32 parent_unique; + __be32 seg_size; + __be32 mtime_client; + __be32 mtime_server; + __be32 group; + __be32 sync_counter; + __be32 data_version_hi; + __be32 lock_count; + __be32 size_hi; + __be32 abort_code; +} __packed; + +#define AFS_DIR_HASHTBL_SIZE 128 +#define AFS_DIR_DIRENT_SIZE 32 +#define AFS_DIR_SLOTS_PER_BLOCK 64 +#define AFS_DIR_BLOCK_SIZE 2048 +#define AFS_DIR_BLOCKS_PER_PAGE (PAGE_SIZE / AFS_DIR_BLOCK_SIZE) +#define AFS_DIR_MAX_SLOTS 65536 +#define AFS_DIR_BLOCKS_WITH_CTR 128 +#define AFS_DIR_MAX_BLOCKS 1023 +#define AFS_DIR_RESV_BLOCKS 1 +#define AFS_DIR_RESV_BLOCKS0 13 + +/* + * Directory entry structure. + */ +union afs_xdr_dirent { + struct { + u8 valid; + u8 unused[1]; + __be16 hash_next; + __be32 vnode; + __be32 unique; + u8 name[16]; + u8 overflow[4]; /* if any char of the name (inc + * NUL) reaches here, consume + * the next dirent too */ + } u; + u8 extended_name[32]; +} __packed; + +/* + * Directory block header (one at the beginning of every 2048-byte block). + */ +struct afs_xdr_dir_hdr { + __be16 npages; + __be16 magic; +#define AFS_DIR_MAGIC htons(1234) + u8 reserved; + u8 bitmap[8]; + u8 pad[19]; +} __packed; + +/* + * Directory block layout + */ +union afs_xdr_dir_block { + struct afs_xdr_dir_hdr hdr; + + struct { + struct afs_xdr_dir_hdr hdr; + u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS]; + __be16 hashtable[AFS_DIR_HASHTBL_SIZE]; + } meta; + + union afs_xdr_dirent dirents[AFS_DIR_SLOTS_PER_BLOCK]; +} __packed; + +/* + * Directory layout on a linux VM page. + */ +struct afs_xdr_dir_page { + union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; +}; + +#endif /* XDR_FS_H */ diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index a0c57c37fa21..be9c3dc048ab 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -19,9 +19,6 @@ */ static autofs_wqt_t autofs4_next_wait_queue = 1; -/* These are the signals we allow interrupting a pending mount */ -#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT)) - void autofs4_catatonic_mode(struct autofs_sb_info *sbi) { struct autofs_wait_queue *wq, *nwq; @@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, * wq->name.name is NULL iff the lock is already released * or the mount has been made catatonic. */ - if (wq->name.name) { - /* Block all but "shutdown" signals while waiting */ - unsigned long shutdown_sigs_mask; - unsigned long irqflags; - sigset_t oldset; - - spin_lock_irqsave(¤t->sighand->siglock, irqflags); - oldset = current->blocked; - shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0]; - siginitsetinv(¤t->blocked, shutdown_sigs_mask); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); - - wait_event_interruptible(wq->queue, wq->name.name == NULL); - - spin_lock_irqsave(¤t->sighand->siglock, irqflags); - current->blocked = oldset; - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); - } else { - pr_debug("skipped sleeping\n"); - } - + wait_event_killable(wq->queue, wq->name.name == NULL); status = wq->status; /* @@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok kfree(wq->name.name); wq->name.name = NULL; /* Do not wait on this queue */ wq->status = status; - wake_up_interruptible(&wq->queue); + wake_up(&wq->queue); if (!--wq->wait_ctr) kfree(wq); mutex_unlock(&sbi->wq_mutex); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ce1824f47ba6..c3deb2e35f20 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -330,6 +330,7 @@ beyond_if: #ifdef __alpha__ regs->gp = ex.a_gpvalue; #endif + finalize_exec(bprm); start_thread(regs, ex.a_entry, current->mm->start_stack); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bdb201230bae..41e04183e4ce 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, } else map_addr = vm_mmap(filep, addr, size, prot, type, off); + if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr)) + pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n", + task_pid_nr(current), current->comm, + (void *)addr); + return(map_addr); } @@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, elf_prot |= PROT_EXEC; vaddr = eppnt->p_vaddr; if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED; + elf_type |= MAP_FIXED_NOREPLACE; else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; @@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm) the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0, elf_flags; + int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE; unsigned long k, vaddr; unsigned long total_size = 0; @@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm) */ } } + + /* + * Some binaries have overlapping elf segments and then + * we have to forcefully map over an existing mapping + * e.g. over this newly established brk mapping. + */ + elf_fixed = MAP_FIXED; } if (elf_ppnt->p_flags & PF_R) @@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm) * the ET_DYN load_addr calculations, proceed normally. */ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= MAP_FIXED; + elf_flags |= elf_fixed; } else if (loc->elf_ex.e_type == ET_DYN) { /* * This logic is run once for the first LOAD Program @@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm) load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); - elf_flags |= MAP_FIXED; + elf_flags |= elf_fixed; } else load_bias = 0; @@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm) ELF_PLAT_INIT(regs, reloc_func_desc); #endif + finalize_exec(bprm); start_thread(regs, elf_entry, bprm->p); retval = 0; out: @@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file) (eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr)), PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE, (eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr))); if (error != ELF_PAGESTART(eppnt->p_vaddr)) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 429326b6e2e7..d90993adeffa 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) dynaddr); #endif + finalize_exec(bprm); /* everything is now ready... get the userspace context ready to roll */ entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; start_thread(regs, entryaddr, current->mm->start_stack); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 5d6b94475f27..82a48e830018 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm) FLAT_PLAT_INIT(regs); #endif + finalize_exec(bprm); pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n", regs, start_addr, current->mm->start_stack); start_thread(regs, start_addr, current->mm->start_stack); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7a506c55a993..7ec920e27065 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static int blkdev_writepages(struct address_space *mapping, struct writeback_control *wbc) { - if (dax_mapping(mapping)) { - struct block_device *bdev = I_BDEV(mapping->host); - - return dax_writeback_mapping_range(mapping, bdev, wbc); - } return generic_writepages(mapping, wbc); } diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 167e5dc7eadd..23537bc8c827 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + config BTRFS_FS tristate "Btrfs filesystem support" select LIBCRC32C diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 0066d95b133f..15e1dfef56a5 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index d5540749f0e5..d522494698fa 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. * Copyright (C) 2014 Fujitsu. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kthread.h> diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index fc957e00cef1..7861c9feba5f 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -1,24 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. * Copyright (C) 2014 Fujitsu. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_ASYNC_THREAD_ -#define __BTRFS_ASYNC_THREAD_ +#ifndef BTRFS_ASYNC_THREAD_H +#define BTRFS_ASYNC_THREAD_H + #include <linux/workqueue.h> struct btrfs_fs_info; @@ -85,4 +73,5 @@ void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work); struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); + #endif diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 571024bc632e..0a8e2e29a66b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/mm.h> diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 0a30028d5196..54d58988483a 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2011 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_BACKREF__ -#define __BTRFS_BACKREF__ +#ifndef BTRFS_BACKREF_H +#define BTRFS_BACKREF_H #include <linux/btrfs.h> #include "ulist.h" diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ca15be569d69..234bae55b85d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_I__ -#define __BTRFS_I__ +#ifndef BTRFS_INODE_H +#define BTRFS_INODE_H #include <linux/hash.h> #include "extent_map.h" diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 3baebbc021c5..dc062b195c46 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) STRATO AG 2011. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ /* diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 2de58a99ee92..9bf4359cc44c 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) STRATO AG 2011. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#if !defined(__BTRFS_CHECK_INTEGRITY__) -#define __BTRFS_CHECK_INTEGRITY__ +#ifndef BTRFS_CHECK_INTEGRITY_H +#define BTRFS_CHECK_INTEGRITY_H #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 562c3e633403..1061575a7d25 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> @@ -458,7 +445,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, break; rcu_read_lock(); - page = radix_tree_lookup(&mapping->page_tree, pg_index); + page = radix_tree_lookup(&mapping->i_pages, pg_index); rcu_read_unlock(); if (page && !radix_tree_exceptional_entry(page)) { misses++; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index ce796557a918..cc605f7b23fb 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_COMPRESSION_ -#define __BTRFS_COMPRESSION_ +#ifndef BTRFS_COMPRESSION_H +#define BTRFS_COMPRESSION_H /* * We want to make sure that amount of RAM required to uncompress an extent is diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a2c9d21176e2..3fd44835b386 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007,2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0eb55825862a..5474ef14d6e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_CTREE__ -#define __BTRFS_CTREE__ +#ifndef BTRFS_CTREE_H +#define BTRFS_CTREE_H #include <linux/mm.h> #include <linux/sched/signal.h> @@ -3752,4 +3739,5 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) #endif return 0; } + #endif diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h index 83ebfe28da9e..90281a7a35a8 100644 --- a/fs/btrfs/dedupe.h +++ b/fs/btrfs/dedupe.h @@ -1,24 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2016 Fujitsu. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_DEDUPE__ -#define __BTRFS_DEDUPE__ +#ifndef BTRFS_DEDUPE_H +#define BTRFS_DEDUPE_H /* later in-band dedupe will expand this struct */ struct btrfs_dedupe_hash; + #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 86ec2edc05e8..06ec8ab6d9ba 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 Fujitsu. All rights reserved. * Written by Miao Xie <miaox@cn.fujitsu.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/slab.h> diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 100a91e26b55..ca7a97f3ab6b 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -1,24 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2011 Fujitsu. All rights reserved. * Written by Miao Xie <miaox@cn.fujitsu.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __DELAYED_TREE_OPERATION_H -#define __DELAYED_TREE_OPERATION_H +#ifndef BTRFS_DELAYED_INODE_H +#define BTRFS_DELAYED_INODE_H #include <linux/rbtree.h> #include <linux/spinlock.h> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2677257c149d..9e98295de7ce 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 9e3e5aff0937..741869dbc316 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -1,22 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __DELAYED_REF__ -#define __DELAYED_REF__ + +#ifndef BTRFS_DELAYED_REF_H +#define BTRFS_DELAYED_REF_H #include <linux/refcount.h> @@ -298,4 +286,5 @@ btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) { return container_of(node, struct btrfs_delayed_data_ref, node); } + #endif diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 0d203633bb96..f82be266ba4b 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) STRATO AG 2012. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 8566a02ef222..b6d4206188bb 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) STRATO AG 2012. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#if !defined(__BTRFS_DEV_REPLACE__) -#define __BTRFS_DEV_REPLACE__ +#ifndef BTRFS_DEV_REPLACE_H +#define BTRFS_DEV_REPLACE_H struct btrfs_ioctl_dev_replace_args; @@ -48,4 +35,5 @@ static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) { atomic64_inc(stat_value); } + #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 29e967b2c667..39e9766d1cbd 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include "ctree.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 07b5e6f7df67..4ac8b1d21baf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> @@ -449,6 +436,14 @@ static int verify_level_key(struct btrfs_fs_info *fs_info, if (!first_key) return 0; + /* + * For live tree block (new tree blocks in current transaction), + * we need proper lock context to avoid race, which is impossible here. + * So we only checks tree blocks which is read from disk, whose + * generation <= fs_info->last_trans_committed. + */ + if (btrfs_header_generation(eb) > fs_info->last_trans_committed) + return 0; if (found_level) btrfs_node_key_to_cpu(eb, &found_key, 0); else @@ -3812,7 +3807,8 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_err(fs_info, "commit super ret %d", ret); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) || + test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) btrfs_error_commit_super(fs_info); kthread_stop(fs_info->transaction_kthread); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 453ea9f5d4e9..1a3d277b027b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __DISKIO__ -#define __DISKIO__ +#ifndef BTRFS_DISK_IO_H +#define BTRFS_DISK_IO_H #define BTRFS_SUPER_INFO_OFFSET SZ_64K #define BTRFS_SUPER_INFO_SIZE 4096 @@ -169,4 +156,5 @@ static inline void btrfs_set_buffer_lockdep_class(u64 objectid, { } #endif + #endif diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index ddaccad469f8..1f3755b3a37a 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #include <linux/fs.h> #include <linux/types.h> #include "ctree.h" diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h index 91b3908e7c54..57488ecd7d4e 100644 --- a/fs/btrfs/export.h +++ b/fs/btrfs/export.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ + #ifndef BTRFS_EXPORT_H #define BTRFS_EXPORT_H diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e08d0d45af4f..75cfb80d2551 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/pagemap.h> @@ -4642,6 +4630,7 @@ again: if (wait_for_alloc) { mutex_unlock(&fs_info->chunk_mutex); wait_for_alloc = 0; + cond_resched(); goto again; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 47a8fe9d22e8..e99b329002cf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #include <linux/bitops.h> #include <linux/slab.h> #include <linux/bio.h> @@ -3963,11 +3964,11 @@ retry: done_index = page->index; /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping + * At this point we hold neither the i_pages lock nor + * the page lock: the page may be truncated or + * invalidated (changing page->mapping to NULL), + * or even swizzled back from swapper_space to + * tmpfs file mapping */ if (!trylock_page(page)) { flush_write_bio(epd); @@ -5174,13 +5175,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) WARN_ON(!PagePrivate(page)); clear_page_dirty_for_io(page); - spin_lock_irq(&page->mapping->tree_lock); + xa_lock_irq(&page->mapping->i_pages); if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, + radix_tree_tag_clear(&page->mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&page->mapping->tree_lock); + xa_unlock_irq(&page->mapping->i_pages); ClearPageError(page); unlock_page(page); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b77d84909863..a53009694b16 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __EXTENTIO__ -#define __EXTENTIO__ + +#ifndef BTRFS_EXTENT_IO_H +#define BTRFS_EXTENT_IO_H #include <linux/rbtree.h> #include <linux/refcount.h> @@ -572,4 +573,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); + #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 53a0633c6ef7..1b8a078f92eb 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #include <linux/err.h> #include <linux/slab.h> #include <linux/spinlock.h> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index f6f8ba114977..5fcb80a6ce37 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __EXTENTMAP__ -#define __EXTENTMAP__ + +#ifndef BTRFS_EXTENT_MAP_H +#define BTRFS_EXTENT_MAP_H #include <linux/rbtree.h> #include <linux/refcount.h> @@ -93,4 +94,5 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len); int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, struct extent_map **em_in, u64 start, u64 len); + #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fdcb41002623..f9dd6d1836a3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/bio.h> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f247300170e5..0167a9c97c9c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d0dde9e6afd7..e5b569bebc73 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/pagemap.h> diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 79eca4cabb1c..15e30b93db0d 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_FREE_SPACE_CACHE -#define __BTRFS_FREE_SPACE_CACHE +#ifndef BTRFS_FREE_SPACE_CACHE_H +#define BTRFS_FREE_SPACE_CACHE_H struct btrfs_free_space { struct rb_node offset_index; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index af36a6a971fe..32a0f6cb5594 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h index ba3787df43c3..874b4feecad2 100644 --- a/fs/btrfs/free-space-tree.h +++ b/fs/btrfs/free-space-tree.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_FREE_SPACE_TREE -#define __BTRFS_FREE_SPACE_TREE +#ifndef BTRFS_FREE_SPACE_TREE_H +#define BTRFS_FREE_SPACE_TREE_H /* * The default size for new free space bitmap items. The last bitmap in a block diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 1d5631ef2738..a8956a3c9e05 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include "ctree.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9409dcc7020d..12fcd8897c33 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/delay.h> diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h index 6734ec92a1e9..7a962811dffe 100644 --- a/fs/btrfs/inode-map.h +++ b/fs/btrfs/inode-map.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BTRFS_INODE_MAP -#define __BTRFS_INODE_MAP + +#ifndef BTRFS_INODE_MAP_H +#define BTRFS_INODE_MAP_H void btrfs_init_free_ino_ctl(struct btrfs_root *root); void btrfs_unpin_free_ino(struct btrfs_root *root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1f091c2358a4..e064c49c9a9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b2db3988813f..632e26d6f7ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 621083f8932c..e4faefac9d16 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> #include <linux/pagemap.h> #include <linux/spinlock.h> diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index c44a9d5f5362..29135def468e 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_LOCKING_ -#define __BTRFS_LOCKING_ +#ifndef BTRFS_LOCKING_H +#define BTRFS_LOCKING_H #define BTRFS_WRITE_LOCK 1 #define BTRFS_READ_LOCK 2 diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 1c7f7f70caf4..0667ea07f766 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h index 1b10a3cd1195..75246f2f56ba 100644 --- a/fs/btrfs/math.h +++ b/fs/btrfs/math.h @@ -1,25 +1,11 @@ - +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2012 Fujitsu. All rights reserved. * Written by Miao Xie <miaox@cn.fujitsu.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_MATH_H -#define __BTRFS_MATH_H +#ifndef BTRFS_MATH_H +#define BTRFS_MATH_H #include <asm/div64.h> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 661cc3db0c7c..6db8bb2f2c28 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/slab.h> diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4a1672a13ba6..3be443fb3001 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_ORDERED_DATA__ -#define __BTRFS_ORDERED_DATA__ +#ifndef BTRFS_ORDERED_DATA_H +#define BTRFS_ORDERED_DATA_H /* one of these per inode */ struct btrfs_ordered_inode_tree { @@ -218,4 +205,5 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); int __init ordered_data_init(void); void __cold ordered_data_exit(void); + #endif diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 47767d5b8f0b..aa534108c1e2 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include "ctree.h" diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 4a8770485f77..124276bba8cf 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include "ctree.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 3afd508ed8c5..4a98481688f4 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,23 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __PRINT_TREE_ -#define __PRINT_TREE_ +#ifndef BTRFS_PRINT_TREE_H +#define BTRFS_PRINT_TREE_H + void btrfs_print_leaf(struct extent_buffer *l); void btrfs_print_tree(struct extent_buffer *c); + #endif diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 5859f7d3cf3e..53a8c95828e3 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/hashtable.h> diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h index 100f18829d50..618815b4f9d5 100644 --- a/fs/btrfs/props.h +++ b/fs/btrfs/props.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_PROPS_H -#define __BTRFS_PROPS_H +#ifndef BTRFS_PROPS_H +#define BTRFS_PROPS_H #include "ctree.h" diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index f583f13ff26e..09c7e4fd550f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index e63e2d497a8e..d60dd06445ce 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_QGROUP__ -#define __BTRFS_QGROUP__ +#ifndef BTRFS_QGROUP_H +#define BTRFS_QGROUP_H #include "ulist.h" #include "delayed-ref.h" @@ -341,4 +328,5 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root); void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes); void btrfs_qgroup_check_reserved_leak(struct inode *inode); -#endif /* __BTRFS_QGROUP__ */ + +#endif diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index c3a2bc8af675..9abd950e7f78 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2012 Fusion-io All rights reserved. * Copyright (C) 2012 Intel Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> #include <linux/wait.h> #include <linux/bio.h> diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 4ee4fe346838..f5d4c13a8dbc 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -1,24 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2012 Fusion-io All rights reserved. * Copyright (C) 2012 Intel Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_RAID56__ -#define __BTRFS_RAID56__ +#ifndef BTRFS_RAID56_H +#define BTRFS_RAID56_H + static inline int nr_parity_stripes(struct map_lookup *map) { if (map->type & BTRFS_BLOCK_GROUP_RAID5) @@ -65,4 +53,5 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); + #endif diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h index 9e111e4576d4..a97dc74a4d3d 100644 --- a/fs/btrfs/rcu-string.h +++ b/fs/btrfs/rcu-string.h @@ -1,21 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2012 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ +#ifndef BTRFS_RCU_STRING_H +#define BTRFS_RCU_STRING_H + struct rcu_string { struct rcu_head rcu; char str[0]; @@ -54,3 +44,5 @@ static inline void rcu_string_free(struct rcu_string *str) struct rcu_string *__str = rcu_dereference(rcu_str); \ __str->str; \ }) + +#endif diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index a52dd12af648..40f1bcef394d 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 35fab67dcbe8..e5b9e596bb92 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h index 3bf02ce0e1e2..b7d2a4edfdb7 100644 --- a/fs/btrfs/ref-verify.h +++ b/fs/btrfs/ref-verify.h @@ -1,22 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __REF_VERIFY__ -#define __REF_VERIFY__ + +#ifndef BTRFS_REF_VERIFY_H +#define BTRFS_REF_VERIFY_H #ifdef CONFIG_BTRFS_FS_REF_VERIFY int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info); @@ -59,4 +47,5 @@ static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info) } #endif /* CONFIG_BTRFS_FS_REF_VERIFY */ -#endif /* _REF_VERIFY__ */ + +#endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4874c09f6d3c..00b7d3231821 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index aab0194efe46..6db3bda44aa5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/err.h> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1a2066ac6fe7..52b39a0924e9 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011, 2012 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/blkdev.h> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1f5748c7d1c7..221e5cdb060b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2012 Alexander Block. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/bsearch.h> diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 3aa4bc55754f..ead397f7034f 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -1,22 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2012 Alexander Block. All rights reserved. * Copyright (C) 2012 STRATO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ +#ifndef BTRFS_SEND_H +#define BTRFS_SEND_H + #include "ctree.h" #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" @@ -132,3 +122,5 @@ enum { #ifdef __KERNEL__ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg); #endif + +#endif diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index 5e2b92d83617..b7b4acb12833 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/highmem.h> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 170baef49fae..0628092b0b1b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/blkdev.h> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ca067471cd46..4848a4318fb5 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 80457f31c29f..b567560d9aa9 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BTRFS_SYSFS_H_ -#define _BTRFS_SYSFS_H_ + +#ifndef BTRFS_SYSFS_H +#define BTRFS_SYSFS_H /* * Data exported through sysfs @@ -90,4 +91,4 @@ void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, u64 bit, enum btrfs_feature_set set); -#endif /* _BTRFS_SYSFS_H_ */ +#endif diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index e74278170806..30ed438da2a9 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index bc0615bac3cc..a5a0b9500d3e 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_TESTS -#define __BTRFS_TESTS +#ifndef BTRFS_TESTS_H +#define BTRFS_TESTS_H #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS int btrfs_run_sanity_tests(void); diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index b9142c614114..31e8a9ec228c 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/slab.h> diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 2e7f64a3b22b..76aa5a678a96 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/pagemap.h> diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index c23bd00bdd92..79e0a5f4d9c9 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index eca6412d42bd..d3c9f8a59ba5 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/slab.h> diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 8444a018cca2..e1f9666c4974 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 13420cd19ef0..e0ba799536b4 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 160eb2fba726..39b95783f736 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Facebook. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5c4cf0f9146b..63fdcab64b01 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b6c94ce33503..c88fccd80bc5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_TRANSACTION__ -#define __BTRFS_TRANSACTION__ +#ifndef BTRFS_TRANSACTION_H +#define BTRFS_TRANSACTION_H #include <linux/refcount.h> #include "btrfs_inode.h" @@ -228,4 +215,5 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); + #endif diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 8871286c1a91..8d40e7dd8c30 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) Qu Wenruo 2017. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program. */ /* diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index aba542755710..ff043275b784 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -1,21 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) Qu Wenruo 2017. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program. */ -#ifndef __BTRFS_TREE_CHECKER__ -#define __BTRFS_TREE_CHECKER__ +#ifndef BTRFS_TREE_CHECKER_H +#define BTRFS_TREE_CHECKER_H #include "ctree.h" #include "extent_io.h" diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index c09dbe4bd6e7..3c0987ab587d 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c91babc6aa4b..43758e30aa7a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> @@ -2352,8 +2339,10 @@ again: nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); - if (ret) + if (ret == 1) break; + else if (ret < 0) + goto out; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); @@ -2456,13 +2445,41 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, if (ret) break; - /* for regular files, make sure corresponding - * orphan item exist. extents past the new EOF - * will be truncated later by orphan cleanup. + /* + * Before replaying extents, truncate the inode to its + * size. We need to do it now and not after log replay + * because before an fsync we can have prealloc extents + * added beyond the inode's i_size. If we did it after, + * through orphan cleanup for example, we would drop + * those prealloc extents just after replaying them. */ if (S_ISREG(mode)) { - ret = insert_orphan_item(wc->trans, root, - key.objectid); + struct inode *inode; + u64 from; + + inode = read_one_inode(root, key.objectid); + if (!inode) { + ret = -EIO; + break; + } + from = ALIGN(i_size_read(inode), + root->fs_info->sectorsize); + ret = btrfs_drop_extents(wc->trans, root, inode, + from, (u64)-1, 1); + /* + * If the nlink count is zero here, the iput + * will free the inode. We bump it to make + * sure it doesn't get freed until the link + * count fixup is done. + */ + if (!ret) { + if (inode->i_nlink == 0) + inc_nlink(inode); + /* Update link count and nbytes. */ + ret = btrfs_update_inode(wc->trans, + root, inode); + } + iput(inode); if (ret) break; } @@ -3520,8 +3537,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * from this directory and from this transaction */ ret = btrfs_next_leaf(root, path); - if (ret == 1) { - last_offset = (u64)-1; + if (ret) { + if (ret == 1) + last_offset = (u64)-1; + else + err = ret; goto done; } btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); @@ -4354,6 +4374,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, num++; } + /* + * Add all prealloc extents beyond the inode's i_size to make sure we + * don't lose them after doing a fast fsync and replaying the log. + */ + if (inode->flags & BTRFS_INODE_PREALLOC) { + struct rb_node *node; + + for (node = rb_last(&tree->map); node; node = rb_prev(node)) { + em = rb_entry(node, struct extent_map, rb_node); + if (em->start < i_size_read(&inode->vfs_inode)) + break; + if (!list_empty(&em->list)) + continue; + /* Same as above loop. */ + if (++num > 32768) { + list_del_init(&tree->modified_extents); + ret = -EFBIG; + goto process; + } + refcount_inc(&em->refs); + set_bit(EXTENT_FLAG_LOGGING, &em->flags); + list_add_tail(&em->list, &extents); + } + } + list_sort(NULL, &extents, extent_cmp); btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 88abc43312a1..122e68b89a5a 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __TREE_LOG_ -#define __TREE_LOG_ +#ifndef BTRFS_TREE_LOG_H +#define BTRFS_TREE_LOG_H #include "ctree.h" #include "transaction.h" @@ -87,4 +74,5 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, int btrfs_log_new_name(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_inode *old_dir, struct dentry *parent); + #endif diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index d8edf164f81c..3374c9e9be67 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 STRATO AG * written by Arne Jansen <sensille@gmx.net> - * Distributed under the GNU GPL license version 2. */ #include <linux/slab.h> diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 53c913632733..02fda0a2d4ce 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -1,12 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2011 STRATO AG * written by Arne Jansen <sensille@gmx.net> - * Distributed under the GNU GPL license version 2. - * */ -#ifndef __ULIST__ -#define __ULIST__ +#ifndef BTRFS_ULIST_H +#define BTRFS_ULIST_H #include <linux/list.h> #include <linux/rbtree.h> diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 9916f03430bc..1ba7ca2a4200 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) STRATO AG 2013. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/uuid.h> #include <asm/unaligned.h> #include "ctree.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 93f8f17cacca..292266f6ab9c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> #include <linux/bio.h> #include <linux/slab.h> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d1fcaea9fef5..79096884654f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS_VOLUMES_ -#define __BTRFS_VOLUMES_ +#ifndef BTRFS_VOLUMES_H +#define BTRFS_VOLUMES_H #include <linux/bio.h> #include <linux/sort.h> diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e1e8177deb5e..ea78c3d6dcfc 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/init.h> @@ -32,7 +19,6 @@ #include "props.h" #include "locking.h" - int btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) { diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index e215a3212a2a..471fcac6ff55 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Red Hat. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __XATTR__ -#define __XATTR__ +#ifndef BTRFS_XATTR_H +#define BTRFS_XATTR_H #include <linux/xattr.h> @@ -34,4 +21,4 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr); -#endif /* __XATTR__ */ +#endif diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 2b52950dc2c6..970ff3e35bb3 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Oracle. All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on jffs2 zlib code: * Copyright © 2001-2007 Red Hat, Inc. * Created by David Woodhouse <dwmw2@infradead.org> diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 01a4eab602a3..af6ec59972f5 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -1,16 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2016-present, Facebook, Inc. * All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ + #include <linux/bio.h> #include <linux/err.h> #include <linux/init.h> diff --git a/fs/buffer.c b/fs/buffer.c index ec5dd39071e6..249b83fafe48 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync); * we get exclusion from try_to_free_buffers with the blockdev mapping's * private_lock. * - * Hack idea: for the blockdev mapping, i_bufferlist_lock contention + * Hack idea: for the blockdev mapping, private_lock contention * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take private_lock. (But if - * private_lock is contended then so is mapping->tree_lock). + * succeeds, there is no need to take private_lock. */ static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block) @@ -495,35 +494,12 @@ repeat: return err; } -static void do_thaw_one(struct super_block *sb, void *unused) +void emergency_thaw_bdev(struct super_block *sb) { while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); } -static void do_thaw_all(struct work_struct *work) -{ - iterate_supers(do_thaw_one, NULL); - kfree(work); - printk(KERN_WARNING "Emergency Thaw complete\n"); -} - -/** - * emergency_thaw_all -- forcibly thaw every frozen filesystem - * - * Used for emergency unfreeze of all filesystems via SysRq - */ -void emergency_thaw_all(void) -{ - struct work_struct *work; - - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - INIT_WORK(work, do_thaw_all); - schedule_work(work); - } -} - /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written @@ -594,20 +570,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); * * The caller must hold lock_page_memcg(). */ -static void __set_page_dirty(struct page *page, struct address_space *mapping, +void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { unsigned long flags; - spin_lock_irqsave(&mapping->tree_lock, flags); + xa_lock_irqsave(&mapping->i_pages, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, + radix_tree_tag_set(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irqrestore(&mapping->tree_lock, flags); + xa_unlock_irqrestore(&mapping->i_pages, flags); } +EXPORT_SYMBOL_GPL(__set_page_dirty); /* * Add a page to the dirty page list. @@ -1095,7 +1072,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, * inode list. * * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, - * mapping->tree_lock and mapping->host->i_lock. + * i_pages lock and mapping->host->i_lock. */ void mark_buffer_dirty(struct buffer_head *bh) { diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 174f5709e508..a699e320393f 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ - export.o caps.o snap.o xattr.o \ + export.o caps.o snap.o xattr.o quota.o \ mds_client.o mdsmap.o strings.o ceph_frag.o \ debugfs.o diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b4336b42ce3b..5f7ad3d0df2e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -15,6 +15,7 @@ #include "mds_client.h" #include "cache.h" #include <linux/ceph/osd_client.h> +#include <linux/ceph/striper.h> /* * Ceph address space ops. @@ -438,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_file_info *ci = file->private_data; + struct ceph_file_info *fi = file->private_data; struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -452,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; - rw_ctx = ceph_find_rw_context(ci); + rw_ctx = ceph_find_rw_context(fi); max = fsc->mount_options->rsize >> PAGE_SHIFT; dout("readpages %p file %p ctx %p nr_pages %d max %d\n", inode, file, rw_ctx, nr_pages, max); @@ -800,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct ceph_osd_request *req = NULL; struct ceph_writeback_ctl ceph_wbc; bool should_loop, range_whole = false; - bool stop, done = false; + bool done = false; dout("writepages_start %p (mode=%s)\n", inode, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : @@ -856,7 +857,7 @@ retry: * in that range can be associated with newer snapc. * They are not writeable until we write all dirty pages * associated with 'snapc' get written */ - if (index > 0 || wbc->sync_mode != WB_SYNC_NONE) + if (index > 0) should_loop = true; dout(" non-head snapc, range whole\n"); } @@ -864,8 +865,7 @@ retry: ceph_put_snap_context(last_snapc); last_snapc = snapc; - stop = false; - while (!stop && index <= end) { + while (!done && index <= end) { int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; @@ -898,16 +898,30 @@ get_more_pages: unlock_page(page); continue; } - if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + /* only if matching snap context */ + pgsnapc = page_snap_context(page); + if (pgsnapc != snapc) { + dout("page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); + if (!should_loop && + !ceph_wbc.head_snapc && + wbc->sync_mode != WB_SYNC_NONE) + should_loop = true; unlock_page(page); - break; + continue; } if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - /* not done if range_cyclic */ - stop = true; + if (ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) + mapping->a_ops->invalidatepage(page, + 0, PAGE_SIZE); + unlock_page(page); + continue; + } + if (strip_unit_end && (page->index > strip_unit_end)) { + dout("end of strip unit %p\n", page); unlock_page(page); break; } @@ -921,15 +935,6 @@ get_more_pages: wait_on_page_writeback(page); } - /* only if matching snap context */ - pgsnapc = page_snap_context(page); - if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); - unlock_page(page); - continue; - } - if (!clear_page_dirty_for_io(page)) { dout("%p !clear_page_dirty_for_io\n", page); unlock_page(page); @@ -945,19 +950,15 @@ get_more_pages: if (locked_pages == 0) { u64 objnum; u64 objoff; + u32 xlen; /* prepare async write request */ offset = (u64)page_offset(page); - len = wsize; - - rc = ceph_calc_file_object_mapping(&ci->i_layout, - offset, len, - &objnum, &objoff, - &len); - if (rc < 0) { - unlock_page(page); - break; - } + ceph_calc_file_object_mapping(&ci->i_layout, + offset, wsize, + &objnum, &objoff, + &xlen); + len = xlen; num_ops = 1; strip_unit_end = page->index + @@ -1146,7 +1147,7 @@ new_request: * we tagged for writeback prior to entering this loop. */ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) - done = stop = true; + done = true; release_pvec_pages: dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 33a211b364ed..bb524c880b1e 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -51,7 +51,7 @@ static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { .type = FSCACHE_COOKIE_TYPE_INDEX, }; -int ceph_fscache_register(void) +int __init ceph_fscache_register(void) { return fscache_register_netfs(&ceph_cache_netfs); } @@ -135,7 +135,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; - dout("ceph inode 0x%p cached okay", ci); + dout("ceph inode 0x%p cached okay\n", ci); return FSCACHE_CHECKAUX_OKAY; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0e5bd3e3344e..23dbfae16156 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); - for (i = have; i < need; i++) { -retry: + for (i = have; i < need; ) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); - if (!cap) { - if (!trimmed) { - for (j = 0; j < mdsc->max_sessions; j++) { - s = __ceph_lookup_mds_session(mdsc, j); - if (!s) - continue; - mutex_unlock(&mdsc->mutex); + if (cap) { + list_add(&cap->caps_item, &newcaps); + alloc++; + i++; + continue; + } - mutex_lock(&s->s_mutex); - max_caps = s->s_nr_caps - (need - i); - ceph_trim_caps(mdsc, s, max_caps); - mutex_unlock(&s->s_mutex); + if (!trimmed) { + for (j = 0; j < mdsc->max_sessions; j++) { + s = __ceph_lookup_mds_session(mdsc, j); + if (!s) + continue; + mutex_unlock(&mdsc->mutex); - ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } - trimmed = true; - goto retry; - } else { - pr_warn("reserve caps ctx=%p ENOMEM " - "need=%d got=%d\n", - ctx, need, have + alloc); - goto out_nomem; + mutex_lock(&s->s_mutex); + max_caps = s->s_nr_caps - (need - i); + ceph_trim_caps(mdsc, s, max_caps); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); } + trimmed = true; + + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + int more_have; + if (mdsc->caps_avail_count >= need - i) + more_have = need - i; + else + more_have = mdsc->caps_avail_count; + + i += more_have; + have += more_have; + mdsc->caps_avail_count -= more_have; + mdsc->caps_reserve_count += more_have; + + } + spin_unlock(&mdsc->caps_list_lock); + + continue; } - list_add(&cap->caps_item, &newcaps); - alloc++; + + pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", + ctx, need, have + alloc); + goto out_nomem; } BUG_ON(have + alloc != need); @@ -234,16 +252,28 @@ retry: return 0; out_nomem: + + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_avail_count += have; + mdsc->caps_reserve_count -= have; + while (!list_empty(&newcaps)) { cap = list_first_entry(&newcaps, struct ceph_cap, caps_item); list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); + + /* Keep some preallocated caps around (ceph_min_count), to + * avoid lots of free/alloc churn. */ + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + kmem_cache_free(ceph_cap_cachep, cap); + } else { + mdsc->caps_avail_count++; + mdsc->caps_total_count++; + list_add(&cap->caps_item, &mdsc->caps_list); + } } - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); @@ -254,12 +284,26 @@ out_nomem: int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + int i; + struct ceph_cap *cap; + dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); if (ctx->count) { spin_lock(&mdsc->caps_list_lock); BUG_ON(mdsc->caps_reserve_count < ctx->count); mdsc->caps_reserve_count -= ctx->count; - mdsc->caps_avail_count += ctx->count; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= ctx->count; + for (i = 0; i < ctx->count; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += ctx->count; + } ctx->count = 0; dout("unreserve caps %d = %d used + %d resv + %d avail\n", mdsc->caps_total_count, mdsc->caps_use_count, @@ -285,7 +329,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, mdsc->caps_use_count++; mdsc->caps_total_count++; spin_unlock(&mdsc->caps_list_lock); + } else { + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + BUG_ON(list_empty(&mdsc->caps_list)); + + mdsc->caps_avail_count--; + mdsc->caps_use_count++; + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + mdsc->caps_avail_count); + } + spin_unlock(&mdsc->caps_list_lock); } + return cap; } @@ -341,6 +401,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc, { struct ceph_mds_client *mdsc = fsc->mdsc; + spin_lock(&mdsc->caps_list_lock); + if (total) *total = mdsc->caps_total_count; if (avail) @@ -351,6 +413,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc, *reserved = mdsc->caps_reserve_count; if (min) *min = mdsc->caps_min_count; + + spin_unlock(&mdsc->caps_list_lock); } /* @@ -639,9 +703,11 @@ void ceph_add_cap(struct inode *inode, } spin_lock(&realm->inodes_with_caps_lock); - ci->i_snap_realm = realm; list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); + ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); if (oldrealm) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 644def813754..abdf98deeec4 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsmap_show_fops); @@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mds_sessions_show_fops); @@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsc = debugfs_create_file("mdsc", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsc_show_fops); @@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &dentry_lru_show_fops); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2bdd561c4c68..1a78dd6f8bf2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -101,18 +101,18 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. */ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, +static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); if (!buf) return -ENOMEM; - kfree(fi->last_name); - fi->last_name = buf; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - fi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", fi->last_name); + kfree(dfi->last_name); + dfi->last_name = buf; + memcpy(dfi->last_name, name, len); + dfi->last_name[len] = 0; + dfi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", dfi->last_name); return 0; } @@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, static int __dcache_readdir(struct file *file, struct dir_context *ctx, int shared_gen) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); struct dentry *dentry, *last = NULL; @@ -221,7 +221,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, bool emit_dentry = false; dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); if (!dentry) { - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; err = 0; break; } @@ -272,33 +272,33 @@ out: if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; dput(last); /* last_name no longer match cache index */ - if (fi->readdir_cache_idx >= 0) { - fi->readdir_cache_idx = -1; - fi->dir_release_count = 0; + if (dfi->readdir_cache_idx >= 0) { + dfi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; } } return err; } -static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos) +static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos) { - if (!fi->last_readdir) + if (!dfi->last_readdir) return true; if (is_hash_order(pos)) - return !ceph_frag_contains_value(fi->frag, fpos_hash(pos)); + return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos)); else - return fi->frag != fpos_frag(pos); + return dfi->frag != fpos_frag(pos); } static int ceph_readdir(struct file *file, struct dir_context *ctx) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); @@ -309,7 +309,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_reply_info_parsed *rinfo; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); - if (fi->flags & CEPH_F_ATEND) + if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ @@ -350,15 +350,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* proceed with a normal readdir */ more: /* do we have the correct frag content buffered? */ - if (need_send_readdir(fi, ctx->pos)) { + if (need_send_readdir(dfi, ctx->pos)) { struct ceph_mds_request *req; int op = ceph_snap(inode) == CEPH_SNAPDIR ? CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } if (is_hash_order(ctx->pos)) { @@ -372,7 +372,7 @@ more: } dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, fi->last_name); + ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -388,8 +388,8 @@ more: __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); req->r_inode_drop = CEPH_CAP_FILE_EXCL; } - if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); + if (dfi->last_name) { + req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; @@ -399,10 +399,10 @@ more: cpu_to_le32(fpos_hash(ctx->pos)); } - req->r_dir_release_cnt = fi->dir_release_count; - req->r_dir_ordered_cnt = fi->dir_ordered_count; - req->r_readdir_cache_idx = fi->readdir_cache_idx; - req->r_readdir_offset = fi->next_offset; + req->r_dir_release_cnt = dfi->dir_release_count; + req->r_dir_ordered_cnt = dfi->dir_ordered_count; + req->r_readdir_cache_idx = dfi->readdir_cache_idx; + req->r_readdir_offset = dfi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.flags = cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); @@ -426,35 +426,35 @@ more: if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) { - fi->next_offset = req->r_readdir_offset; + dfi->next_offset = req->r_readdir_offset; /* adjust ctx->pos to beginning of frag */ ctx->pos = ceph_make_fpos(frag, - fi->next_offset, + dfi->next_offset, false); } } - fi->frag = frag; - fi->last_readdir = req; + dfi->frag = frag; + dfi->last_readdir = req; if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { - fi->readdir_cache_idx = req->r_readdir_cache_idx; - if (fi->readdir_cache_idx < 0) { + dfi->readdir_cache_idx = req->r_readdir_cache_idx; + if (dfi->readdir_cache_idx < 0) { /* preclude from marking dir ordered */ - fi->dir_ordered_count = 0; + dfi->dir_ordered_count = 0; } else if (ceph_frag_is_leftmost(frag) && - fi->next_offset == 2) { + dfi->next_offset == 2) { /* note dir version at start of readdir so * we can tell if any dentries get dropped */ - fi->dir_release_count = req->r_dir_release_cnt; - fi->dir_ordered_count = req->r_dir_ordered_cnt; + dfi->dir_release_count = req->r_dir_release_cnt; + dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate"); + dout("readdir !did_prepopulate\n"); /* disable readdir cache */ - fi->readdir_cache_idx = -1; + dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ - fi->dir_release_count = 0; + dfi->dir_release_count = 0; } /* note next offset and last dentry name */ @@ -463,19 +463,19 @@ more: rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(fi, rde->name, rde->name_len, + err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); if (err) return err; } else if (req->r_reply_info.dir_end) { - fi->next_offset = 2; + dfi->next_offset = 2; /* keep last name */ } } - rinfo = &fi->last_readdir->r_reply_info; + rinfo = &dfi->last_readdir->r_reply_info; dout("readdir frag %x num %d pos %llx chunk first %llx\n", - fi->frag, rinfo->dir_nr, ctx->pos, + dfi->frag, rinfo->dir_nr, ctx->pos, rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); i = 0; @@ -519,52 +519,55 @@ more: ctx->pos++; } - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; - if (fi->next_offset > 2) { - frag = fi->frag; + if (dfi->next_offset > 2) { + frag = dfi->frag; goto more; } /* more frags? */ - if (!ceph_frag_is_rightmost(fi->frag)) { - frag = ceph_frag_next(fi->frag); + if (!ceph_frag_is_rightmost(dfi->frag)) { + frag = ceph_frag_next(dfi->frag); if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), - fi->next_offset, true); + dfi->next_offset, true); if (new_pos > ctx->pos) ctx->pos = new_pos; /* keep last_name */ } else { - ctx->pos = ceph_make_fpos(frag, fi->next_offset, false); - kfree(fi->last_name); - fi->last_name = NULL; + ctx->pos = ceph_make_fpos(frag, dfi->next_offset, + false); + kfree(dfi->last_name); + dfi->last_name = NULL; } dout("readdir next frag is %x\n", frag); goto more; } - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; /* * if dir_release_count still matches the dir, no dentries * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + if (atomic64_read(&ci->i_release_count) == + dfi->dir_release_count) { spin_lock(&ci->i_ceph_lock); - if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { + if (dfi->dir_ordered_count == + atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); /* use i_size to track number of entries in * readdir cache */ - BUG_ON(fi->readdir_cache_idx < 0); - i_size_write(inode, fi->readdir_cache_idx * + BUG_ON(dfi->readdir_cache_idx < 0); + i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { dout(" marking %p complete\n", inode); } - __ceph_dir_set_complete(ci, fi->dir_release_count, - fi->dir_ordered_count); + __ceph_dir_set_complete(ci, dfi->dir_release_count, + dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } @@ -572,25 +575,25 @@ more: return 0; } -static void reset_readdir(struct ceph_file_info *fi) +static void reset_readdir(struct ceph_dir_file_info *dfi) { - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } - kfree(fi->last_name); - fi->last_name = NULL; - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; - fi->next_offset = 2; /* compensate for . and .. */ - fi->flags &= ~CEPH_F_ATEND; + kfree(dfi->last_name); + dfi->last_name = NULL; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; + dfi->next_offset = 2; /* compensate for . and .. */ + dfi->file_info.flags &= ~CEPH_F_ATEND; } /* * discard buffered readdir content on seekdir(0), or seek to new frag, * or seek prior to current chunk */ -static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) +static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos) { struct ceph_mds_reply_info_parsed *rinfo; loff_t chunk_offset; @@ -599,10 +602,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) if (is_hash_order(new_pos)) { /* no need to reset last_name for a forward seek when * dentries are sotred in hash order */ - } else if (fi->frag != fpos_frag(new_pos)) { + } else if (dfi->frag != fpos_frag(new_pos)) { return true; } - rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; + rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL; if (!rinfo || !rinfo->dir_nr) return true; chunk_offset = rinfo->dir_entries[0].offset; @@ -612,7 +615,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; loff_t retval; @@ -630,20 +633,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } if (offset >= 0) { - if (need_reset_readdir(fi, offset)) { + if (need_reset_readdir(dfi, offset)) { dout("dir_llseek dropping %p content\n", file); - reset_readdir(fi); + reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek * is within same frag */ - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; } if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; - fi->flags &= ~CEPH_F_ATEND; + dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; } @@ -824,6 +827,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; @@ -877,6 +883,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { @@ -926,6 +935,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } + if (op == CEPH_MDS_OP_MKDIR && + ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } + mode |= S_IFDIR; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) @@ -1065,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, else return -EROFS; } + /* don't allow cross-quota renames */ + if ((old_dir != new_dir) && + (!ceph_quota_is_same_realm(old_dir, new_dir))) + return -EXDEV; + dout("rename dir %p dentry %p to dir %p dentry %p\n", old_dir, old_dentry, new_dir, new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1351,7 +1371,7 @@ static void ceph_d_prune(struct dentry *dentry) static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct ceph_file_info *cf = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int left; @@ -1360,12 +1380,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; - if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_KERNEL); - if (!cf->dir_info) + if (!dfi->dir_info) { + dfi->dir_info = kmalloc(bufsize, GFP_KERNEL); + if (!dfi->dir_info) return -ENOMEM; - cf->dir_info_len = - snprintf(cf->dir_info, bufsize, + dfi->dir_info_len = + snprintf(dfi->dir_info, bufsize, "entries: %20lld\n" " files: %20lld\n" " subdirs: %20lld\n" @@ -1385,10 +1405,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, (long)ci->i_rctime.tv_nsec); } - if (*ppos >= cf->dir_info_len) + if (*ppos >= dfi->dir_info_len) return 0; - size = min_t(unsigned, size, cf->dir_info_len-*ppos); - left = copy_to_user(buf, cf->dir_info + *ppos, size); + size = min_t(unsigned, size, dfi->dir_info_len-*ppos); + left = copy_to_user(buf, dfi->dir_info + *ppos, size); if (left == size) return -EFAULT; *ppos += (size - left); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b67eec3532a1..f85040d73e3d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags) break; } + flags &= ~O_ACCMODE; + #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } ceph_sys2wire(O_CREAT); @@ -41,7 +43,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x", flags); + dout("unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -159,13 +161,50 @@ out: return req; } +static int ceph_init_file_info(struct inode *inode, struct file *file, + int fmode, bool isdir) +{ + struct ceph_file_info *fi; + + dout("%s %p %p 0%o (%s)\n", __func__, inode, file, + inode->i_mode, isdir ? "dir" : "regular"); + BUG_ON(inode->i_fop->release != ceph_release); + + if (isdir) { + struct ceph_dir_file_info *dfi = + kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL); + if (!dfi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = dfi; + fi = &dfi->file_info; + dfi->next_offset = 2; + dfi->readdir_cache_idx = -1; + } else { + fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); + if (!fi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = fi; + } + + fi->fmode = fmode; + spin_lock_init(&fi->rw_contexts_lock); + INIT_LIST_HEAD(&fi->rw_contexts); + + return 0; +} + /* * initialize private struct file data. * if we fail, clean up by dropping fmode reference on the ceph_inode */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { - struct ceph_file_info *cf; int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -173,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) ceph_fscache_register_inode_cookie(inode); ceph_fscache_file_set_cookie(inode, file); case S_IFDIR: - dout("init_file %p %p 0%o (regular)\n", inode, file, - inode->i_mode); - cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); - if (!cf) { - ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ - return -ENOMEM; - } - cf->fmode = fmode; - - spin_lock_init(&cf->rw_contexts_lock); - INIT_LIST_HEAD(&cf->rw_contexts); - - cf->next_offset = 2; - cf->readdir_cache_idx = -1; - file->private_data = cf; - BUG_ON(inode->i_fop->release != ceph_release); + ret = ceph_init_file_info(inode, file, fmode, + S_ISDIR(inode->i_mode)); + if (ret) + return ret; break; case S_IFLNK: @@ -278,11 +305,11 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct ceph_file_info *cf = file->private_data; + struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; - if (cf) { + if (fi) { dout("open file %p is already opened\n", file); return 0; } @@ -375,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_mds_request *req; struct dentry *dn; struct ceph_acls_info acls = {}; - int mask; + int mask; int err; dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", @@ -386,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (flags & O_CREAT) { + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; @@ -460,16 +489,27 @@ out_acl: int ceph_release(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_file_info *cf = file->private_data; - dout("release inode %p file %p\n", inode, file); - ceph_put_fmode(ci, cf->fmode); - if (cf->last_readdir) - ceph_mdsc_put_request(cf->last_readdir); - kfree(cf->last_name); - kfree(cf->dir_info); - WARN_ON(!list_empty(&cf->rw_contexts)); - kmem_cache_free(ceph_file_cachep, cf); + if (S_ISDIR(inode->i_mode)) { + struct ceph_dir_file_info *dfi = file->private_data; + dout("release inode %p dir file %p\n", inode, file); + WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); + + ceph_put_fmode(ci, dfi->file_info.fmode); + + if (dfi->last_readdir) + ceph_mdsc_put_request(dfi->last_readdir); + kfree(dfi->last_name); + kfree(dfi->dir_info); + kmem_cache_free(ceph_dir_file_cachep, dfi); + } else { + struct ceph_file_info *fi = file->private_data; + dout("release inode %p regular file %p\n", inode, file); + WARN_ON(!list_empty(&fi->rw_contexts)); + + ceph_put_fmode(ci, fi->fmode); + kmem_cache_free(ceph_file_cachep, fi); + } /* wake up anyone waiting for caps on this inode */ wake_up_all(&ci->i_cap_wq); @@ -1338,6 +1378,11 @@ retry_snap: pos = iocb->ki_pos; count = iov_iter_count(from); + if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { + err = -EDQUOT; + goto out; + } + err = file_remove_privs(file); if (err) goto out; @@ -1419,6 +1464,7 @@ retry_snap: if (written >= 0) { int dirty; + spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, @@ -1426,6 +1472,8 @@ retry_snap: spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", @@ -1668,6 +1716,12 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) && + ceph_quota_is_max_bytes_exceeded(inode, offset + length)) { + ret = -EDQUOT; + goto unlock; + } + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; @@ -1716,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode, spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if ((endoff > size) && + ceph_quota_is_max_bytes_approaching(inode, endoff)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } ceph_put_cap_refs(ci, got); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index c6ec5aa46100..ae056927080d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb) atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; + ci->i_max_bytes = 0; + ci->i_max_files = 0; + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); @@ -536,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode) ceph_queue_caps_release(inode); + if (__ceph_has_any_quota(ci)) + ceph_adjust_quota_realms_count(inode, false); + /* * we may still have a snap_realm reference if there are stray * caps in i_snap_caps. @@ -548,6 +554,9 @@ void ceph_destroy_inode(struct inode *inode) dout(" dropping residual ref to snap realm %p\n", realm); spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(mdsc, realm); } @@ -660,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_FILE_BUFFER| CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { - if (timespec_compare(ctime, &inode->i_ctime) > 0) { + if (ci->i_version == 0 || + timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode->i_ctime = *ctime; } - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { + if (ci->i_version == 0 || + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %ld.%09ld -> %ld.%09ld " "tw %d -> %d\n", @@ -786,10 +797,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page, new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ - ci->i_version = le64_to_cpu(info->version); inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); @@ -857,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, xattr_blob = NULL; } + /* finally update i_version */ + ci->i_version = le64_to_cpu(info->version); + inode->i_mapping->a_ops = &ceph_aops; switch (inode->i_mode & S_IFMT) { @@ -1867,20 +1882,9 @@ retry: * possibly truncate them.. so write AND block! */ if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { - struct ceph_cap_snap *capsnap; - to = ci->i_truncate_size; - list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - // MDS should have revoked Frw caps - WARN_ON_ONCE(capsnap->writing); - if (capsnap->dirty_pages && capsnap->size > to) - to = capsnap->size; - } spin_unlock(&ci->i_ceph_lock); dout("__do_pending_vmtruncate %p flushing snaps first\n", inode); - - truncate_pagecache(inode, to); - filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; @@ -2152,6 +2156,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + if ((attr->ia_valid & ATTR_SIZE) && + ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) + return -EDQUOT; + err = __ceph_setattr(inode, attr); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 851aa69ec8f0..c90f03beb15d 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -5,7 +5,7 @@ #include "super.h" #include "mds_client.h" #include "ioctl.h" - +#include <linux/ceph/striper.h> /* * ioctls @@ -185,7 +185,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); - u64 len = 1, olen; + u32 xlen; u64 tmp; struct ceph_pg pgid; int r; @@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->lock); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, - &dl.object_no, &dl.object_offset, - &olen); - if (r < 0) { - up_read(&osdc->lock); - return -EIO; - } + ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1, + &dl.object_no, &dl.object_offset, &xlen); dl.file_offset -= dl.object_offset; dl.object_size = ci->i_layout.object_size; dl.block_size = ci->i_layout.stripe_unit; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9e66f69ee8a5..9dae2ec7e1fa 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, + "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type); @@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, + "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, (int)operation, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type, err); return err; @@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) return -ENOLCK; - dout("ceph_lock, fl_owner: %p", fl->fl_owner); + dout("ceph_lock, fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if (IS_GETLK(cmd)) @@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK) { - dout("mds locked, locking locally"); + dout("mds locked, locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock", + dout("got %d on posix_lock_file, undid lock\n", err); } } @@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (fl->fl_type & LOCK_MAND) return -EOPNOTSUPP; - dout("ceph_flock, fl_file: %p", fl->fl_file); + dout("ceph_flock, fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock", err); + dout("got %d on locks_lock_file_wait, undid lock\n", err); } } return err; @@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks", + dout("counted %d flock locks and %d fcntl locks\n", *flock_count, *fcntl_count); } @@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d", lock->fl_type); + dout("Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks", num_flock_locks, + dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, num_fcntl_locks); if (!ctx) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2e8f90f96540..5ece2e6ad154 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end, } else info->inline_version = CEPH_INLINE_NONE; + if (features & CEPH_FEATURE_MDS_QUOTA) { + u8 struct_v, struct_compat; + u32 struct_len; + + /* + * both struct_v and struct_compat are expected to be >= 1 + */ + ceph_decode_8_safe(p, end, struct_v, bad); + ceph_decode_8_safe(p, end, struct_compat, bad); + if (!struct_v || !struct_compat) + goto bad; + ceph_decode_32_safe(p, end, struct_len, bad); + ceph_decode_need(p, end, struct_len, bad); + ceph_decode_64_safe(p, end, info->max_bytes, bad); + ceph_decode_64_safe(p, end, info->max_files, bad); + } else { + info->max_bytes = 0; + info->max_files = 0; + } + info->pool_ns_len = 0; info->pool_ns_data = NULL; if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { @@ -384,7 +404,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s) refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref)); return s; } else { - dout("mdsc get_session %p 0 -- FAIL", s); + dout("mdsc get_session %p 0 -- FAIL\n", s); return NULL; } } @@ -419,9 +439,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, static bool __have_session(struct ceph_mds_client *mdsc, int mds) { - if (mds >= mdsc->max_sessions) + if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return false; - return mdsc->sessions[mds]; + else + return true; } static int __verify_registered_session(struct ceph_mds_client *mdsc, @@ -448,6 +469,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) return ERR_PTR(-ENOMEM); + + if (mds >= mdsc->max_sessions) { + int newmax = 1 << get_count_order(mds + 1); + struct ceph_mds_session **sa; + + dout("%s: realloc to %d\n", __func__, newmax); + sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); + if (!sa) + goto fail_realloc; + if (mdsc->sessions) { + memcpy(sa, mdsc->sessions, + mdsc->max_sessions * sizeof(void *)); + kfree(mdsc->sessions); + } + mdsc->sessions = sa; + mdsc->max_sessions = newmax; + } + + dout("%s: mds%d\n", __func__, mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -476,23 +516,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_flushing); - dout("register_session mds%d\n", mds); - if (mds >= mdsc->max_sessions) { - int newmax = 1 << get_count_order(mds+1); - struct ceph_mds_session **sa; - - dout("register_session realloc to %d\n", newmax); - sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); - if (!sa) - goto fail_realloc; - if (mdsc->sessions) { - memcpy(sa, mdsc->sessions, - mdsc->max_sessions * sizeof(void *)); - kfree(mdsc->sessions); - } - mdsc->sessions = sa; - mdsc->max_sessions = newmax; - } mdsc->sessions[mds] = s; atomic_inc(&mdsc->num_sessions); refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ @@ -2531,10 +2554,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * Otherwise we just have to return an ESTALE */ if (result == -ESTALE) { - dout("got ESTALE on request %llu", req->r_tid); + dout("got ESTALE on request %llu\n", req->r_tid); req->r_resend_mds = -1; if (req->r_direct_mode != USE_AUTH_MDS) { - dout("not using auth, setting for that now"); + dout("not using auth, setting for that now\n"); req->r_direct_mode = USE_AUTH_MDS; __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); @@ -2542,13 +2565,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { int mds = __choose_mds(mdsc, req); if (mds >= 0 && mds != req->r_session->s_mds) { - dout("but auth changed, so resending"); + dout("but auth changed, so resending\n"); __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); goto out; } } - dout("have to return ESTALE on request %llu", req->r_tid); + dout("have to return ESTALE on request %llu\n", req->r_tid); } @@ -3470,13 +3493,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, } /* - * drop all leases (and dentry refs) in preparation for umount + * lock unlock sessions, to wait ongoing session activities */ -static void drop_leases(struct ceph_mds_client *mdsc) +static void lock_unlock_sessions(struct ceph_mds_client *mdsc) { int i; - dout("drop_leases\n"); mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); @@ -3572,7 +3594,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) if (!mdsc) return -ENOMEM; mdsc->fsc = fsc; - fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); if (!mdsc->mdsmap) { @@ -3580,6 +3601,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) return -ENOMEM; } + fsc->mdsc = mdsc; init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -3587,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + atomic64_set(&mdsc->quotarealms_count, 0); mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; @@ -3660,7 +3683,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) dout("pre_umount\n"); mdsc->stopping = 1; - drop_leases(mdsc); + lock_unlock_sessions(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); @@ -3858,6 +3881,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) struct ceph_mds_client *mdsc = fsc->mdsc; dout("mdsc_destroy %p\n", mdsc); + if (!mdsc) + return; + /* flush out any connection work with references to us */ ceph_msgr_flush(); @@ -4077,6 +4103,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) case CEPH_MSG_CLIENT_LEASE: handle_lease(mdsc, s, msg); break; + case CEPH_MSG_CLIENT_QUOTA: + ceph_handle_quota(mdsc, s, msg); + break; default: pr_err("received unknown message type %d %s\n", type, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 71e3b783ee6f..2ec3b5b35067 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in { char *inline_data; u32 pool_ns_len; char *pool_ns_data; + u64 max_bytes; + u64 max_files; }; struct ceph_mds_reply_dir_entry { @@ -312,6 +314,8 @@ struct ceph_mds_client { int max_sessions; /* len of s_mds_sessions */ int stopping; /* true if shutting down */ + atomic64_t quotarealms_count; /* # realms with quota */ + /* * snap_rwsem will cover cap linkage into snaprealms, and * realm snap contexts. (later, we can do per-realm snap diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c new file mode 100644 index 000000000000..242bfa5c0539 --- /dev/null +++ b/fs/ceph/quota.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * quota.c - CephFS quota + * + * Copyright (C) 2017-2018 SUSE + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/statfs.h> + +#include "super.h" +#include "mds_client.h" + +void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + if (inc) + atomic64_inc(&mdsc->quotarealms_count); + else + atomic64_dec(&mdsc->quotarealms_count); +} + +static inline bool ceph_has_realms_with_quotas(struct inode *inode) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + return atomic64_read(&mdsc->quotarealms_count) > 0; +} + +void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg) +{ + struct super_block *sb = mdsc->fsc->sb; + struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_vino vino; + struct inode *inode; + struct ceph_inode_info *ci; + + if (msg->front.iov_len != sizeof(*h)) { + pr_err("%s corrupt message mds%d len %d\n", __func__, + session->s_mds, (int)msg->front.iov_len); + ceph_msg_dump(msg); + return; + } + + /* increment msg sequence number */ + mutex_lock(&session->s_mutex); + session->s_seq++; + mutex_unlock(&session->s_mutex); + + /* lookup inode */ + vino.ino = le64_to_cpu(h->ino); + vino.snap = CEPH_NOSNAP; + inode = ceph_find_inode(sb, vino); + if (!inode) { + pr_warn("Failed to find inode %llu\n", vino.ino); + return; + } + ci = ceph_inode(inode); + + spin_lock(&ci->i_ceph_lock); + ci->i_rbytes = le64_to_cpu(h->rbytes); + ci->i_rfiles = le64_to_cpu(h->rfiles); + ci->i_rsubdirs = le64_to_cpu(h->rsubdirs); + __ceph_update_quota(ci, le64_to_cpu(h->max_bytes), + le64_to_cpu(h->max_files)); + spin_unlock(&ci->i_ceph_lock); + + iput(inode); +} + +/* + * This function walks through the snaprealm for an inode and returns the + * ceph_snap_realm for the first snaprealm that has quotas set (either max_files + * or max_bytes). If the root is reached, return the root ceph_snap_realm + * instead. + * + * Note that the caller is responsible for calling ceph_put_snap_realm() on the + * returned realm. + */ +static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, + struct inode *inode) +{ + struct ceph_inode_info *ci = NULL; + struct ceph_snap_realm *realm, *next; + struct inode *in; + bool has_quota; + + if (ceph_snap(inode) != CEPH_NOSNAP) + return NULL; + + realm = ceph_inode(inode)->i_snap_realm; + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); + while (realm) { + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) + break; + + ci = ceph_inode(in); + has_quota = __ceph_has_any_quota(ci); + iput(in); + + next = realm->parent; + if (has_quota || !next) + return realm; + + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + if (realm) + ceph_put_snap_realm(mdsc, realm); + + return NULL; +} + +bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; + struct ceph_snap_realm *old_realm, *new_realm; + bool is_same; + + down_read(&mdsc->snap_rwsem); + old_realm = get_quota_realm(mdsc, old); + new_realm = get_quota_realm(mdsc, new); + is_same = (old_realm == new_realm); + up_read(&mdsc->snap_rwsem); + + if (old_realm) + ceph_put_snap_realm(mdsc, old_realm); + if (new_realm) + ceph_put_snap_realm(mdsc, new_realm); + + return is_same; +} + +enum quota_check_op { + QUOTA_CHECK_MAX_FILES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_APPROACHING_OP /* check if quota max_files + limit is approaching */ +}; + +/* + * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each + * realm, it will execute quota check operation defined by the 'op' parameter. + * The snaprealm walk is interrupted if the quota check detects that the quota + * is exceeded or if the root inode is reached. + */ +static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, + loff_t delta) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm, *next; + struct inode *in; + u64 max, rvalue; + bool exceeded = false; + + if (ceph_snap(inode) != CEPH_NOSNAP) + return false; + + down_read(&mdsc->snap_rwsem); + realm = ceph_inode(inode)->i_snap_realm; + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); + while (realm) { + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) + break; + + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (op == QUOTA_CHECK_MAX_FILES_OP) { + max = ci->i_max_files; + rvalue = ci->i_rfiles + ci->i_rsubdirs; + } else { + max = ci->i_max_bytes; + rvalue = ci->i_rbytes; + } + spin_unlock(&ci->i_ceph_lock); + switch (op) { + case QUOTA_CHECK_MAX_FILES_OP: + exceeded = (max && (rvalue >= max)); + break; + case QUOTA_CHECK_MAX_BYTES_OP: + exceeded = (max && (rvalue + delta > max)); + break; + case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP: + if (max) { + if (rvalue >= max) + exceeded = true; + else { + /* + * when we're writing more that 1/16th + * of the available space + */ + exceeded = + (((max - rvalue) >> 4) < delta); + } + } + break; + default: + /* Shouldn't happen */ + pr_warn("Invalid quota check op (%d)\n", op); + exceeded = true; /* Just break the loop */ + } + iput(in); + + next = realm->parent; + if (exceeded || !next) + break; + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + ceph_put_snap_realm(mdsc, realm); + up_read(&mdsc->snap_rwsem); + + return exceeded; +} + +/* + * ceph_quota_is_max_files_exceeded - check if we can create a new file + * @inode: directory where a new file is being created + * + * This functions returns true is max_files quota allows a new file to be + * created. It is necessary to walk through the snaprealm hierarchy (until the + * FS root) to check all realms with quotas set. + */ +bool ceph_quota_is_max_files_exceeded(struct inode *inode) +{ + if (!ceph_has_realms_with_quotas(inode)) + return false; + + WARN_ON(!S_ISDIR(inode->i_mode)); + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); +} + +/* + * ceph_quota_is_max_bytes_exceeded - check if we can write to a file + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This functions returns true is max_bytes quota allows a file size to reach + * @newsize; it returns false otherwise. + */ +bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) +{ + loff_t size = i_size_read(inode); + + if (!ceph_has_realms_with_quotas(inode)) + return false; + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size)); +} + +/* + * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This function returns true if the new file size @newsize will be consuming + * more than 1/16th of the available quota space; it returns false otherwise. + */ +bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) +{ + loff_t size = ceph_inode(inode)->i_reported_size; + + if (!ceph_has_realms_with_quotas(inode)) + return false; + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP, + (newsize - size)); +} + +/* + * ceph_quota_update_statfs - if root has quota update statfs with quota status + * @fsc: filesystem client instance + * @buf: statfs to update + * + * If the mounted filesystem root has max_bytes quota set, update the filesystem + * statistics with the quota status. + * + * This function returns true if the stats have been updated, false otherwise. + */ +bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) +{ + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm; + struct inode *in; + u64 total = 0, used, free; + bool is_updated = false; + + down_read(&mdsc->snap_rwsem); + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); + up_read(&mdsc->snap_rwsem); + if (!realm) + return false; + + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (in) { + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (ci->i_max_bytes) { + total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; + used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; + /* It is possible for a quota to be exceeded. + * Report 'zero' in that case + */ + free = total > used ? total - used : 0; + } + spin_unlock(&ci->i_ceph_lock); + if (total) { + buf->f_blocks = total; + buf->f_bfree = free; + buf->f_bavail = free; + is_updated = true; + } + iput(in); + } + ceph_put_snap_realm(mdsc, realm); + + return is_updated; +} + diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 07cf95e6413d..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fb2bc9c15a23..b33082e6878f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; - buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + + /* + * By default use root quota for stats; fallback to overall filesystem + * usage if using 'noquotadf' mount option or if the root dir doesn't + * have max_bytes quota set. + */ + if (ceph_test_mount_opt(fsc, NOQUOTADF) || + !ceph_quota_update_statfs(fsc, buf)) { + buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + } buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; @@ -151,6 +160,8 @@ enum { Opt_acl, #endif Opt_noacl, + Opt_quotadf, + Opt_noquotadf, }; static match_table_t fsopt_tokens = { @@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = { {Opt_acl, "acl"}, #endif {Opt_noacl, "noacl"}, + {Opt_quotadf, "quotadf"}, + {Opt_noquotadf, "noquotadf"}, {-1, NULL} }; @@ -314,13 +327,16 @@ static int parse_fsopt_token(char *c, void *private) break; case Opt_fscache: fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_poolperm: fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; - printk ("pool perm"); break; case Opt_nopoolperm: fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; @@ -331,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private) case Opt_norequire_active_mds: fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; break; + case Opt_quotadf: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; + break; + case Opt_noquotadf: + fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= SB_POSIXACL; @@ -513,13 +535,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { - if (fsopt->fscache_uniq) - seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); - else - seq_puts(m, ",fsc"); + seq_show_option(m, "fsc", fsopt->fscache_uniq); } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) + seq_puts(m, ",noquotadf"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & SB_POSIXACL) @@ -529,7 +550,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) #endif if (fsopt->mds_namespace) - seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace); + seq_show_option(m, "mds_namespace", fsopt->mds_namespace); if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) @@ -679,6 +700,7 @@ struct kmem_cache *ceph_cap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; +struct kmem_cache *ceph_dir_file_cachep; static void ceph_inode_init_once(void *foo) { @@ -698,8 +720,7 @@ static int __init init_caches(void) if (!ceph_inode_cachep) return -ENOMEM; - ceph_cap_cachep = KMEM_CACHE(ceph_cap, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, @@ -716,6 +737,10 @@ static int __init init_caches(void) if (!ceph_file_cachep) goto bad_file; + ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); + if (!ceph_dir_file_cachep) + goto bad_dir_file; + error = ceph_fscache_register(); if (error) goto bad_fscache; @@ -723,6 +748,8 @@ static int __init init_caches(void) return 0; bad_fscache: + kmem_cache_destroy(ceph_dir_file_cachep); +bad_dir_file: kmem_cache_destroy(ceph_file_cachep); bad_file: kmem_cache_destroy(ceph_dentry_cachep); @@ -748,6 +775,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); + kmem_cache_destroy(ceph_dir_file_cachep); ceph_fscache_unregister(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1c2086e0fec2..a7077a0c989f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -39,6 +39,7 @@ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ +#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE @@ -310,6 +311,9 @@ struct ceph_inode_info { u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; + /* quotas */ + u64 i_max_bytes, i_max_files; + struct rb_root i_fragtree; int i_fragtree_nsplits; struct mutex i_fragtree_mutex; @@ -671,6 +675,10 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; +}; + +struct ceph_dir_file_info { + struct ceph_file_info file_info; /* readdir: position within the dir */ u32 frag; @@ -748,6 +756,7 @@ struct ceph_readdir_cache_control { */ struct ceph_snap_realm { u64 ino; + struct inode *inode; atomic_t nref; struct rb_node node; @@ -1066,4 +1075,37 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); +/* quota.c */ +static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci) +{ + return ci->i_max_files || ci->i_max_bytes; +} + +extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc); + +static inline void __ceph_update_quota(struct ceph_inode_info *ci, + u64 max_bytes, u64 max_files) +{ + bool had_quota, has_quota; + had_quota = __ceph_has_any_quota(ci); + ci->i_max_bytes = max_bytes; + ci->i_max_files = max_files; + has_quota = __ceph_has_any_quota(ci); + + if (had_quota != has_quota) + ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota); +} + +extern void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg); +extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); +extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new); +extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, + loff_t newlen); +extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, + loff_t newlen); +extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, + struct kstatfs *buf); + #endif /* _FS_CEPH_SUPER_H */ diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index e1c4e0b12b4c..7e72348639e4 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } +/* quotas */ + +static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci) +{ + return (ci->i_max_files || ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, + size_t size) +{ + return snprintf(val, size, "max_bytes=%llu max_files=%llu", + ci->i_max_bytes, ci->i_max_files); +} + +static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_files); +} #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define CEPH_XATTR_NAME2(_type, _name, _name2) \ @@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, .hidden = true, \ .exists_cb = ceph_vxattrcb_layout_exists, \ } +#define XATTR_QUOTA_FIELD(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_quota_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { { @@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), + { + .name = "ceph.quota", + .name_size = sizeof("ceph.quota"), + .getxattr_cb = ceph_vxattrcb_quota, + .readonly = false, + .hidden = true, + .exists_cb = ceph_vxattrcb_quota_exists, + }, + XATTR_QUOTA_FIELD(quota, max_bytes), + XATTR_QUOTA_FIELD(quota, max_files), { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index e35e711db68e..9d69ea433330 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -42,23 +42,6 @@ cifs_dump_mem(char *label, void *data, int length) data, length, true); } -#ifdef CONFIG_CIFS_DEBUG -void cifs_vfs_err(const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - pr_err_ratelimited("CIFS VFS: %pV", &vaf); - - va_end(args); -} -#endif - void cifs_dump_detail(void *buf) { #ifdef CONFIG_CIFS_DEBUG2 diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index c611ca2339d7..fe5567655662 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -39,6 +39,7 @@ extern int cifsFYI; #else #define NOISY 0 #endif +#define ONCE 8 /* * debug ON @@ -46,19 +47,28 @@ extern int cifsFYI; */ #ifdef CONFIG_CIFS_DEBUG -__printf(1, 2) void cifs_vfs_err(const char *fmt, ...); - /* information message: e.g., configuration, major event */ -#define cifs_dbg(type, fmt, ...) \ -do { \ - if (type == FYI && cifsFYI & CIFS_INFO) { \ - pr_debug_ratelimited("%s: " \ - fmt, __FILE__, ##__VA_ARGS__); \ - } else if (type == VFS) { \ - cifs_vfs_err(fmt, ##__VA_ARGS__); \ - } else if (type == NOISY && type != 0) { \ - pr_debug_ratelimited(fmt, ##__VA_ARGS__); \ - } \ +#define cifs_dbg_func(ratefunc, type, fmt, ...) \ +do { \ + if ((type) & FYI && cifsFYI & CIFS_INFO) { \ + pr_debug_ ## ratefunc("%s: " \ + fmt, __FILE__, ##__VA_ARGS__); \ + } else if ((type) & VFS) { \ + pr_err_ ## ratefunc("CuIFS VFS: " \ + fmt, ##__VA_ARGS__); \ + } else if ((type) & NOISY && (NOISY != 0)) { \ + pr_debug_ ## ratefunc(fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define cifs_dbg(type, fmt, ...) \ +do { \ + if ((type) & ONCE) \ + cifs_dbg_func(once, \ + type, fmt, ##__VA_ARGS__); \ + else \ + cifs_dbg_func(ratelimited, \ + type, fmt, ##__VA_ARGS__); \ } while (0) /* diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 2282562e78a1..cb950a5fa078 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -665,6 +665,8 @@ struct TCP_Server_Info { struct delayed_work echo; /* echo ping workqueue job */ char *smallbuf; /* pointer to current "small" buffer */ char *bigbuf; /* pointer to current "big" buffer */ + /* Total size of this PDU. Only valid from cifs_demultiplex_thread */ + unsigned int pdu_size; unsigned int total_read; /* total amount of data read in this pass */ #ifdef CONFIG_CIFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ @@ -676,6 +678,7 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; #ifdef CONFIG_CIFS_SMB311 + __le16 cipher_type; /* save initital negprot hash */ __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; #endif /* 3.1.1 */ @@ -1373,6 +1376,7 @@ struct mid_q_entry { mid_handle_t *handle; /* call handle mid callback */ void *callback_data; /* general purpose pointer for callback */ void *resp_buf; /* pointer to received SMB header */ + unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ unsigned int mid_flags; __le16 command; /* smb command code */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 59c09a596c0a..6d3e40d7029c 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -206,8 +206,10 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); - if (rc) + if (rc) { + printk_once(KERN_WARNING "reconnect tcon failed rc = %d\n", rc); goto out; + } atomic_inc(&tconInfoReconnectCount); @@ -1416,8 +1418,9 @@ openRetry: int cifs_discard_remaining_data(struct TCP_Server_Info *server) { - unsigned int rfclen = get_rfc1002_length(server->smallbuf); - int remaining = rfclen + 4 - server->total_read; + unsigned int rfclen = server->pdu_size; + int remaining = rfclen + server->vals->header_preamble_size - + server->total_read; while (remaining > 0) { int length; @@ -1454,7 +1457,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int data_offset, data_len; struct cifs_readdata *rdata = mid->callback_data; char *buf = server->smallbuf; - unsigned int buflen = get_rfc1002_length(buf) + + unsigned int buflen = server->pdu_size + server->vals->header_preamble_size; bool use_rdma_mr = false; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4e0808f40195..e8830f076a7f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -772,7 +772,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) { int length; char *buf = server->smallbuf; - unsigned int pdu_length = get_rfc1002_length(buf); + unsigned int pdu_length = server->pdu_size; /* make sure this will fit in a large buffer */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - @@ -881,6 +881,7 @@ cifs_demultiplex_thread(void *p) * so we can now interpret the length field. */ pdu_length = get_rfc1002_length(buf); + server->pdu_size = pdu_length; cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length); if (!is_smb_response(server, buf[0])) @@ -927,6 +928,7 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; if (mid_entry != NULL) { + mid_entry->resp_buf_size = server->pdu_size; if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) && mid_entry->mid_state == MID_RESPONSE_RECEIVED && server->ops->handle_cancelled_mid) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7cee97b93a61..4bcd4e838b47 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, for (i = 0; i < found_pages; i++) { page = wdata->pages[i]; /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping + * At this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping */ if (nr_pages == 0) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f856df4adae3..3c371f7f5963 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -710,7 +710,7 @@ cgfi_exit: /* Simple function to return a 64 bit hash of string. Rarely called */ static __u64 simple_hashstr(const char *str) { - const __u64 hash_mult = 1125899906842597L; /* a big enough prime */ + const __u64 hash_mult = 1125899906842597ULL; /* a big enough prime */ __u64 hash = 0; while (*str) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 5406e95f5d92..68ea8491c160 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -93,6 +93,43 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; +#ifdef CONFIG_CIFS_SMB311 +static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen, + size_t hdr_preamble_size) +{ + __u16 neg_count; + __u32 nc_offset, size_of_pad_before_neg_ctxts; + struct smb2_negotiate_rsp *pneg_rsp = (struct smb2_negotiate_rsp *)hdr; + + /* Negotiate contexts are only valid for latest dialect SMB3.11 */ + neg_count = le16_to_cpu(pneg_rsp->NegotiateContextCount); + if ((neg_count == 0) || + (pneg_rsp->DialectRevision != cpu_to_le16(SMB311_PROT_ID))) + return 0; + + /* Make sure that negotiate contexts start after gss security blob */ + nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset); + if (nc_offset < non_ctxlen - hdr_preamble_size /* RFC1001 len */) { + printk_once(KERN_WARNING "invalid negotiate context offset\n"); + return 0; + } + size_of_pad_before_neg_ctxts = nc_offset - + (non_ctxlen - hdr_preamble_size); + + /* Verify that at least minimal negotiate contexts fit within frame */ + if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) { + printk_once(KERN_WARNING "negotiate context goes beyond end\n"); + return 0; + } + + cifs_dbg(FYI, "length of negcontexts %d pad %d\n", + len - nc_offset, size_of_pad_before_neg_ctxts); + + /* length of negcontexts including pad from end of sec blob to them */ + return (len - nc_offset) + size_of_pad_before_neg_ctxts; +} +#endif /* CIFS_SMB311 */ + int smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr) { @@ -198,6 +235,11 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr) clc_len = smb2_calc_size(hdr); +#ifdef CONFIG_CIFS_SMB311 + if (shdr->Command == SMB2_NEGOTIATE) + clc_len += get_neg_ctxt_len(hdr, len, clc_len, + srvr->vals->header_preamble_size); +#endif /* SMB311 */ if (srvr->vals->header_preamble_size + len != clc_len) { cifs_dbg(FYI, "Calculated size %u length %zu mismatch mid %llu\n", clc_len, srvr->vals->header_preamble_size + len, mid); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 968b1d43a1ea..b4ae932ea134 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1451,6 +1451,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct cifs_open_parms oparms; struct cifs_fid fid; + struct kvec err_iov = {NULL, 0}; struct smb2_err_rsp *err_buf = NULL; struct smb2_symlink_err_rsp *symlink; unsigned int sub_len; @@ -1473,15 +1474,16 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov); if (!rc || !err_buf) { kfree(utf16_path); return -ENOENT; } + err_buf = err_iov.iov_base; if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) || - get_rfc1002_length(err_buf) + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) { + err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) { kfree(utf16_path); return -ENOENT; } @@ -1494,13 +1496,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, print_len = le16_to_cpu(symlink->PrintNameLength); print_offset = le16_to_cpu(symlink->PrintNameOffset); - if (get_rfc1002_length(err_buf) + server->vals->header_preamble_size < + if (err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) { kfree(utf16_path); return -ENOENT; } - if (get_rfc1002_length(err_buf) + server->vals->header_preamble_size < + if (err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) { kfree(utf16_path); return -ENOENT; @@ -2550,7 +2552,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid) unsigned int npages; struct page **pages; unsigned int len; - unsigned int buflen = get_rfc1002_length(buf) + server->vals->header_preamble_size; + unsigned int buflen = server->pdu_size + server->vals->header_preamble_size; int rc; int i = 0; @@ -2624,7 +2626,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, { int length; char *buf = server->smallbuf; - unsigned int pdu_length = get_rfc1002_length(buf); + unsigned int pdu_length = server->pdu_size; unsigned int buf_size; struct mid_q_entry *mid_entry; @@ -2668,7 +2670,7 @@ static int smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid) { char *buf = server->smallbuf; - unsigned int pdu_length = get_rfc1002_length(buf); + unsigned int pdu_length = server->pdu_size; struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf; unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize); @@ -2699,7 +2701,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid) { char *buf = server->large_buf ? server->bigbuf : server->smallbuf; - return handle_read_data(server, mid, buf, get_rfc1002_length(buf) + + return handle_read_data(server, mid, buf, server->pdu_size + server->vals->header_preamble_size, NULL, 0, 0); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index f7741cee2a4c..0f044c4a2dc9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -268,8 +268,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) mutex_unlock(&tcon->ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); - if (rc) + if (rc) { + /* If sess reconnected but tcon didn't, something strange ... */ + printk_once(KERN_WARNING "reconnect tcon failed rc = %d\n", rc); goto out; + } if (smb2_command != SMB2_INTERNAL_CMD) queue_delayed_work(cifsiod_wq, &server->reconnect, 0); @@ -403,6 +406,100 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += 4 + sizeof(struct smb2_preauth_neg_context) + sizeof(struct smb2_encryption_neg_context); } + +static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) +{ + unsigned int len = le16_to_cpu(ctxt->DataLength); + + /* If invalid preauth context warn but use what we requested, SHA-512 */ + if (len < MIN_PREAUTH_CTXT_DATA_LEN) { + printk_once(KERN_WARNING "server sent bad preauth context\n"); + return; + } + if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1) + printk_once(KERN_WARNING "illegal SMB3 hash algorithm count\n"); + if (ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512) + printk_once(KERN_WARNING "unknown SMB3 hash algorithm\n"); +} + +static int decode_encrypt_ctx(struct TCP_Server_Info *server, + struct smb2_encryption_neg_context *ctxt) +{ + unsigned int len = le16_to_cpu(ctxt->DataLength); + + cifs_dbg(FYI, "decode SMB3.11 encryption neg context of len %d\n", len); + if (len < MIN_ENCRYPT_CTXT_DATA_LEN) { + printk_once(KERN_WARNING "server sent bad crypto ctxt len\n"); + return -EINVAL; + } + + if (le16_to_cpu(ctxt->CipherCount) != 1) { + printk_once(KERN_WARNING "illegal SMB3.11 cipher count\n"); + return -EINVAL; + } + cifs_dbg(FYI, "SMB311 cipher type:%d\n", le16_to_cpu(ctxt->Ciphers[0])); + if ((ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_CCM) && + (ctxt->Ciphers[0] != SMB2_ENCRYPTION_AES128_GCM)) { + printk_once(KERN_WARNING "invalid SMB3.11 cipher returned\n"); + return -EINVAL; + } + server->cipher_type = ctxt->Ciphers[0]; + return 0; +} + +static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp, + struct TCP_Server_Info *server) +{ + struct smb2_neg_context *pctx; + unsigned int offset = le32_to_cpu(rsp->NegotiateContextOffset); + unsigned int ctxt_cnt = le16_to_cpu(rsp->NegotiateContextCount); + unsigned int len_of_smb = be32_to_cpu(rsp->hdr.smb2_buf_length); + unsigned int len_of_ctxts, i; + int rc = 0; + + cifs_dbg(FYI, "decoding %d negotiate contexts\n", ctxt_cnt); + if (len_of_smb <= offset) { + cifs_dbg(VFS, "Invalid response: negotiate context offset\n"); + return -EINVAL; + } + + len_of_ctxts = len_of_smb - offset; + + for (i = 0; i < ctxt_cnt; i++) { + int clen; + /* check that offset is not beyond end of SMB */ + if (len_of_ctxts == 0) + break; + + if (len_of_ctxts < sizeof(struct smb2_neg_context)) + break; + + pctx = (struct smb2_neg_context *)(offset + + server->vals->header_preamble_size + (char *)rsp); + clen = le16_to_cpu(pctx->DataLength); + if (clen > len_of_ctxts) + break; + + if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) + decode_preauth_context( + (struct smb2_preauth_neg_context *)pctx); + else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) + rc = decode_encrypt_ctx(server, + (struct smb2_encryption_neg_context *)pctx); + else + cifs_dbg(VFS, "unknown negcontext of type %d ignored\n", + le16_to_cpu(pctx->ContextType)); + + if (rc) + break; + /* offsets must be 8 byte aligned */ + clen = (clen + 7) & ~0x7; + offset += clen + sizeof(struct smb2_neg_context); + len_of_ctxts -= clen; + } + return rc; +} + #else static void assemble_neg_contexts(struct smb2_negotiate_req *req, unsigned int *total_len) @@ -616,6 +713,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) else if (rc == 0) rc = -EIO; } + +#ifdef CONFIG_CIFS_SMB311 + if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { + if (rsp->NegotiateContextCount) + rc = smb311_decode_neg_context(rsp, server); + else + cifs_dbg(VFS, "Missing expected negotiate contexts\n"); + } +#endif /* CONFIG_CIFS_SMB311 */ neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -1026,7 +1132,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) if (rc) goto out; - if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != + if (offsetof(struct smb2_sess_setup_rsp, Buffer) - ses->server->vals->header_preamble_size != le16_to_cpu(rsp->SecurityBufferOffset)) { cifs_dbg(VFS, "Invalid security buffer offset %d\n", le16_to_cpu(rsp->SecurityBufferOffset)); @@ -1701,7 +1807,7 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, - struct smb2_err_rsp **err_buf) + struct kvec *err_iov) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; @@ -1841,9 +1947,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); - if (err_buf && rsp) - *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, - GFP_KERNEL); + if (err_iov && rsp) { + *err_iov = rsp_iov; + resp_buftype = CIFS_NO_BUFFER; + rsp = NULL; + } goto creat_exit; } @@ -2098,13 +2206,13 @@ close_exit: } static int -validate_buf(unsigned int offset, unsigned int buffer_length, - struct smb2_hdr *hdr, unsigned int min_buf_size) - +validate_iov(struct TCP_Server_Info *server, + unsigned int offset, unsigned int buffer_length, + struct kvec *iov, unsigned int min_buf_size) { - unsigned int smb_len = be32_to_cpu(hdr->smb2_buf_length); - char *end_of_smb = smb_len + 4 /* RFC1001 length field */ + (char *)hdr; - char *begin_of_buf = 4 /* RFC1001 len field */ + offset + (char *)hdr; + unsigned int smb_len = iov->iov_len; + char *end_of_smb = smb_len + server->vals->header_preamble_size + (char *)iov->iov_base; + char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)iov->iov_base; char *end_of_buf = begin_of_buf + buffer_length; @@ -2134,18 +2242,18 @@ validate_buf(unsigned int offset, unsigned int buffer_length, * Caller must free buffer. */ static int -validate_and_copy_buf(unsigned int offset, unsigned int buffer_length, - struct smb2_hdr *hdr, unsigned int minbufsize, +validate_and_copy_iov(struct TCP_Server_Info *server, + unsigned int offset, unsigned int buffer_length, + struct kvec *iov, unsigned int minbufsize, char *data) - { - char *begin_of_buf = 4 /* RFC1001 len field */ + offset + (char *)hdr; + char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)(iov->iov_base); int rc; if (!data) return -EINVAL; - rc = validate_buf(offset, buffer_length, hdr, minbufsize); + rc = validate_iov(server, offset, buffer_length, iov, minbufsize); if (rc) return rc; @@ -2223,9 +2331,10 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, } } - rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), + rc = validate_and_copy_iov(ses->server, + le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), - &rsp->hdr, min_len, *data); + &rsp_iov, min_len, *data); qinf_exit: free_rsp_buf(resp_buftype, rsp); @@ -3146,8 +3255,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, goto qdir_exit; } - rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), - le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + rc = validate_iov(server, + le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, info_buf_size); if (rc) goto qdir_exit; @@ -3454,7 +3564,7 @@ static int build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, int outbuf_len, u64 persistent_fid, u64 volatile_fid) { - struct TCP_Server_Info *server = tcon->ses->server; + struct TCP_Server_Info *server; int rc; struct smb2_query_info_req *req; unsigned int total_len; @@ -3464,6 +3574,8 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) return -EIO; + server = tcon->ses->server; + rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req, &total_len); if (rc) @@ -3517,8 +3629,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, info = (struct smb2_fs_full_size_info *)(server->vals->header_preamble_size + le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr); - rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset), - le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr, + rc = validate_iov(server, + le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, sizeof(struct smb2_fs_full_size_info)); if (!rc) copy_fs_info_to_kstatfs(info, fsdata); @@ -3574,7 +3687,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rsp_len = le32_to_cpu(rsp->OutputBufferLength); offset = le16_to_cpu(rsp->OutputBufferOffset); - rc = validate_buf(offset, rsp_len, &rsp->hdr, min_len); + rc = validate_iov(server, offset, rsp_len, &rsp_iov, min_len); if (rc) goto qfsattr_exit; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 253e2c7c952f..6093e5142b2b 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -263,11 +263,19 @@ struct smb2_negotiate_req { #define SMB2_NT_FIND 0x00100000 #define SMB2_LARGE_FILES 0x00200000 +struct smb2_neg_context { + __le16 ContextType; + __le16 DataLength; + __le32 Reserved; + /* Followed by array of data */ +} __packed; + #define SMB311_SALT_SIZE 32 /* Hash Algorithm Types */ #define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) #define SMB2_PREAUTH_HASH_SIZE 64 +#define MIN_PREAUTH_CTXT_DATA_LEN (SMB311_SALT_SIZE + 6) struct smb2_preauth_neg_context { __le16 ContextType; /* 1 */ __le16 DataLength; @@ -282,6 +290,8 @@ struct smb2_preauth_neg_context { #define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) #define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002) +/* Min encrypt context data is one cipher so 2 bytes + 2 byte count field */ +#define MIN_ENCRYPT_CTXT_DATA_LEN 4 struct smb2_encryption_neg_context { __le16 ContextType; /* 2 */ __le16 DataLength; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index cbcce3f7e86f..8ba24a95db71 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -122,7 +122,7 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, - struct smb2_err_rsp **err_buf); + struct kvec *err_iov); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index bf49cb73b9e6..8806f3f76c1d 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -604,7 +604,7 @@ int smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error) { - unsigned int len = get_rfc1002_length(mid->resp_buf); + unsigned int len = mid->resp_buf_size; struct kvec iov[2]; struct smb_rqst rqst = { .rq_iov = iov, .rq_nvec = 2 }; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 279718dcb2ed..8f6f25918229 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -790,7 +790,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, buf = (char *)midQ->resp_buf; resp_iov->iov_base = buf; - resp_iov->iov_len = get_rfc1002_length(buf) + + resp_iov->iov_len = midQ->resp_buf_size + ses->server->vals->header_preamble_size; if (midQ->large_buf) *resp_buf_type = CIFS_LARGE_BUFFER; @@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table); #define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) -static unsigned long dax_radix_sector(void *entry) +static unsigned long dax_radix_pfn(void *entry) { return (unsigned long)entry >> RADIX_DAX_SHIFT; } -static void *dax_radix_locked_entry(sector_t sector, unsigned long flags) +static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) { return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | - ((unsigned long)sector << RADIX_DAX_SHIFT) | - RADIX_DAX_ENTRY_LOCK); + (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); } static unsigned int dax_radix_order(void *entry) @@ -159,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo } /* - * We do not necessarily hold the mapping->tree_lock when we call this - * function so it is possible that 'entry' is no longer a valid item in the - * radix tree. This is okay because all we really need to do is to find the - * correct waitqueue where tasks might be waiting for that old 'entry' and - * wake them. + * @entry may no longer be the entry at the index in the mapping. + * The important information it's conveying is whether the entry at + * this index used to be a PMD entry. */ static void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all) @@ -175,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping, /* * Checking for locked entry and prepare_to_wait_exclusive() happens - * under mapping->tree_lock, ditto for entry handling in our callers. + * under the i_pages lock, ditto for entry handling in our callers. * So at this point all tasks that could have seen our entry locked * must be in the waitqueue and the following check will see them. */ @@ -184,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping, } /* - * Check whether the given slot is locked. The function must be called with - * mapping->tree_lock held + * Check whether the given slot is locked. Must be called with the i_pages + * lock held. */ static inline int slot_locked(struct address_space *mapping, void **slot) { unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); return entry & RADIX_DAX_ENTRY_LOCK; } /* - * Mark the given slot is locked. The function must be called with - * mapping->tree_lock held + * Mark the given slot as locked. Must be called with the i_pages lock held. */ static inline void *lock_slot(struct address_space *mapping, void **slot) { unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); entry |= RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); + radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); return (void *)entry; } /* - * Mark the given slot is unlocked. The function must be called with - * mapping->tree_lock held + * Mark the given slot as unlocked. Must be called with the i_pages lock held. */ static inline void *unlock_slot(struct address_space *mapping, void **slot) { unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); + radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); return (void *)entry; } @@ -229,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) * put_locked_mapping_entry() when he locked the entry and now wants to * unlock it. * - * The function must be called with mapping->tree_lock held. + * Must be called with the i_pages lock held. */ static void *get_unlocked_mapping_entry(struct address_space *mapping, pgoff_t index, void ***slotp) @@ -242,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, ewait.wait.func = wake_exceptional_entry_func; for (;;) { - entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, + entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) || @@ -255,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); schedule(); finish_wait(wq, &ewait.wait); - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); } } @@ -267,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping, { void *entry, **slot; - spin_lock_irq(&mapping->tree_lock); - entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot); + xa_lock_irq(&mapping->i_pages); + entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) || !slot_locked(mapping, slot))) { - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return; } unlock_slot(mapping, slot); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); dax_wake_mapping_entry_waiter(mapping, index, entry, false); } @@ -299,6 +294,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping, dax_wake_mapping_entry_waiter(mapping, index, entry, false); } +static unsigned long dax_entry_size(void *entry) +{ + if (dax_is_zero_entry(entry)) + return 0; + else if (dax_is_empty_entry(entry)) + return 0; + else if (dax_is_pmd_entry(entry)) + return PMD_SIZE; + else + return PAGE_SIZE; +} + +static unsigned long dax_radix_end_pfn(void *entry) +{ + return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE; +} + +/* + * Iterate through all mapped pfns represented by an entry, i.e. skip + * 'empty' and 'zero' entries. + */ +#define for_each_mapped_pfn(entry, pfn) \ + for (pfn = dax_radix_pfn(entry); \ + pfn < dax_radix_end_pfn(entry); pfn++) + +static void dax_associate_entry(void *entry, struct address_space *mapping) +{ + unsigned long pfn; + + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return; + + for_each_mapped_pfn(entry, pfn) { + struct page *page = pfn_to_page(pfn); + + WARN_ON_ONCE(page->mapping); + page->mapping = mapping; + } +} + +static void dax_disassociate_entry(void *entry, struct address_space *mapping, + bool trunc) +{ + unsigned long pfn; + + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return; + + for_each_mapped_pfn(entry, pfn) { + struct page *page = pfn_to_page(pfn); + + WARN_ON_ONCE(trunc && page_ref_count(page) > 1); + WARN_ON_ONCE(page->mapping && page->mapping != mapping); + page->mapping = NULL; + } +} + /* * Find radix tree entry at given index. If it points to an exceptional entry, * return it with the radix tree entry locked. If the radix tree doesn't @@ -332,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index, void *entry, **slot; restart: - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, &slot); if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) { @@ -364,12 +416,12 @@ restart: if (pmd_downgrade) { /* * Make sure 'entry' remains valid while we drop - * mapping->tree_lock. + * the i_pages lock. */ entry = lock_slot(mapping, slot); } - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); /* * Besides huge zero pages the only other thing that gets * downgraded are empty entries which don't need to be @@ -386,26 +438,27 @@ restart: put_locked_mapping_entry(mapping, index); return ERR_PTR(err); } - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); if (!entry) { /* - * We needed to drop the page_tree lock while calling + * We needed to drop the i_pages lock while calling * radix_tree_preload() and we didn't have an entry to * lock. See if another thread inserted an entry at * our index during this time. */ - entry = __radix_tree_lookup(&mapping->page_tree, index, + entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (entry) { radix_tree_preload_end(); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); goto restart; } } if (pmd_downgrade) { - radix_tree_delete(&mapping->page_tree, index); + dax_disassociate_entry(entry, mapping, false); + radix_tree_delete(&mapping->i_pages, index); mapping->nrexceptional--; dax_wake_mapping_entry_waiter(mapping, index, entry, true); @@ -413,11 +466,11 @@ restart: entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); - err = __radix_tree_insert(&mapping->page_tree, index, + err = __radix_tree_insert(&mapping->i_pages, index, dax_radix_order(entry), entry); radix_tree_preload_end(); if (err) { - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); /* * Our insertion of a DAX entry failed, most likely * because we were inserting a PMD entry and it @@ -430,12 +483,12 @@ restart: } /* Good, we have inserted empty locked entry into the tree. */ mapping->nrexceptional++; - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return entry; } entry = lock_slot(mapping, slot); out_unlock: - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return entry; } @@ -444,22 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping, { int ret = 0; void *entry; - struct radix_tree_root *page_tree = &mapping->page_tree; + struct radix_tree_root *pages = &mapping->i_pages; - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry))) goto out; if (!trunc && - (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || - radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) + (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))) goto out; - radix_tree_delete(page_tree, index); + dax_disassociate_entry(entry, mapping, trunc); + radix_tree_delete(pages, index); mapping->nrexceptional--; ret = 1; out: put_unlocked_mapping_entry(mapping, index, entry); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(pages); return ret; } /* @@ -526,12 +580,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, */ static void *dax_insert_mapping_entry(struct address_space *mapping, struct vm_fault *vmf, - void *entry, sector_t sector, + void *entry, pfn_t pfn_t, unsigned long flags, bool dirty) { - struct radix_tree_root *page_tree = &mapping->page_tree; - void *new_entry; + struct radix_tree_root *pages = &mapping->i_pages; + unsigned long pfn = pfn_t_to_pfn(pfn_t); pgoff_t index = vmf->pgoff; + void *new_entry; if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -545,8 +600,12 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, unmap_mapping_pages(mapping, vmf->pgoff, 1, false); } - spin_lock_irq(&mapping->tree_lock); - new_entry = dax_radix_locked_entry(sector, flags); + xa_lock_irq(pages); + new_entry = dax_radix_locked_entry(pfn, flags); + if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + dax_disassociate_entry(entry, mapping, false); + dax_associate_entry(new_entry, mapping); + } if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* @@ -561,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, void **slot; void *ret; - ret = __radix_tree_lookup(page_tree, index, &node, &slot); + ret = __radix_tree_lookup(pages, index, &node, &slot); WARN_ON_ONCE(ret != entry); - __radix_tree_replace(page_tree, node, slot, + __radix_tree_replace(pages, node, slot, new_entry, NULL); entry = new_entry; } if (dirty) - radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); + radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(pages); return entry; } @@ -657,17 +716,14 @@ unlock_pte: i_mmap_unlock_read(mapping); } -static int dax_writeback_one(struct block_device *bdev, - struct dax_device *dax_dev, struct address_space *mapping, - pgoff_t index, void *entry) +static int dax_writeback_one(struct dax_device *dax_dev, + struct address_space *mapping, pgoff_t index, void *entry) { - struct radix_tree_root *page_tree = &mapping->page_tree; - void *entry2, **slot, *kaddr; - long ret = 0, id; - sector_t sector; - pgoff_t pgoff; + struct radix_tree_root *pages = &mapping->i_pages; + void *entry2, **slot; + unsigned long pfn; + long ret = 0; size_t size; - pfn_t pfn; /* * A page got tagged dirty in DAX mapping? Something is seriously @@ -676,17 +732,17 @@ static int dax_writeback_one(struct block_device *bdev, if (WARN_ON(!radix_tree_exceptional_entry(entry))) return -EIO; - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(pages); entry2 = get_unlocked_mapping_entry(mapping, index, &slot); /* Entry got punched out / reallocated? */ if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2))) goto put_unlocked; /* * Entry got reallocated elsewhere? No need to writeback. We have to - * compare sectors as we must not bail out due to difference in lockbit + * compare pfns as we must not bail out due to difference in lockbit * or entry type. */ - if (dax_radix_sector(entry2) != dax_radix_sector(entry)) + if (dax_radix_pfn(entry2) != dax_radix_pfn(entry)) goto put_unlocked; if (WARN_ON_ONCE(dax_is_empty_entry(entry) || dax_is_zero_entry(entry))) { @@ -695,7 +751,7 @@ static int dax_writeback_one(struct block_device *bdev, } /* Another fsync thread may have already written back this entry */ - if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) + if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)) goto put_unlocked; /* Lock the entry to serialize with page faults */ entry = lock_slot(mapping, slot); @@ -703,60 +759,40 @@ static int dax_writeback_one(struct block_device *bdev, * We can clear the tag now but we have to be careful so that concurrent * dax_writeback_one() calls for the same index cannot finish before we * actually flush the caches. This is achieved as the calls will look - * at the entry only under tree_lock and once they do that they will - * see the entry locked and wait for it to unlock. + * at the entry only under the i_pages lock and once they do that + * they will see the entry locked and wait for it to unlock. */ - radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE); - spin_unlock_irq(&mapping->tree_lock); + radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE); + xa_unlock_irq(pages); /* * Even if dax_writeback_mapping_range() was given a wbc->range_start * in the middle of a PMD, the 'index' we are given will be aligned to - * the start index of the PMD, as will the sector we pull from - * 'entry'. This allows us to flush for PMD_SIZE and not have to - * worry about partial PMD writebacks. + * the start index of the PMD, as will the pfn we pull from 'entry'. + * This allows us to flush for PMD_SIZE and not have to worry about + * partial PMD writebacks. */ - sector = dax_radix_sector(entry); + pfn = dax_radix_pfn(entry); size = PAGE_SIZE << dax_radix_order(entry); - id = dax_read_lock(); - ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); - if (ret) - goto dax_unlock; - - /* - * dax_direct_access() may sleep, so cannot hold tree_lock over - * its invocation. - */ - ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn); - if (ret < 0) - goto dax_unlock; - - if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) { - ret = -EIO; - goto dax_unlock; - } - - dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); - dax_flush(dax_dev, kaddr, size); + dax_mapping_entry_mkclean(mapping, index, pfn); + dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as * the pfn mappings are writeprotected and fault waits for mapping * entry lock. */ - spin_lock_irq(&mapping->tree_lock); - radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&mapping->tree_lock); + xa_lock_irq(pages); + radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY); + xa_unlock_irq(pages); trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); - dax_unlock: - dax_read_unlock(id); put_locked_mapping_entry(mapping, index); return ret; put_unlocked: put_unlocked_mapping_entry(mapping, index, entry2); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(pages); return ret; } @@ -808,8 +844,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, break; } - ret = dax_writeback_one(bdev, dax_dev, mapping, - indices[i], pvec.pages[i]); + ret = dax_writeback_one(dax_dev, mapping, indices[i], + pvec.pages[i]); if (ret < 0) { mapping_set_error(mapping, ret); goto out; @@ -877,6 +913,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, int ret = VM_FAULT_NOPAGE; struct page *zero_page; void *entry2; + pfn_t pfn; zero_page = ZERO_PAGE(0); if (unlikely(!zero_page)) { @@ -884,14 +921,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry, goto out; } - entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, + pfn = page_to_pfn_t(zero_page); + entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(entry2)) { ret = VM_FAULT_SIGBUS; goto out; } - vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page)); + vm_insert_mixed(vmf->vma, vaddr, pfn); out: trace_dax_load_hole(inode, vmf, ret); return ret; @@ -1200,8 +1238,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, if (error < 0) goto error_finish_iomap; - entry = dax_insert_mapping_entry(mapping, vmf, entry, - dax_iomap_sector(&iomap, pos), + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, 0, write && !sync); if (IS_ERR(entry)) { error = PTR_ERR(entry); @@ -1280,13 +1317,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, void *ret = NULL; spinlock_t *ptl; pmd_t pmd_entry; + pfn_t pfn; zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); if (unlikely(!zero_page)) goto fallback; - ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, + pfn = page_to_pfn_t(zero_page); + ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(ret)) goto fallback; @@ -1409,8 +1448,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if (error < 0) goto finish_iomap; - entry = dax_insert_mapping_entry(mapping, vmf, entry, - dax_iomap_sector(&iomap, pos), + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_PMD, write && !sync); if (IS_ERR(entry)) goto finish_iomap; @@ -1524,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, pgoff_t index = vmf->pgoff; int vmf_ret, error; - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, &slot); /* Did we race with someone splitting entry or so? */ if (!entry || (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { put_unlocked_mapping_entry(mapping, index, entry); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } - radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); + radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY); entry = lock_slot(mapping, slot); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); switch (pe_size) { case PE_SIZE_PTE: error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); diff --git a/fs/dcache.c b/fs/dcache.c index 593079176123..86d2de63461e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } +static void __d_free_external_name(struct rcu_head *head) +{ + struct external_name *name = container_of(head, struct external_name, + u.head); + + mod_node_page_state(page_pgdat(virt_to_page(name)), + NR_INDIRECTLY_RECLAIMABLE_BYTES, + -ksize(name)); + + kfree(name); +} + static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - kfree(external_name(dentry)); - kmem_cache_free(dentry_cache, dentry); + + __d_free_external_name(&external_name(dentry)->u.head); + + kmem_cache_free(dentry_cache, dentry); } static inline int dname_external(const struct dentry *dentry) @@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name) struct external_name *p; p = container_of(name->name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) - kfree_rcu(p, u.head); + call_rcu(&p->u.head, __d_free_external_name); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list) while (!list_empty(list)) { struct dentry *dentry, *parent; + cond_resched(); + dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); @@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb) this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); - cond_resched(); } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent) break; shrink_dentry_list(&data.dispose); - cond_resched(); } } EXPORT_SYMBOL(shrink_dcache_parent); @@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry) detach_mounts(data.mountpoint); dput(data.mountpoint); } - cond_resched(); } } EXPORT_SYMBOL(d_invalidate); @@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate); struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { + struct external_name *ext = NULL; struct dentry *dentry; char *dname; int err; @@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); - struct external_name *p = kmalloc(size + name->len, - GFP_KERNEL_ACCOUNT); - if (!p) { + + ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); + if (!ext) { kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&p->u.count, 1); - dname = p->name; + atomic_set(&ext->u.count, 1); + dname = ext->name; } else { dname = dentry->d_iname; } @@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } } + if (unlikely(ext)) { + pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); + mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, + ksize(ext)); + } + this_cpu_inc(nr_dentry); return dentry; @@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target) dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - kfree_rcu(old_name, u.head); + call_rcu(&old_name->u.head, __d_free_external_name); } /* diff --git a/fs/exec.c b/fs/exec.c index a919a827d181..183059c427b9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ limit = _STK_LIM / 4 * 3; - limit = min(limit, rlimit(RLIMIT_STACK) / 4); + limit = min(limit, bprm->rlim_stack.rlim_cur / 4); if (size > limit) goto fail; } @@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm) if (!mm) goto err; + /* Save current stack limit for all calculations made during exec. */ + task_lock(current->group_leader); + bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; + task_unlock(current->group_leader); + err = __bprm_mm_init(bprm); if (err) goto err; @@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm, #ifdef CONFIG_STACK_GROWSUP /* Limit stack size */ - stack_base = rlimit_max(RLIMIT_STACK); + stack_base = bprm->rlim_stack.rlim_max; if (stack_base > STACK_SIZE_MAX) stack_base = STACK_SIZE_MAX; @@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm, * Align this down to a page boundary as expand_stack * will align it up. */ - rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; + rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; #ifdef CONFIG_STACK_GROWSUP if (stack_size + stack_expand > rlim_stack) stack_base = vma->vm_start + rlim_stack; @@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm) * RLIMIT_STACK, but after the point of no return to avoid * needing to clean up the change on failure. */ - if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) - current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; + if (bprm->rlim_stack.rlim_cur > _STK_LIM) + bprm->rlim_stack.rlim_cur = _STK_LIM; } - arch_pick_mmap_layout(current->mm); + arch_pick_mmap_layout(current->mm, &bprm->rlim_stack); current->sas_ss_sp = current->sas_ss_size = 0; @@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm) } EXPORT_SYMBOL(setup_new_exec); +/* Runs immediately before start_thread() takes over. */ +void finalize_exec(struct linux_binprm *bprm) +{ + /* Store any stack rlimit changes before starting thread. */ + task_lock(current->group_leader); + current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack; + task_unlock(current->group_leader); +} +EXPORT_SYMBOL(finalize_exec); + /* * Prepare credentials and lock ->cred_guard_mutex. * install_exec_creds() commits the new creds and drops the lock. diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 329a5d103846..645158dc33f1 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -435,6 +435,15 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, if (IS_ERR_OR_NULL(result)) return ERR_PTR(-ESTALE); + /* + * If no acceptance criteria was specified by caller, a disconnected + * dentry is also accepatable. Callers may use this mode to query if + * file handle is stale or to get a reference to an inode without + * risking the high overhead caused by directory reconnect. + */ + if (!acceptable) + return result; + if (d_is_dir(result)) { /* * This request is for a directory. diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 032295e1d386..cc40802ddfa8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; /* inode.c */ +extern void ext2_set_file_ops(struct inode *inode); extern const struct address_space_operations ext2_aops; extern const struct address_space_operations ext2_nobh_aops; extern const struct iomap_ops ext2_iomap_ops; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 9b2ac55ac34f..1e01fabef130 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t offset = iocb->ki_pos; ssize_t ret; - if (WARN_ON_ONCE(IS_DAX(inode))) - return -EIO; - ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); if (ret < 0 && iov_iter_rw(iter) == WRITE) ext2_write_failed(mapping, offset + count); @@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { -#ifdef CONFIG_FS_DAX - if (dax_mapping(mapping)) { - return dax_writeback_mapping_range(mapping, - mapping->host->i_sb->s_bdev, - wbc); - } -#endif - return mpage_writepages(mapping, wbc, ext2_get_block); } +static int +ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + return dax_writeback_mapping_range(mapping, + mapping->host->i_sb->s_bdev, wbc); +} + const struct address_space_operations ext2_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, @@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = { .error_remove_page = generic_error_remove_page, }; +static const struct address_space_operations ext2_dax_aops = { + .writepages = ext2_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; + /* * Probably it should be a library function... search for first non-zero word * or memcmp with zero_page, whatever is better for particular architecture. @@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DAX; } +void ext2_set_file_ops(struct inode *inode) +{ + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &ext2_dax_aops; + else if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; +} + struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; @@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) ei->i_data[n] = raw_inode->i_block[n]; if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e078075dc66f..55f7caadb093 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); return ext2_add_nondir(dentry, inode); } @@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 129205028300..1e50c5efae67 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping, percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - if (dax_mapping(mapping)) { - ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, - wbc); - goto out_writepages; - } - /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() @@ -2942,6 +2936,27 @@ out_writepages: return ret; } +static int ext4_dax_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + int ret; + long nr_to_write = wbc->nr_to_write; + struct inode *inode = mapping->host; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + + percpu_down_read(&sbi->s_journal_flag_rwsem); + trace_ext4_writepages(inode, wbc); + + ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); + trace_ext4_writepages_result(inode, wbc, ret, + nr_to_write - wbc->nr_to_write); + percpu_up_read(&sbi->s_journal_flag_rwsem); + return ret; +} + static int ext4_nonda_switch(struct super_block *sb) { s64 free_clusters, dirty_clusters; @@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (ext4_has_inline_data(inode)) return 0; - /* DAX uses iomap path now */ - if (WARN_ON_ONCE(IS_DAX(inode))) - return 0; - trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); @@ -3934,6 +3945,13 @@ static const struct address_space_operations ext4_da_aops = { .error_remove_page = generic_error_remove_page, }; +static const struct address_space_operations ext4_dax_aops = { + .writepages = ext4_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; + void ext4_set_aops(struct inode *inode) { switch (ext4_inode_journal_mode(inode)) { @@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode) default: BUG(); } - if (test_opt(inode->i_sb, DELALLOC)) + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &ext4_dax_aops; + else if (test_opt(inode->i_sb, DELALLOC)) inode->i_mapping->a_ops = &ext4_da_aops; else inode->i_mapping->a_ops = &ext4_aops; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index db50686f5096..02237d4d91f5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page) SetPageDirty(page); spin_unlock(&mapping->private_lock); - spin_lock_irqsave(&mapping->tree_lock, flags); + xa_lock_irqsave(&mapping->i_pages, flags); WARN_ON_ONCE(!PageUptodate(page)); account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, + radix_tree_tag_set(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + xa_unlock_irqrestore(&mapping->i_pages, flags); unlock_page_memcg(page); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fe661274ff10..8c9c2f31b253 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (bit_pos == NR_DENTRY_IN_BLOCK && !truncate_hole(dir, page->index, page->index + 1)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), + xa_lock_irqsave(&mapping->i_pages, flags); + radix_tree_tag_clear(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + xa_unlock_irqrestore(&mapping->i_pages, flags); clear_page_dirty_for_io(page); ClearPagePrivate(page); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bfb7a4a3a929..9327411fd93b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, unsigned int init_segno = segno; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(GFP_NOFS), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; trace_f2fs_gc_begin(sbi->sb, sync, background, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3b77d6421218..265da200daa8 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, page_index(page), + xa_lock_irqsave(&mapping->i_pages, flags); + radix_tree_tag_clear(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + xa_unlock_irqrestore(&mapping->i_pages, flags); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9a99243054ba..f202398e20ea 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page) unsigned int long flags; if (PageDirty(page)) { - spin_lock_irqsave(&mapping->tree_lock, flags); - radix_tree_tag_clear(&mapping->page_tree, + xa_lock_irqsave(&mapping->i_pages, flags); + radix_tree_tag_clear(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); + xa_unlock_irqrestore(&mapping->i_pages, flags); clear_page_dirty_for_io(page); dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); @@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_bug_on(sbi, check_nid_range(sbi, nid)); rcu_read_lock(); - apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); + apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid); rcu_read_unlock(); if (apage) return; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1280f915079b..4b12ba70a895 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions * between unlocked_inode_to_wb_begin/end() are guaranteed to be - * synchronizing against mapping->tree_lock. + * synchronizing against the i_pages lock. * - * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock + * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock * gives us exclusion against all wb related operations on @inode * including IO list manipulations and stat updates. */ @@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); } spin_lock(&inode->i_lock); - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); /* * Once I_FREEING is visible under i_lock, the eviction path owns @@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) /* * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to - * pages actually under underwriteback. + * pages actually under writeback. */ - radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, + radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, PAGECACHE_TAG_DIRTY) { struct page *page = radix_tree_deref_slot_protected(slot, - &mapping->tree_lock); + &mapping->i_pages.xa_lock); if (likely(page) && PageDirty(page)) { dec_wb_stat(old_wb, WB_RECLAIMABLE); inc_wb_stat(new_wb, WB_RECLAIMABLE); } } - radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, + radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, PAGECACHE_TAG_WRITEBACK) { struct page *page = radix_tree_deref_slot_protected(slot, - &mapping->tree_lock); + &mapping->i_pages.xa_lock); if (likely(page)) { WARN_ON_ONCE(!PageWriteback(page)); dec_wb_stat(old_wb, WB_WRITEBACK); @@ -430,7 +430,7 @@ skip_switch: */ smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); spin_unlock(&inode->i_lock); spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); @@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* * In addition to synchronizing among switchers, I_WB_SWITCH tells - * the RCU protected stat update paths to grab the mapping's - * tree_lock so that stat transfer can synchronize against them. + * the RCU protected stat update paths to grab the i_page + * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 7dc55b93a830..97137d7ec5ee 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -832,7 +832,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, /* Clear pointers back to the netfs */ cookie->netfs_data = NULL; cookie->def = NULL; - BUG_ON(cookie->stores.rnode); + BUG_ON(!radix_tree_empty(&cookie->stores)); if (cookie->parent) { ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 1085ca12e25c..20e0d0a4dc8c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -973,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj * retire the object instead. */ if (!fscache_use_cookie(object)) { - ASSERT(object->cookie->stores.rnode == NULL); + ASSERT(radix_tree_empty(&object->cookie->stores)); set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); _leave(" [no cookie]"); return transit_to(KILL_OBJECT); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 685c305cbeb6..278ed0869c3c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1744,7 +1744,7 @@ do_grow_qunlock: * @newsize: the size to make the file * * The file size can grow, shrink, or stay the same size. This - * is called holding i_mutex and an exclusive glock on the inode + * is called holding i_rwsem and an exclusive glock on the inode * in question. * * Returns: errno diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 82fb5583445c..097bd3c0f270 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1923,28 +1923,37 @@ void gfs2_glock_exit(void) static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) { - if (n == 0) - gi->gl = rhashtable_walk_peek(&gi->hti); - else { - gi->gl = rhashtable_walk_next(&gi->hti); - n--; + struct gfs2_glock *gl = gi->gl; + + if (gl) { + if (n == 0) + return; + if (!lockref_put_not_zero(&gl->gl_lockref)) + gfs2_glock_queue_put(gl); } for (;;) { - if (IS_ERR_OR_NULL(gi->gl)) { - if (!gi->gl) - return; - if (PTR_ERR(gi->gl) != -EAGAIN) { - gi->gl = NULL; - return; + gl = rhashtable_walk_next(&gi->hti); + if (IS_ERR_OR_NULL(gl)) { + if (gl == ERR_PTR(-EAGAIN)) { + n = 1; + continue; } - n = 0; - } else if (gi->sdp == gi->gl->gl_name.ln_sbd && - !__lockref_is_dead(&gi->gl->gl_lockref)) { - if (!n--) - break; + gl = NULL; + break; + } + if (gl->gl_name.ln_sbd != gi->sdp) + continue; + if (n <= 1) { + if (!lockref_get_not_dead(&gl->gl_lockref)) + continue; + break; + } else { + if (__lockref_is_dead(&gl->gl_lockref)) + continue; + n--; } - gi->gl = rhashtable_walk_next(&gi->hti); } + gi->gl = gl; } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) @@ -1988,7 +1997,6 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock_iter *gi = seq->private; - gi->gl = NULL; rhashtable_walk_stop(&gi->hti); } @@ -2076,7 +2084,8 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; - gi->gl = NULL; + if (gi->gl) + gfs2_glock_put(gi->gl); rhashtable_walk_exit(&gi->hti); return seq_release_private(inode, file); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e6a0a8a89ea7..3ba3f167641c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -825,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail_rindex; } /* - * i_mutex on quota files is special. Since this inode is hidden system + * i_rwsem on quota files is special. Since this inode is hidden system * file, we are safe to define locking ourselves. */ lockdep_set_class(&sdp->sd_quota_inode->i_rwsem, diff --git a/fs/inode.c b/fs/inode.c index b153aeaa61ea..13ceb98c3bd3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); - spin_lock_init(&mapping->tree_lock); + INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); @@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash); void clear_inode(struct inode *inode) { /* - * We have to cycle tree_lock here because reclaim can be still in the + * We have to cycle the i_pages lock here because reclaim can be in the * process of removing the last page (in __delete_from_page_cache()) - * and we must not free mapping under it. + * and we must not free the mapping under it. */ - spin_lock_irq(&inode->i_data.tree_lock); + xa_lock_irq(&inode->i_data.i_pages); BUG_ON(inode->i_data.nrpages); BUG_ON(inode->i_data.nrexceptional); - spin_unlock_irq(&inode->i_data.tree_lock); + xa_unlock_irq(&inode->i_data.i_pages); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/fs/libfs.c b/fs/libfs.c index 7ff3cb904acd..0fb590d79f30 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) } EXPORT_SYMBOL(noop_fsync); +int noop_set_page_dirty(struct page *page) +{ + /* + * Unlike __set_page_dirty_no_writeback that handles dirty page + * tracking in the page object, dax does all dirty tracking in + * the inode address_space in response to mkwrite faults. In the + * dax case we only need to worry about potentially dirty CPU + * caches, not dirty page cache pages to write back. + * + * This callback is defined to prevent fallback to + * __set_page_dirty_buffers() in set_page_dirty(). + */ + return 0; +} +EXPORT_SYMBOL_GPL(noop_set_page_dirty); + +void noop_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + /* + * There is no page cache to invalidate in the dax case, however + * we need this callback defined to prevent falling back to + * block_invalidatepage() in do_invalidatepage(). + */ +} +EXPORT_SYMBOL_GPL(noop_invalidatepage); + +ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + /* + * iomap based filesystems support direct I/O without need for + * this callback. However, it still needs to be set in + * inode->a_ops so that open/fcntl know that direct I/O is + * generally supported. + */ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(noop_direct_IO); + /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ void kfree_link(void *p) { diff --git a/fs/namei.c b/fs/namei.c index a66ed5a1622a..186bd2464fd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -224,9 +224,10 @@ getname_kernel(const char * filename) if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { + const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); @@ -1597,22 +1598,21 @@ static int lookup_fast(struct nameidata *nd, } /* Fast lookup failed, do it the slow way */ -static struct dentry *lookup_slow(const struct qstr *name, - struct dentry *dir, - unsigned int flags) +static struct dentry *__lookup_slow(const struct qstr *name, + struct dentry *dir, + unsigned int flags) { - struct dentry *dentry = ERR_PTR(-ENOENT), *old; + struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - inode_lock_shared(inode); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) - goto out; + return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) - goto out; + return dentry; if (unlikely(!d_in_lookup(dentry))) { if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); @@ -1634,11 +1634,21 @@ again: dentry = old; } } -out: - inode_unlock_shared(inode); return dentry; } +static struct dentry *lookup_slow(const struct qstr *name, + struct dentry *dir, + unsigned int flags) +{ + struct inode *inode = dir->d_inode; + struct dentry *res; + inode_lock_shared(inode); + res = __lookup_slow(name, dir, flags); + inode_unlock_shared(inode); + return res; +} + static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { @@ -2421,56 +2431,63 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -/** - * lookup_one_len - filesystem helper to lookup single pathname component - * @name: pathname component to lookup - * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to - * - * Note that this routine is purely a helper for filesystem usage and should - * not be called by generic code. - * - * The caller must hold base->i_mutex. - */ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +static int lookup_one_len_common(const char *name, struct dentry *base, + int len, struct qstr *this) { - struct qstr this; - unsigned int c; - int err; - - WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - - this.name = name; - this.len = len; - this.hash = full_name_hash(base, name, len); + this->name = name; + this->len = len; + this->hash = full_name_hash(base, name, len); if (!len) - return ERR_PTR(-EACCES); + return -EACCES; if (unlikely(name[0] == '.')) { if (len < 2 || (len == 2 && name[1] == '.')) - return ERR_PTR(-EACCES); + return -EACCES; } while (len--) { - c = *(const unsigned char *)name++; + unsigned int c = *(const unsigned char *)name++; if (c == '/' || c == '\0') - return ERR_PTR(-EACCES); + return -EACCES; } /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, &this); + int err = base->d_op->d_hash(base, this); if (err < 0) - return ERR_PTR(err); + return err; } - err = inode_permission(base->d_inode, MAY_EXEC); + return inode_permission(base->d_inode, MAY_EXEC); +} + +/** + * lookup_one_len - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * The caller must hold base->i_mutex. + */ +struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +{ + struct dentry *dentry; + struct qstr this; + int err; + + WARN_ON_ONCE(!inode_is_locked(base->d_inode)); + + err = lookup_one_len_common(name, base, len, &this); if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, 0); + dentry = lookup_dcache(&this, base, 0); + return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one_len); @@ -2490,37 +2507,10 @@ struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned int c; int err; struct dentry *ret; - this.name = name; - this.len = len; - this.hash = full_name_hash(base, name, len); - if (!len) - return ERR_PTR(-EACCES); - - if (unlikely(name[0] == '.')) { - if (len < 2 || (len == 2 && name[1] == '.')) - return ERR_PTR(-EACCES); - } - - while (len--) { - c = *(const unsigned char *)name++; - if (c == '/' || c == '\0') - return ERR_PTR(-EACCES); - } - /* - * See if the low-level filesystem might want - * to use its own hash.. - */ - if (base->d_flags & DCACHE_OP_HASH) { - int err = base->d_op->d_hash(base, &this); - if (err < 0) - return ERR_PTR(err); - } - - err = inode_permission(base->d_inode, MAY_EXEC); + err = lookup_one_len_common(name, base, len, &this); if (err) return ERR_PTR(err); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 123c069429a7..a813979b5be0 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -535,35 +535,10 @@ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char return 0; } -#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) -#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) -static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep) +static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, size_t sz) { - __be32 bm[2]; - __be32 *p; - - bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0); - bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); - if (bm[1] != 0) { - p = xdr_reserve_space(xdr, 16); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(2); - *p++ = bm[0]; - *p++ = bm[1]; - } else if (bm[0] != 0) { - p = xdr_reserve_space(xdr, 12); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(1); - *p++ = bm[0]; - } else { - p = xdr_reserve_space(xdr, 8); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *p++ = htonl(0); - } - *savep = p; + if (xdr_stream_encode_uint32_array(xdr, bitmap, sz) < 0) + return cpu_to_be32(NFS4ERR_RESOURCE); return 0; } @@ -656,9 +631,13 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (unlikely(status != 0)) goto out; - status = encode_attr_bitmap(xdr, res->bitmap, &savep); + status = encode_attr_bitmap(xdr, res->bitmap, ARRAY_SIZE(res->bitmap)); if (unlikely(status != 0)) goto out; + status = cpu_to_be32(NFS4ERR_RESOURCE); + savep = xdr_reserve_space(xdr, sizeof(*savep)); + if (unlikely(!savep)) + goto out; status = encode_attr_change(xdr, res->bitmap, res->change_attr); if (unlikely(status != 0)) goto out; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d8b47624fee2..1819d0d0ba4b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -19,6 +19,7 @@ #include <linux/nfs_xdr.h> #include "nfs4_fs.h" +#include "nfs4session.h" #include "delegation.h" #include "internal.h" #include "nfs4trace.h" @@ -171,11 +172,15 @@ again: * nfs_inode_reclaim_delegation - process a delegation reclaim request * @inode: inode to process * @cred: credential to use for request - * @res: new delegation state from server + * @type: delegation type + * @stateid: delegation stateid + * @pagemod_limit: write delegation "space_limit" * */ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, - struct nfs_openres *res) + fmode_t type, + const nfs4_stateid *stateid, + unsigned long pagemod_limit) { struct nfs_delegation *delegation; struct rpc_cred *oldcred = NULL; @@ -185,9 +190,9 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, if (delegation != NULL) { spin_lock(&delegation->lock); if (delegation->inode != NULL) { - nfs4_stateid_copy(&delegation->stateid, &res->delegation); - delegation->type = res->delegation_type; - delegation->pagemod_limit = res->pagemod_limit; + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; delegation->cred = get_rpccred(cred); clear_bit(NFS_DELEGATION_NEED_RECLAIM, @@ -195,14 +200,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, spin_unlock(&delegation->lock); rcu_read_unlock(); put_rpccred(oldcred); - trace_nfs4_reclaim_delegation(inode, res->delegation_type); + trace_nfs4_reclaim_delegation(inode, type); return; } /* We appear to have raced with a delegation return. */ spin_unlock(&delegation->lock); } rcu_read_unlock(); - nfs_inode_set_delegation(inode, cred, res); + nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) @@ -329,11 +334,16 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * nfs_inode_set_delegation - set up a delegation on an inode * @inode: inode to which delegation applies * @cred: cred to use for subsequent delegation processing - * @res: new delegation state from server + * @type: delegation type + * @stateid: delegation stateid + * @pagemod_limit: write delegation "space_limit" * * Returns zero on success, or a negative errno value. */ -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, + const nfs4_stateid *stateid, + unsigned long pagemod_limit) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -345,9 +355,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; - nfs4_stateid_copy(&delegation->stateid, &res->delegation); - delegation->type = res->delegation_type; - delegation->pagemod_limit = res->pagemod_limit; + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; delegation->change_attr = inode_peek_iversion_raw(inode); delegation->cred = get_rpccred(cred); delegation->inode = inode; @@ -392,7 +402,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - trace_nfs4_set_delegation(inode, res->delegation_type); + trace_nfs4_set_delegation(inode, type); out: spin_unlock(&clp->cl_lock); @@ -547,6 +557,22 @@ int nfs4_inode_return_delegation(struct inode *inode) return err; } +/** + * nfs4_inode_make_writeable + * @inode: pointer to inode + * + * Make the inode writeable by returning the delegation if necessary + * + * Returns zero on success, or a negative errno value. + */ +int nfs4_inode_make_writeable(struct inode *inode) +{ + if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) || + !nfs4_check_delegation(inode, FMODE_WRITE)) + return nfs4_inode_return_delegation(inode); + return 0; +} + static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 185a09f37a89..bb1ef8c37af4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -36,8 +36,10 @@ enum { NFS_DELEGATION_TEST_EXPIRED, }; -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_return_delegation_noreclaim(struct inode *inode); @@ -70,6 +72,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); +int nfs4_inode_make_writeable(struct inode *inode); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2f3f86726f5b..73f8b43d988c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1272,7 +1272,9 @@ static void nfs_drop_nlink(struct inode *inode) /* drop the inode if we're reasonably sure this is the last link */ if (inode->i_nlink == 1) clear_nlink(inode); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_OTHER; spin_unlock(&inode->i_lock); } @@ -1798,12 +1800,11 @@ static int nfs_safe_remove(struct dentry *dentry) trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { - NFS_PROTO(inode)->return_delegation(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + error = NFS_PROTO(dir)->remove(dir, dentry); if (error == 0) nfs_drop_nlink(inode); } else - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + error = NFS_PROTO(dir)->remove(dir, dentry); if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); trace_nfs_remove_exit(dir, dentry, error); @@ -1932,8 +1933,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry, dentry); trace_nfs_link_enter(inode, dir, dentry); - NFS_PROTO(inode)->return_delegation(inode); - d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { @@ -2023,10 +2022,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - NFS_PROTO(old_inode)->return_delegation(old_inode); - if (new_inode != NULL) - NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d17a90c4fa37..bd15d0b57626 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -195,7 +195,10 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); + bool have_delegation = nfs_have_delegated_attributes(inode); + if (have_delegation) + flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); if (inode->i_mapping->nrpages == 0) flags &= ~NFS_INO_INVALID_DATA; nfsi->cache_validity |= flags; @@ -447,7 +450,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_mode = fattr->mode; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -493,37 +496,35 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; else if (nfs_server_capable(inode, NFS_CAP_ATIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -608,11 +609,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - /* - * Return any delegations if we're going to change ACLs - */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) error = nfs_refresh_inode(inode, fattr); @@ -645,6 +641,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) /* Optimisation */ if (offset == 0) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); @@ -657,6 +654,7 @@ out: * nfs_setattr_update_inode - Update inode metadata after a setattr call. * @inode: pointer to struct inode * @attr: pointer to struct iattr + * @fattr: pointer to struct nfs_fattr * * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. @@ -669,6 +667,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; @@ -683,13 +683,12 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, | NFS_INO_INVALID_ACL); } if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } if (fattr->valid) nfs_update_inode(inode, fattr); - else - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1303,24 +1302,20 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi); } -static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - unsigned long ret = 0; - if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) @@ -1329,17 +1324,13 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); - ret |= NFS_INO_INVALID_ATTR; } - - return ret; } /** @@ -1369,33 +1360,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_CHANGE + | NFS_INO_REVAL_PAGECACHE; if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_MTIME; if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime)) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_SIZE + | NFS_INO_REVAL_PAGECACHE; } } /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; /* Has the link count changed? */ if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; @@ -1597,10 +1596,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } EXPORT_SYMBOL_GPL(nfs_refresh_inode); -static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_post_op_update_inode_locked(struct inode *inode, + struct nfs_fattr *fattr, unsigned int invalid) { - unsigned long invalid = NFS_INO_INVALID_ATTR; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); @@ -1629,7 +1627,9 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); nfs_fattr_set_barrier(fattr); - status = nfs_post_op_update_inode_locked(inode, fattr); + status = nfs_post_op_update_inode_locked(inode, fattr, + NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME); spin_unlock(&inode->i_lock); return status; @@ -1681,7 +1681,10 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa fattr->valid |= NFS_ATTR_FATTR_PRESIZE; } out_noforce: - status = nfs_post_op_update_inode_locked(inode, fattr); + status = nfs_post_op_update_inode_locked(inode, fattr, + NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME); return status; } @@ -1789,7 +1792,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ - invalid |= nfs_wcc_update_inode(inode, fattr); + nfs_wcc_update_inode(inode, fattr); if (pnfs_layoutcommit_outstanding(inode)) { nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR; @@ -1803,17 +1806,25 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ if (!have_writers) { - invalid |= NFS_INO_INVALID_ATTR + invalid |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + /* Force revalidate of all attributes */ + save_cache_validity |= NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_SIZE + | NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); } inode_set_iversion_raw(inode, fattr->change_attr); } } else { - nfsi->cache_validity |= save_cache_validity; + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_CHANGE + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1821,7 +1832,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_MTIME | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1830,7 +1841,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_CTIME | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1845,7 +1856,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { i_size_write(inode, new_isize); if (!have_writers) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1856,7 +1867,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } else { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_SIZE | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); cache_revalidated = false; @@ -1877,55 +1888,61 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) umode_t newmode = inode->i_mode & S_IFMT; newmode |= fattr->mode & S_IALLUGO; inode->i_mode = newmode; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; } } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; inode->i_uid = fattr->uid; } } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER; inode->i_gid = fattr->gid; } } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS + (NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { - invalid |= NFS_INO_INVALID_ATTR; + invalid |= NFS_INO_INVALID_OTHER; if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATTR + (NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED); cache_revalidated = false; } @@ -1942,6 +1959,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + invalid &= ~NFS_INO_INVALID_ATTR; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1962,10 +1980,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; } - /* Don't declare attrcache up to date if there were no attrs! */ - if (cache_revalidated) - invalid &= ~NFS_INO_INVALID_ATTR; - /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7327930ad970..eadf1ab31d16 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -138,8 +138,11 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, msg.rpc_cred = nfs_file_cred(sattr->ia_file); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (status == 0) + if (status == 0) { + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); nfs_setattr_update_inode(inode, sattr, fattr); + } dprintk("NFS reply setattr: %d\n", status); return status; } @@ -383,11 +386,11 @@ out: } static int -nfs3_proc_remove(struct inode *dir, const struct qstr *name) +nfs3_proc_remove(struct inode *dir, struct dentry *dentry) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name = *name, + .name = dentry->d_name, }; struct nfs_removeres res; struct rpc_message msg = { @@ -397,7 +400,7 @@ nfs3_proc_remove(struct inode *dir, const struct qstr *name) }; int status = -ENOMEM; - dprintk("NFS call remove %s\n", name->name); + dprintk("NFS call remove %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) goto out; @@ -411,7 +414,7 @@ out: } static void -nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } @@ -433,7 +436,9 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) } static void -nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +nfs3_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } @@ -908,12 +913,6 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags) return 0; } -static int nfs3_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} - static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -990,7 +989,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, - .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cd33bd5da87..09ee36dd8426 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1997,6 +1997,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_entry old = *entry; __be32 *p; int error; + u64 new_cookie; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) @@ -2019,8 +2020,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (unlikely(error)) return error; - entry->prev_cookie = entry->cookie; - error = decode_cookie3(xdr, &entry->cookie); + error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) return error; @@ -2054,6 +2054,9 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, zero_nfs_fh3(entry->fh); } + entry->prev_cookie = entry->cookie; + entry->cookie = new_cookie; + return 0; out_overflow: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47f3c273245e..b71757e85066 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1045,7 +1045,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, struct nfs_inode *nfsi = NFS_I(dir); spin_lock(&dir->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_CTIME + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_DATA; if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; @@ -1669,6 +1671,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo { struct nfs_delegation *delegation; + fmode &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL || (delegation->type & fmode) == fmode) { @@ -1751,12 +1754,16 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) } if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - &data->o_res); + data->owner->so_cred, + data->o_res.delegation_type, + &data->o_res.delegation, + data->o_res.pagemod_limit); else nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - &data->o_res); + data->owner->so_cred, + data->o_res.delegation_type, + &data->o_res.delegation, + data->o_res.pagemod_limit); } /* @@ -2743,27 +2750,40 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st * fields corresponding to attributes that were used to store the verifier. * Make sure we clobber those fields in the later setattr call */ -static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, +static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr, struct nfs4_label **label) { - const u32 *attrset = opendata->o_res.attrset; + const __u32 *bitmask = opendata->o_arg.server->exclcreat_bitmask; + __u32 attrset[3]; + unsigned ret; + unsigned i; - if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) && - !(sattr->ia_valid & ATTR_ATIME_SET)) - sattr->ia_valid |= ATTR_ATIME; + for (i = 0; i < ARRAY_SIZE(attrset); i++) { + attrset[i] = opendata->o_res.attrset[i]; + if (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE4_1) + attrset[i] &= ~bitmask[i]; + } + + ret = (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE) ? + sattr->ia_valid : 0; - if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) && - !(sattr->ia_valid & ATTR_MTIME_SET)) - sattr->ia_valid |= ATTR_MTIME; + if ((attrset[1] & (FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET))) { + if (sattr->ia_valid & ATTR_ATIME_SET) + ret |= ATTR_ATIME_SET; + else + ret |= ATTR_ATIME; + } - /* Except MODE, it seems harmless of setting twice. */ - if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && - (attrset[1] & FATTR4_WORD1_MODE || - attrset[2] & FATTR4_WORD2_MODE_UMASK)) - sattr->ia_valid &= ~ATTR_MODE; + if ((attrset[1] & (FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET))) { + if (sattr->ia_valid & ATTR_MTIME_SET) + ret |= ATTR_MTIME_SET; + else + ret |= ATTR_MTIME; + } - if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) + if (!(attrset[2] & FATTR4_WORD2_SECURITY_LABEL)) *label = NULL; + return ret; } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, @@ -2892,12 +2912,15 @@ static int _nfs4_do_open(struct inode *dir, if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { - nfs4_exclusive_attrset(opendata, sattr, &label); + unsigned attrs = nfs4_exclusive_attrset(opendata, sattr, &label); /* * send create attributes which was not set by open * with an extra setattr. */ - if (sattr->ia_valid & NFS4_VALID_ATTRS) { + if (attrs || label) { + unsigned ia_old = sattr->ia_valid; + + sattr->ia_valid = attrs; nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, @@ -2907,6 +2930,7 @@ static int _nfs4_do_open(struct inode *dir, opendata->o_res.f_attr); nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } + sattr->ia_valid = ia_old; } } if (opened && opendata->file_created) @@ -3874,6 +3898,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (IS_ERR(label)) return PTR_ERR(label); + /* Return any delegations if we're going to change ACLs */ + if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs4_inode_make_writeable(inode); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); @@ -4048,7 +4076,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->cache_consistency_bitmask, .access = entry->mask, }; struct nfs4_accessres res = { @@ -4062,14 +4089,18 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - res.fattr = nfs_alloc_fattr(); - if (res.fattr == NULL) - return -ENOMEM; + if (!nfs_have_delegated_attributes(inode)) { + res.fattr = nfs_alloc_fattr(); + if (res.fattr == NULL) + return -ENOMEM; + args.bitmask = server->cache_consistency_bitmask; + } status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { nfs_access_set_mask(entry, res.access); - nfs_refresh_inode(inode, res.fattr); + if (res.fattr) + nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); return status; @@ -4199,10 +4230,32 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name) return status; } -static int nfs4_proc_remove(struct inode *dir, const struct qstr *name) +static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry) +{ + struct nfs4_exception exception = { }; + struct inode *inode = d_inode(dentry); + int err; + + if (inode) { + if (inode->i_nlink == 1) + nfs4_inode_return_delegation(inode); + else + nfs4_inode_make_writeable(inode); + } + do { + err = _nfs4_proc_remove(dir, &dentry->d_name); + trace_nfs4_remove(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, + &exception); + } while (exception.retry); + return err; +} + +static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) { struct nfs4_exception exception = { }; int err; + do { err = _nfs4_proc_remove(dir, name); trace_nfs4_remove(dir, name, err); @@ -4212,17 +4265,20 @@ static int nfs4_proc_remove(struct inode *dir, const struct qstr *name) return err; } -static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; + struct inode *inode = d_inode(dentry); - res->server = server; + res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); nfs_fattr_init(res->dir_attr); + + if (inode) + nfs4_inode_return_delegation(inode); } static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) @@ -4248,14 +4304,21 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) return 1; } -static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +static void nfs4_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_renameargs *arg = msg->rpc_argp; struct nfs_renameres *res = msg->rpc_resp; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + if (old_inode) + nfs4_inode_make_writeable(old_inode); + if (new_inode) + nfs4_inode_return_delegation(new_inode); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; - res->server = server; + res->server = NFS_SB(old_dentry->d_sb); nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); } @@ -4317,6 +4380,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct } arg.bitmask = nfs4_bitmask(server, res.label); + nfs4_inode_make_writeable(inode); + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(dir, &res.cinfo, res.fattr->time_start); @@ -5310,7 +5375,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; - nfs4_inode_return_delegation(inode); + nfs4_inode_make_writeable(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -5325,7 +5390,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl * so mark the attribute cache invalid. */ spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME; spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); @@ -6621,22 +6687,24 @@ static int nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) { int ret; - struct cb_notify_lock_args *cbnl = key; struct nfs4_lock_waiter *waiter = wait->private; - struct nfs_lowner *lowner = &cbnl->cbnl_owner, - *wowner = waiter->owner; - /* Only wake if the callback was for the same owner */ - if (lowner->clientid != wowner->clientid || - lowner->id != wowner->id || - lowner->s_dev != wowner->s_dev) - return 0; + /* NULL key means to wake up everyone */ + if (key) { + struct cb_notify_lock_args *cbnl = key; + struct nfs_lowner *lowner = &cbnl->cbnl_owner, + *wowner = waiter->owner; - /* Make sure it's for the right inode */ - if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) - return 0; + /* Only wake if the callback was for the same owner. */ + if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev) + return 0; - waiter->notified = true; + /* Make sure it's for the right inode */ + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) + return 0; + + waiter->notified = true; + } /* override "private" so we can use default_wake_function */ wait->private = waiter->task; @@ -6673,6 +6741,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) add_wait_queue(q, &wait); while(!signalled()) { + waiter.notified = false; status = nfs4_proc_setlk(state, cmd, request); if ((status != -EAGAIN) || IS_SETLK(cmd)) break; @@ -8414,6 +8483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf { switch(task->tk_status) { case 0: + wake_up_all(&clp->cl_lock_waitq); + /* Fallthrough */ case -NFS4ERR_COMPLETE_ALREADY: case -NFS4ERR_WRONG_CRED: /* What to do here? */ break; @@ -9593,7 +9664,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, .mkdir = nfs4_proc_mkdir, - .rmdir = nfs4_proc_remove, + .rmdir = nfs4_proc_rmdir, .readdir = nfs4_proc_readdir, .mknod = nfs4_proc_mknod, .statfs = nfs4_proc_statfs, @@ -9614,7 +9685,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, - .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 91a4d4eeb235..c10a422efe6f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -428,7 +428,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; - int err; while (*p != NULL) { parent = *p; @@ -445,9 +444,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); - if (err) - return ERR_PTR(err); rb_link_node(&new->so_server_node, parent, p); rb_insert_color(&new->so_server_node, &server->state_owners); return new; @@ -460,7 +456,6 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - ida_remove(&server->openowner_id, sp->so_seqid.owner_id); } static void @@ -495,6 +490,12 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; + sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0, + gfp_flags); + if (sp->so_seqid.owner_id < 0) { + kfree(sp); + return NULL; + } sp->so_server = server; sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); @@ -526,6 +527,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); put_rpccred(sp->so_cred); + ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -576,13 +578,9 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, new = nfs4_alloc_state_owner(server, cred, gfp_flags); if (new == NULL) goto out; - do { - if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) - break; - spin_lock(&clp->cl_lock); - sp = nfs4_insert_state_owner_locked(new); - spin_unlock(&clp->cl_lock); - } while (sp == ERR_PTR(-EAGAIN)); + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner_locked(new); + spin_unlock(&clp->cl_lock); if (sp != new) nfs4_free_state_owner(new); out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b993ad282de2..9b7392032321 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -98,6 +98,7 @@ static int nfs4_stat_to_errno(int); ((3+NFS4_FHSIZE) >> 2)) #define nfs4_fattr_bitmap_maxsz 4 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define nfstime4_maxsz (3) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -112,7 +113,8 @@ static int nfs4_stat_to_errno(int); #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ - 3 + 3 + 3 + nfs4_owner_maxsz + \ + 3*nfstime4_maxsz + \ + nfs4_owner_maxsz + \ nfs4_group_maxsz + nfs4_label_maxsz + \ decode_mdsthreshold_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ @@ -123,7 +125,8 @@ static int nfs4_stat_to_errno(int); nfs4_owner_maxsz + \ nfs4_group_maxsz + \ nfs4_label_maxsz + \ - 4 + 4) + 1 + nfstime4_maxsz + \ + 1 + nfstime4_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_restorefh_maxsz (op_encode_hdr_maxsz) @@ -957,6 +960,35 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n) WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0); } +static ssize_t xdr_encode_bitmap4(struct xdr_stream *xdr, + const __u32 *bitmap, size_t len) +{ + ssize_t ret; + + /* Trim empty words */ + while (len > 0 && bitmap[len-1] == 0) + len--; + ret = xdr_stream_encode_uint32_array(xdr, bitmap, len); + if (WARN_ON_ONCE(ret < 0)) + return ret; + return len; +} + +static size_t mask_bitmap4(const __u32 *bitmap, const __u32 *mask, + __u32 *res, size_t len) +{ + size_t i; + __u32 tmp; + + while (len > 0 && (bitmap[len-1] == 0 || mask[len-1] == 0)) + len--; + for (i = len; i-- > 0;) { + tmp = bitmap[i] & mask[i]; + res[i] = tmp; + } + return len; +} + static void encode_nfs4_seqid(struct xdr_stream *xdr, const struct nfs_seqid *seqid) { @@ -1011,6 +1043,14 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); } +static __be32 * +xdr_encode_nfstime4(__be32 *p, const struct timespec *t) +{ + p = xdr_encode_hyper(p, (__s64)t->tv_sec); + *p++ = cpu_to_be32(t->tv_nsec); + return p; +} + static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs4_label *label, const umode_t *umask, @@ -1022,9 +1062,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, int owner_namelen = 0; int owner_grouplen = 0; __be32 *p; - unsigned i; uint32_t len = 0; - uint32_t bmval_len; uint32_t bmval[3] = { 0 }; /* @@ -1072,7 +1110,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) { if (iap->ia_valid & ATTR_ATIME_SET) { bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; - len += 16; + len += 4 + (nfstime4_maxsz << 2); } else if (iap->ia_valid & ATTR_ATIME) { bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 4; @@ -1081,7 +1119,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) { if (iap->ia_valid & ATTR_MTIME_SET) { bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; - len += 16; + len += 4 + (nfstime4_maxsz << 2); } else if (iap->ia_valid & ATTR_MTIME) { bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 4; @@ -1093,19 +1131,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; } - if (bmval[2] != 0) - bmval_len = 3; - else if (bmval[1] != 0) - bmval_len = 2; - else - bmval_len = 1; - - p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len); - - *p++ = cpu_to_be32(bmval_len); - for (i = 0; i < bmval_len; i++) - *p++ = cpu_to_be32(bmval[i]); - *p++ = cpu_to_be32(len); + xdr_encode_bitmap4(xdr, bmval, ARRAY_SIZE(bmval)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); if (bmval[0] & FATTR4_WORD0_SIZE) p = xdr_encode_hyper(p, iap->ia_size); @@ -1118,16 +1145,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { if (iap->ia_valid & ATTR_ATIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); - *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); + p = xdr_encode_nfstime4(p, &iap->ia_atime); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { if (iap->ia_valid & ATTR_MTIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + p = xdr_encode_nfstime4(p, &iap->ia_mtime); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } @@ -1199,85 +1224,45 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * create->server, create->server->attr_bitmask); } -static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(1); - *p = cpu_to_be32(bitmap); -} - -static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(bm0); - *p = cpu_to_be32(bm1); -} - -static void -encode_getattr_three(struct xdr_stream *xdr, - uint32_t bm0, uint32_t bm1, uint32_t bm2, - struct compound_hdr *hdr) +static void encode_getattr(struct xdr_stream *xdr, + const __u32 *bitmap, const __u32 *mask, size_t len, + struct compound_hdr *hdr) { - __be32 *p; + __u32 masked_bitmap[nfs4_fattr_bitmap_maxsz]; encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); - if (bm2) { - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(bm0); - *p++ = cpu_to_be32(bm1); - *p = cpu_to_be32(bm2); - } else if (bm1) { - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(bm0); - *p = cpu_to_be32(bm1); - } else { - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(1); - *p = cpu_to_be32(bm0); + if (mask) { + if (WARN_ON_ONCE(len > ARRAY_SIZE(masked_bitmap))) + len = ARRAY_SIZE(masked_bitmap); + len = mask_bitmap4(bitmap, mask, masked_bitmap, len); + bitmap = masked_bitmap; } + xdr_encode_bitmap4(xdr, bitmap, len); } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], - bitmask[2] & nfs4_fattr_bitmap[2], - hdr); + encode_getattr(xdr, nfs4_fattr_bitmap, bitmask, + ARRAY_SIZE(nfs4_fattr_bitmap), hdr); } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, const u32 *open_bitmap, struct compound_hdr *hdr) { - encode_getattr_three(xdr, - bitmask[0] & open_bitmap[0], - bitmask[1] & open_bitmap[1], - bitmask[2] & open_bitmap[2], - hdr); + encode_getattr(xdr, open_bitmap, bitmask, 3, hdr); } static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_three(xdr, - bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1], - bitmask[2] & nfs4_fsinfo_bitmap[2], - hdr); + encode_getattr(xdr, nfs4_fsinfo_bitmap, bitmask, + ARRAY_SIZE(nfs4_fsinfo_bitmap), hdr); } static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], - bitmask[1] & nfs4_fs_locations_bitmap[1], hdr); + encode_getattr(xdr, nfs4_fs_locations_bitmap, bitmask, + ARRAY_SIZE(nfs4_fs_locations_bitmap), hdr); } static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -2116,7 +2101,8 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_access(xdr, args->access, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2558,13 +2544,17 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + const __u32 nfs4_acl_bitmap[1] = { + [0] = FATTR4_WORD0_ACL, + }; uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); replen = hdr.replen + op_decode_hdr_maxsz; - encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); + encode_getattr(xdr, nfs4_acl_bitmap, NULL, + ARRAY_SIZE(nfs4_acl_bitmap), &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, 0, args->acl_len); @@ -2643,8 +2633,8 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], - &hdr); + encode_getattr(xdr, nfs4_pathconf_bitmap, args->bitmask, + ARRAY_SIZE(nfs4_pathconf_bitmap), &hdr); encode_nops(&hdr); } @@ -2662,8 +2652,8 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], - args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); + encode_getattr(xdr, nfs4_statfs_bitmap, args->bitmask, + ARRAY_SIZE(nfs4_statfs_bitmap), &hdr); encode_nops(&hdr); } @@ -2683,7 +2673,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); - encode_getattr_three(xdr, bitmask[0], bitmask[1], bitmask[2], &hdr); + encode_getattr(xdr, bitmask, NULL, 3, &hdr); encode_nops(&hdr); } @@ -3217,34 +3207,27 @@ static int decode_ace(struct xdr_stream *xdr, void *ace) return -EIO; } -static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +static ssize_t +decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz) { - uint32_t bmlen; - __be32 *p; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - bmlen = be32_to_cpup(p); + ssize_t ret; - bitmap[0] = bitmap[1] = bitmap[2] = 0; - p = xdr_inline_decode(xdr, (bmlen << 2)); - if (unlikely(!p)) - goto out_overflow; - if (bmlen > 0) { - bitmap[0] = be32_to_cpup(p++); - if (bmlen > 1) { - bitmap[1] = be32_to_cpup(p++); - if (bmlen > 2) - bitmap[2] = be32_to_cpup(p); - } - } - return 0; -out_overflow: + ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz); + if (likely(ret >= 0)) + return ret; + if (ret == -EMSGSIZE) + return sz; print_overflow_msg(__func__, xdr); return -EIO; } +static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + ssize_t ret; + ret = decode_bitmap4(xdr, bitmap, 3); + return ret < 0 ? ret : 0; +} + static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) { __be32 *p; @@ -3980,7 +3963,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, bitmap[1] &= ~FATTR4_WORD1_OWNER; if (owner_name != NULL) { - len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT); + len = decode_nfs4_string(xdr, owner_name, GFP_NOIO); if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, owner_name->data); @@ -4015,7 +3998,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; if (group_name != NULL) { - len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT); + len = decode_nfs4_string(xdr, group_name, GFP_NOIO); if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, group_name->data); @@ -4155,19 +4138,25 @@ out_overflow: return -EIO; } +static __be32 * +xdr_decode_nfstime4(__be32 *p, struct timespec *t) +{ + __u64 sec; + + p = xdr_decode_hyper(p, &sec); + t-> tv_sec = (time_t)sec; + t->tv_nsec = be32_to_cpup(p++); + return p; +} + static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) { __be32 *p; - uint64_t sec; - uint32_t nsec; - p = xdr_inline_decode(xdr, 12); + p = xdr_inline_decode(xdr, nfstime4_maxsz << 2); if (unlikely(!p)) goto out_overflow; - p = xdr_decode_hyper(p, &sec); - nsec = be32_to_cpup(p); - time->tv_sec = (time_t)sec; - time->tv_nsec = (long)nsec; + xdr_decode_nfstime4(p, time); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5470,21 +5459,13 @@ decode_savefh(struct xdr_stream *xdr) static int decode_setattr(struct xdr_stream *xdr) { - __be32 *p; - uint32_t bmlen; int status; status = decode_op_hdr(xdr, OP_SETATTR); if (status) return status; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - bmlen = be32_to_cpup(p); - p = xdr_inline_decode(xdr, bmlen << 2); - if (likely(p)) + if (decode_bitmap4(xdr, NULL, 0) >= 0) return 0; -out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } @@ -6255,7 +6236,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_access(xdr, &res->supported, &res->access); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server); + if (res->fattr) + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -7535,6 +7517,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, unsigned int savep; uint32_t bitmap[3] = {0}; uint32_t len; + uint64_t new_cookie; __be32 *p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; @@ -7551,8 +7534,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) goto out_overflow; - entry->prev_cookie = entry->cookie; - p = xdr_decode_hyper(p, &entry->cookie); + p = xdr_decode_hyper(p, &new_cookie); entry->len = be32_to_cpup(p); p = xdr_inline_decode(xdr, entry->len); @@ -7586,6 +7568,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + entry->prev_cookie = entry->cookie; + entry->cookie = new_cookie; + return 0; out_overflow: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f7fd9192d4bc..4e93d6308733 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -300,11 +300,11 @@ out: } static int -nfs_proc_remove(struct inode *dir, const struct qstr *name) +nfs_proc_remove(struct inode *dir, struct dentry *dentry) { struct nfs_removeargs arg = { .fh = NFS_FH(dir), - .name = *name, + .name = dentry->d_name, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], @@ -312,7 +312,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name) }; int status; - dprintk("NFS call remove %s\n", name->name); + dprintk("NFS call remove %pd2\n",dentry); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); @@ -321,7 +321,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name) } static void -nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) { msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } @@ -338,7 +338,9 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) } static void -nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +nfs_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } @@ -671,12 +673,6 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags) return 0; } -static int nfs_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} - static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -741,7 +737,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, - .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 630b4a3c1a93..bf54fc9ae135 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data) data->args.fh = NFS_FH(dir); nfs_fattr_init(data->res.dir_attr); - NFS_PROTO(dir)->unlink_setup(&msg, dir); + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); task_setup_data.rpc_client = NFS_CLIENT(dir); task = rpc_run_task(&task_setup_data); @@ -386,7 +386,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, nfs_sb_active(old_dir->i_sb); - NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry); return rpc_run_task(&task_setup_data); } @@ -463,9 +463,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) fileid = NFS_FILEID(d_inode(dentry)); - /* Return delegation in anticipation of the rename */ - NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry)); - sdentry = NULL; do { int slen; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6579f3b367bd..0193053bc139 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -231,6 +231,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c if (i_size >= end) goto out; i_size_write(inode, end); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); out: spin_unlock(&inode->i_lock); @@ -1562,8 +1563,11 @@ static int nfs_writeback_done(struct rpc_task *task, } /* Deal with the suid/sgid bit corner case */ - if (nfs_should_remove_suid(inode)) - nfs_mark_for_revalidate(inode); + if (nfs_should_remove_suid(inode)) { + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; + spin_unlock(&inode->i_lock); + } return 0; } diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c21e0b4454a6..dec98cab729d 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -193,9 +193,9 @@ retry: (unsigned long long)oldkey, (unsigned long long)newkey); - spin_lock_irq(&btnc->tree_lock); - err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page); + xa_unlock_irq(&btnc->i_pages); /* * Note: page->index will not change to newkey until * nilfs_btnode_commit_change_key() will be called. @@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, (unsigned long long)newkey); mark_buffer_dirty(obh); - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, oldkey); - radix_tree_tag_set(&btnc->page_tree, newkey, + xa_lock_irq(&btnc->i_pages); + radix_tree_delete(&btnc->i_pages, oldkey); + radix_tree_tag_set(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&btnc->tree_lock); + xa_unlock_irq(&btnc->i_pages); opage->index = obh->b_blocknr = newkey; unlock_page(opage); @@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, return; if (nbh == NULL) { /* blocksize == pagesize */ - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, newkey); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + radix_tree_delete(&btnc->i_pages, newkey); + xa_unlock_irq(&btnc->i_pages); unlock_page(ctxt->bh->b_page); } else brelse(nbh); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 68241512d7c1..4cb850a6f1c2 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -331,15 +331,15 @@ repeat: struct page *page2; /* move the page to the destination cache */ - spin_lock_irq(&smap->tree_lock); - page2 = radix_tree_delete(&smap->page_tree, offset); + xa_lock_irq(&smap->i_pages); + page2 = radix_tree_delete(&smap->i_pages, offset); WARN_ON(page2 != page); smap->nrpages--; - spin_unlock_irq(&smap->tree_lock); + xa_unlock_irq(&smap->i_pages); - spin_lock_irq(&dmap->tree_lock); - err = radix_tree_insert(&dmap->page_tree, offset, page); + xa_lock_irq(&dmap->i_pages); + err = radix_tree_insert(&dmap->i_pages, offset, page); if (unlikely(err < 0)) { WARN_ON(err == -EEXIST); page->mapping = NULL; @@ -348,11 +348,11 @@ repeat: page->mapping = dmap; dmap->nrpages++; if (PageDirty(page)) - radix_tree_tag_set(&dmap->page_tree, + radix_tree_tag_set(&dmap->i_pages, offset, PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&dmap->tree_lock); + xa_unlock_irq(&dmap->i_pages); } unlock_page(page); } @@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page) struct address_space *mapping = page->mapping; if (mapping) { - spin_lock_irq(&mapping->tree_lock); + xa_lock_irq(&mapping->i_pages); if (test_bit(PG_dirty, &page->flags)) { - radix_tree_tag_clear(&mapping->page_tree, + radix_tree_tag_clear(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return clear_page_dirty_for_io(page); } - spin_unlock_irq(&mapping->tree_lock); + xa_unlock_irq(&mapping->i_pages); return 0; } return TestClearPageDirty(page); diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index b03057afac2a..66369ec90020 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -463,11 +463,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb, if (op->downcall.type != ORANGEFS_VFS_OP_READDIR) goto wakeup; - op->downcall.trailer_buf = vmalloc(op->downcall.trailer_size); + op->downcall.trailer_buf = vzalloc(op->downcall.trailer_size); if (!op->downcall.trailer_buf) goto Enomem; - memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size); if (!copy_from_iter_full(op->downcall.trailer_buf, op->downcall.trailer_size, iter)) { gossip_err("%s: failed to copy trailer.\n", __func__); @@ -779,9 +778,35 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, #endif /* CONFIG_COMPAT is in .config */ +static __poll_t orangefs_devreq_poll(struct file *file, + struct poll_table_struct *poll_table) +{ + __poll_t poll_revent_mask = 0; + + poll_wait(file, &orangefs_request_list_waitq, poll_table); + + if (!list_empty(&orangefs_request_list)) + poll_revent_mask |= EPOLLIN; + return poll_revent_mask; +} + /* the assigned character device major number */ static int orangefs_dev_major; +static const struct file_operations orangefs_devreq_file_operations = { + .owner = THIS_MODULE, + .read = orangefs_devreq_read, + .write_iter = orangefs_devreq_write_iter, + .open = orangefs_devreq_open, + .release = orangefs_devreq_release, + .unlocked_ioctl = orangefs_devreq_ioctl, + +#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ + .compat_ioctl = orangefs_devreq_compat_ioctl, +#endif + .poll = orangefs_devreq_poll +}; + /* * Initialize orangefs device specific state: * Must be called at module load time only @@ -814,29 +839,3 @@ void orangefs_dev_cleanup(void) "*** /dev/%s character device unregistered ***\n", ORANGEFS_REQDEVICE_NAME); } - -static __poll_t orangefs_devreq_poll(struct file *file, - struct poll_table_struct *poll_table) -{ - __poll_t poll_revent_mask = 0; - - poll_wait(file, &orangefs_request_list_waitq, poll_table); - - if (!list_empty(&orangefs_request_list)) - poll_revent_mask |= EPOLLIN; - return poll_revent_mask; -} - -const struct file_operations orangefs_devreq_file_operations = { - .owner = THIS_MODULE, - .read = orangefs_devreq_read, - .write_iter = orangefs_devreq_write_iter, - .open = orangefs_devreq_open, - .release = orangefs_devreq_release, - .unlocked_ioctl = orangefs_devreq_ioctl, - -#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */ - .compat_ioctl = orangefs_devreq_compat_ioctl, -#endif - .poll = orangefs_devreq_poll -}; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 0d228cd087e6..26358efbf794 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -42,70 +42,6 @@ static int flush_racache(struct inode *inode) } /* - * Copy to client-core's address space from the buffers specified - * by the iovec upto total_size bytes. - * NOTE: the iovector can either contain addresses which - * can futher be kernel-space or user-space addresses. - * or it can pointers to struct page's - */ -static int precopy_buffers(int buffer_index, - struct iov_iter *iter, - size_t total_size) -{ - int ret = 0; - /* - * copy data from application/kernel by pulling it out - * of the iovec. - */ - - - if (total_size) { - ret = orangefs_bufmap_copy_from_iovec(iter, - buffer_index, - total_size); - if (ret < 0) - gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", - __func__, - (long)ret); - } - - if (ret < 0) - gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", - __func__, - (long)ret); - return ret; -} - -/* - * Copy from client-core's address space to the buffers specified - * by the iovec upto total_size bytes. - * NOTE: the iovector can either contain addresses which - * can futher be kernel-space or user-space addresses. - * or it can pointers to struct page's - */ -static int postcopy_buffers(int buffer_index, - struct iov_iter *iter, - size_t total_size) -{ - int ret = 0; - /* - * copy data to application/kernel by pushing it out to - * the iovec. NOTE; target buffers can be addresses or - * struct page pointers. - */ - if (total_size) { - ret = orangefs_bufmap_copy_to_iovec(iter, - buffer_index, - total_size); - if (ret < 0) - gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", - __func__, - (long)ret); - } - return ret; -} - -/* * Post and wait for the I/O upcall to finish */ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, @@ -157,14 +93,15 @@ populate_shared_memory: total_size); /* * Stage 1: copy the buffers into client-core's address space - * precopy_buffers only pertains to writes. */ - if (type == ORANGEFS_IO_WRITE) { - ret = precopy_buffers(buffer_index, - iter, - total_size); - if (ret < 0) + if (type == ORANGEFS_IO_WRITE && total_size) { + ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, + total_size); + if (ret < 0) { + gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", + __func__, (long)ret); goto out; + } } gossip_debug(GOSSIP_FILE_DEBUG, @@ -260,14 +197,20 @@ populate_shared_memory: /* * Stage 3: Post copy buffers from client-core's address space - * postcopy_buffers only pertains to reads. */ - if (type == ORANGEFS_IO_READ) { - ret = postcopy_buffers(buffer_index, - iter, - new_op->downcall.resp.io.amt_complete); - if (ret < 0) + if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) { + /* + * NOTE: the iovector can either contain addresses which + * can futher be kernel-space or user-space addresses. + * or it can pointers to struct page's + */ + ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, + new_op->downcall.resp.io.amt_complete); + if (ret < 0) { + gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", + __func__, (long)ret); goto out; + } } gossip_debug(GOSSIP_FILE_DEBUG, "%s(%pU): Amount %s, returned by the sys-io call:%d\n", @@ -585,6 +528,28 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar return ret; } +static int orangefs_fault(struct vm_fault *vmf) +{ + struct file *file = vmf->vma->vm_file; + int rc; + rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, + STATX_SIZE); + if (rc == -ESTALE) + rc = -EIO; + if (rc) { + gossip_err("%s: orangefs_inode_getattr failed, " + "rc:%d:.\n", __func__, rc); + return rc; + } + return filemap_fault(vmf); +} + +const struct vm_operations_struct orangefs_file_vm_ops = { + .fault = orangefs_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = filemap_page_mkwrite, +}; + /* * Memory map a region of a file. */ @@ -596,12 +561,16 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) (char *)file->f_path.dentry->d_name.name : (char *)"Unknown")); + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + /* set the sequential readahead hint */ vma->vm_flags |= VM_SEQ_READ; vma->vm_flags &= ~VM_RAND_READ; - /* Use readonly mmap since we cannot support writable maps. */ - return generic_file_readonly_mmap(file, vma); + file_accessed(file); + vma->vm_ops = &orangefs_file_vm_ops; + return 0; } #define mapping_nrpages(idata) ((idata)->nrpages) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index fe1d705ad91f..79c61da8b1bc 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -138,7 +138,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb, } /** ORANGEFS2 implementation of address space operations */ -const struct address_space_operations orangefs_address_operations = { +static const struct address_space_operations orangefs_address_operations = { .readpage = orangefs_readpage, .readpages = orangefs_readpages, .invalidatepage = orangefs_invalidatepage, @@ -307,7 +307,7 @@ int orangefs_update_time(struct inode *inode, struct timespec *time, int flags) } /* ORANGEDS2 implementation of VFS inode operations for files */ -const struct inode_operations orangefs_file_inode_operations = { +static const struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, .set_acl = orangefs_set_acl, .setattr = orangefs_setattr, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 59f444dced9b..4f927023d095 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -71,9 +71,9 @@ static void put(struct slot_map *m, int slot) spin_lock(&m->q.lock); __clear_bit(slot, m->map); v = ++m->c; - if (unlikely(v == 1)) /* no free slots -> one free slot */ + if (v > 0) wake_up_locked(&m->q); - else if (unlikely(v == -1)) /* finished dying */ + if (unlikely(v == -1)) /* finished dying */ wake_up_all_locked(&m->q); spin_unlock(&m->q.lock); } diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h index c7db56a31b92..6e079d4230d0 100644 --- a/fs/orangefs/orangefs-debug.h +++ b/fs/orangefs/orangefs-debug.h @@ -43,12 +43,6 @@ #define GOSSIP_MAX_NR 16 #define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) -/*function prototypes*/ -__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging); -__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging); -char *ORANGEFS_debug_mask_to_eventlog(__u64 mask); -char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask); - /* a private internal type */ struct __keyword_mask_s { const char *keyword; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index eebbaece85ef..c29bb0ebc6bb 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -65,11 +65,7 @@ #define ORANGEFS_REQDEVICE_NAME "pvfs2-req" #define ORANGEFS_DEVREQ_MAGIC 0x20030529 -#define ORANGEFS_LINK_MAX 0x000000FF #define ORANGEFS_PURGE_RETRY_COUNT 0x00000005 -#define ORANGEFS_MAX_NUM_OPTIONS 0x00000004 -#define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080 -#define ORANGEFS_MAX_FSKEY_LEN 64 #define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) + \ sizeof(__u64) + sizeof(struct orangefs_upcall_s)) @@ -113,15 +109,6 @@ extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type); extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type); /* - * Redefine xtvec structure so that we could move helper functions out of - * the define - */ -struct xtvec { - __kernel_off_t xtv_off; /* must be off_t */ - __kernel_size_t xtv_len; /* must be size_t */ -}; - -/* * orangefs data structures */ struct orangefs_kernel_op_s { @@ -224,39 +211,6 @@ struct orangefs_sb_info_s { struct list_head list; }; -/* - * structure that holds the state of any async I/O operation issued - * through the VFS. Needed especially to handle cancellation requests - * or even completion notification so that the VFS client-side daemon - * can free up its vfs_request slots. - */ -struct orangefs_kiocb_s { - /* the pointer to the task that initiated the AIO */ - struct task_struct *tsk; - - /* pointer to the kiocb that kicked this operation */ - struct kiocb *kiocb; - - /* buffer index that was used for the I/O */ - struct orangefs_bufmap *bufmap; - int buffer_index; - - /* orangefs kernel operation type */ - struct orangefs_kernel_op_s *op; - - /* set to indicate the type of the operation */ - int rw; - - /* file offset */ - loff_t offset; - - /* and the count in bytes */ - size_t bytes_to_be_copied; - - ssize_t bytes_copied; - int needs_cleanup; -}; - struct orangefs_stats { unsigned long cache_hits; unsigned long cache_misses; @@ -305,21 +259,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode) return &(ORANGEFS_I(inode)->refn.khandle); } -static inline ino_t get_ino_from_khandle(struct inode *inode) -{ - struct orangefs_khandle *khandle; - ino_t ino; - - khandle = get_khandle_from_ino(inode); - ino = orangefs_khandle_to_ino(khandle); - return ino; -} - -static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry) -{ - return get_ino_from_khandle(dentry->d_parent->d_inode); -} - static inline int is_root_handle(struct inode *inode) { gossip_debug(GOSSIP_DCACHE_DEBUG, @@ -391,7 +330,6 @@ void fsid_key_table_finalize(void); /* * defined in inode.c */ -__u32 convert_to_orangefs_mask(unsigned long lite_mask); struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, int mode, @@ -410,17 +348,6 @@ int orangefs_update_time(struct inode *, struct timespec *, int); /* * defined in xattr.c */ -int orangefs_setxattr(struct dentry *dentry, - const char *name, - const void *value, - size_t size, - int flags); - -ssize_t orangefs_getxattr(struct dentry *dentry, - const char *name, - void *buffer, - size_t size); - ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* @@ -467,8 +394,6 @@ int orangefs_inode_check_changed(struct inode *inode); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -int orangefs_unmount_sb(struct super_block *sb); - bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); int orangefs_normalize_to_errno(__s32 error_code); @@ -487,16 +412,11 @@ extern struct list_head *orangefs_htable_ops_in_progress; extern spinlock_t orangefs_htable_ops_in_progress_lock; extern int hash_table_size; -extern const struct address_space_operations orangefs_address_operations; -extern const struct inode_operations orangefs_file_inode_operations; extern const struct file_operations orangefs_file_operations; extern const struct inode_operations orangefs_symlink_inode_operations; extern const struct inode_operations orangefs_dir_inode_operations; extern const struct file_operations orangefs_dir_operations; extern const struct dentry_operations orangefs_dentry_operations; -extern const struct file_operations orangefs_devreq_file_operations; - -extern wait_queue_head_t orangefs_bufmap_init_waitq; /* * misc convenience macros diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index dc6e3e6269c3..61ee8d64c842 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -5,11 +5,6 @@ #include <linux/slab.h> #include <linux/ioctl.h> -/* pvfs2-config.h ***********************************************************/ -#define ORANGEFS_VERSION_MAJOR 2 -#define ORANGEFS_VERSION_MINOR 9 -#define ORANGEFS_VERSION_SUB 0 - /* khandle stuff ***********************************************************/ /* @@ -70,16 +65,6 @@ static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh, } /* pvfs2-types.h ************************************************************/ -typedef __u32 ORANGEFS_uid; -typedef __u32 ORANGEFS_gid; -typedef __s32 ORANGEFS_fs_id; -typedef __u32 ORANGEFS_permissions; -typedef __u64 ORANGEFS_time; -typedef __s64 ORANGEFS_size; -typedef __u64 ORANGEFS_flags; -typedef __u64 ORANGEFS_ds_position; -typedef __s32 ORANGEFS_error; -typedef __s64 ORANGEFS_offset; #define ORANGEFS_SUPER_MAGIC 0x20030528 @@ -145,7 +130,6 @@ typedef __s64 ORANGEFS_offset; #define ORANGEFS_APPEND_FL FS_APPEND_FL #define ORANGEFS_NOATIME_FL FS_NOATIME_FL #define ORANGEFS_MIRROR_FL 0x01000000ULL -#define ORANGEFS_O_EXECUTE (1 << 0) #define ORANGEFS_FS_ID_NULL ((__s32)0) #define ORANGEFS_ATTR_SYS_UID (1 << 0) #define ORANGEFS_ATTR_SYS_GID (1 << 1) @@ -229,35 +213,6 @@ enum orangefs_ds_type { ORANGEFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ }; -/* - * ORANGEFS_certificate simply stores a buffer with the buffer size. - * The buffer can be converted to an OpenSSL X509 struct for use. - */ -struct ORANGEFS_certificate { - __u32 buf_size; - unsigned char *buf; -}; - -/* - * A credential identifies a user and is signed by the client/user - * private key. - */ -struct ORANGEFS_credential { - __u32 userid; /* user id */ - __u32 num_groups; /* length of group_array */ - __u32 *group_array; /* groups for which the user is a member */ - char *issuer; /* alias of the issuing server */ - __u64 timeout; /* seconds after epoch to time out */ - __u32 sig_size; /* length of the signature in bytes */ - unsigned char *signature; /* digital signature */ - struct ORANGEFS_certificate certificate; /* user certificate buffer */ -}; -#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS * \ - sizeof(__u32) + \ - ORANGEFS_REQ_LIMIT_ISSUER + \ - ORANGEFS_REQ_LIMIT_SIGNATURE + \ - extra_size_ORANGEFS_certificate) - /* This structure is used by the VFS-client interaction alone */ struct ORANGEFS_keyval_pair { char key[ORANGEFS_MAX_XATTR_NAMELEN]; diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index ce6ff5a0a6e4..17032631c5cf 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -86,3 +86,20 @@ config OVERLAY_FS_NFS_EXPORT case basis with the "nfs_export=on" mount option. Say N unless you fully understand the consequences. + +config OVERLAY_FS_XINO_AUTO + bool "Overlayfs: auto enable inode number mapping" + default n + depends on OVERLAY_FS + help + If this config option is enabled then overlay filesystems will use + unused high bits in undelying filesystem inode numbers to map all + inodes to a unified address space. The mapped 64bit inode numbers + might not be compatible with applications that expect 32bit inodes. + + If compatibility with applications that expect 32bit inodes is not an + issue, then it is safe and recommended to say Y here. + + For more information, see Documentation/filesystems/overlayfs.txt + + If unsure, say N. diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index d855f508fa20..8bede0742619 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,7 +232,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper) +struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, fh_len, dwords; @@ -300,7 +300,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower, * up and a pure upper inode. */ if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_fh(lower, false); + fh = ovl_encode_real_fh(lower, false); if (IS_ERR(fh)) return PTR_ERR(fh); } @@ -321,7 +321,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) const struct ovl_fh *fh; int err; - fh = ovl_encode_fh(upper, true); + fh = ovl_encode_real_fh(upper, true); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 87bd4148f4fb..425a94672300 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -228,8 +228,8 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) goto fail; /* Encode an upper or lower file handle */ - fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) : - ovl_dentry_upper(dentry), !enc_lower); + fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) : + ovl_dentry_upper(dentry), !enc_lower); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; @@ -267,8 +267,8 @@ static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len) return OVL_FILEID; } -static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, - struct inode *parent) +static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, + struct inode *parent) { struct dentry *dentry; int type; @@ -305,15 +305,12 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, if (d_is_dir(upper ?: lower)) return ERR_PTR(-EIO); - inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower); + inode = ovl_get_inode(sb, dget(upper), lowerpath, index, !!lower); if (IS_ERR(inode)) { dput(upper); return ERR_CAST(inode); } - if (index) - ovl_set_flag(OVL_INDEX, inode); - dentry = d_find_any_alias(inode); if (!dentry) { dentry = d_alloc_anon(inode->i_sb); @@ -685,7 +682,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, if (!ofs->upper_mnt) return ERR_PTR(-EACCES); - upper = ovl_decode_fh(fh, ofs->upper_mnt); + upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true); if (IS_ERR_OR_NULL(upper)) return upper; @@ -703,25 +700,39 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, struct ovl_path *stack = &origin; struct dentry *dentry = NULL; struct dentry *index = NULL; - struct inode *inode = NULL; - bool is_deleted = false; + struct inode *inode; int err; - /* First lookup indexed upper by fh */ + /* First lookup overlay inode in inode cache by origin fh */ + err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack); + if (err) + return ERR_PTR(err); + + if (!d_is_dir(origin.dentry) || + !(origin.dentry->d_flags & DCACHE_DISCONNECTED)) { + inode = ovl_lookup_inode(sb, origin.dentry, false); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_err; + if (inode) { + dentry = d_find_any_alias(inode); + iput(inode); + if (dentry) + goto out; + } + } + + /* Then lookup indexed upper/whiteout by origin fh */ if (ofs->indexdir) { index = ovl_get_index_fh(ofs, fh); err = PTR_ERR(index); if (IS_ERR(index)) { - if (err != -ESTALE) - return ERR_PTR(err); - - /* Found a whiteout index - treat as deleted inode */ - is_deleted = true; index = NULL; + goto out_err; } } - /* Then try to get upper dir by index */ + /* Then try to get a connected upper dir by index */ if (index && d_is_dir(index)) { struct dentry *upper = ovl_index_upper(ofs, index); @@ -734,24 +745,19 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, goto out; } - /* Then lookup origin by fh */ - err = ovl_check_origin_fh(ofs, fh, NULL, &stack); - if (err) { - goto out_err; - } else if (index) { - err = ovl_verify_origin(index, origin.dentry, false); + /* Otherwise, get a connected non-upper dir or disconnected non-dir */ + if (d_is_dir(origin.dentry) && + (origin.dentry->d_flags & DCACHE_DISCONNECTED)) { + dput(origin.dentry); + origin.dentry = NULL; + err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack); if (err) goto out_err; - } else if (is_deleted) { - /* Lookup deleted non-dir by origin inode */ - if (!d_is_dir(origin.dentry)) - inode = ovl_lookup_inode(sb, origin.dentry, false); - err = -ESTALE; - if (!inode || atomic_read(&inode->i_count) == 1) + } + if (index) { + err = ovl_verify_origin(index, origin.dentry, false); + if (err) goto out_err; - - /* Deleted but still open? */ - index = dget(ovl_i_dentry_upper(inode)); } dentry = ovl_get_dentry(sb, NULL, &origin, index); @@ -759,7 +765,6 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, out: dput(origin.dentry); dput(index); - iput(inode); return dentry; out_err: @@ -829,7 +834,7 @@ static struct dentry *ovl_get_parent(struct dentry *dentry) } const struct export_operations ovl_export_operations = { - .encode_fh = ovl_encode_inode_fh, + .encode_fh = ovl_encode_fh, .fh_to_dentry = ovl_fh_to_dentry, .fh_to_parent = ovl_fh_to_parent, .get_name = ovl_get_name, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3b1bd469accd..6e3815fb006b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -16,13 +16,6 @@ #include "overlayfs.h" -static dev_t ovl_get_pseudo_dev(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->lowerstack[0].layer->pseudo_dev; -} - int ovl_setattr(struct dentry *dentry, struct iattr *attr) { int err; @@ -66,6 +59,69 @@ out: return err; } +static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, + struct ovl_layer *lower_layer) +{ + bool samefs = ovl_same_sb(dentry->d_sb); + unsigned int xinobits = ovl_xino_bits(dentry->d_sb); + + if (samefs) { + /* + * When all layers are on the same fs, all real inode + * number are unique, so we use the overlay st_dev, + * which is friendly to du -x. + */ + stat->dev = dentry->d_sb->s_dev; + return 0; + } else if (xinobits) { + unsigned int shift = 64 - xinobits; + /* + * All inode numbers of underlying fs should not be using the + * high xinobits, so we use high xinobits to partition the + * overlay st_ino address space. The high bits holds the fsid + * (upper fsid is 0). This way overlay inode numbers are unique + * and all inodes use overlay st_dev. Inode numbers are also + * persistent for a given layer configuration. + */ + if (stat->ino >> shift) { + pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + dentry, stat->ino, xinobits); + } else { + if (lower_layer) + stat->ino |= ((u64)lower_layer->fsid) << shift; + + stat->dev = dentry->d_sb->s_dev; + return 0; + } + } + + /* The inode could not be mapped to a unified st_ino address space */ + if (S_ISDIR(dentry->d_inode->i_mode)) { + /* + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + * + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino for directories. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + } else if (lower_layer && lower_layer->fsid) { + /* + * For non-samefs setup, if we cannot map all layers st_ino + * to a unified address space, we need to make sure that st_dev + * is unique per lower fs. Upper layer uses real st_dev and + * lower layers use the unique anonymous bdev assigned to the + * lower fs. + */ + stat->dev = lower_layer->fs->pseudo_dev; + } + + return 0; +} + int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -75,6 +131,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); bool samefs = ovl_same_sb(dentry->d_sb); + struct ovl_layer *lower_layer = NULL; int err; type = ovl_path_real(dentry, &realpath); @@ -84,14 +141,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat, goto out; /* - * For non-dir or same fs, we use st_ino of the copy up origin, if we - * know it. This guaranties constant st_dev/st_ino across copy up. + * For non-dir or same fs, we use st_ino of the copy up origin. + * This guaranties constant st_dev/st_ino across copy up. + * With xino feature and non-samefs, we use st_ino of the copy up + * origin masked with high bits that represent the layer id. * - * If filesystem supports NFS export ops, this also guaranties + * If lower filesystem supports NFS file handles, this also guaranties * persistent st_ino across mount cycle. */ - if (!is_dir || samefs) { - if (OVL_TYPE_ORIGIN(type)) { + if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { + if (!OVL_TYPE_UPPER(type)) { + lower_layer = ovl_layer_lower(dentry); + } else if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); @@ -118,43 +179,17 @@ int ovl_getattr(const struct path *path, struct kstat *stat, */ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || (!ovl_verify_lower(dentry->d_sb) && - (is_dir || lowerstat.nlink == 1))) + (is_dir || lowerstat.nlink == 1))) { stat->ino = lowerstat.ino; - - if (samefs) - WARN_ON_ONCE(stat->dev != lowerstat.dev); - else - stat->dev = ovl_get_pseudo_dev(dentry); - } - if (samefs) { - /* - * When all layers are on the same fs, all real inode - * number are unique, so we use the overlay st_dev, - * which is friendly to du -x. - */ - stat->dev = dentry->d_sb->s_dev; - } else if (!OVL_TYPE_UPPER(type)) { - /* - * For non-samefs setup, to make sure that st_dev/st_ino - * pair is unique across the system, we use a unique - * anonymous st_dev for lower layer inode. - */ - stat->dev = ovl_get_pseudo_dev(dentry); + lower_layer = ovl_layer_lower(dentry); + } } - } else { - /* - * Always use the overlay st_dev for directories, so 'find - * -xdev' will scan the entire overlay mount and won't cross the - * overlay mount boundaries. - * - * If not all layers are on the same fs the pair {real st_ino; - * overlay st_dev} is not unique, so use the non persistent - * overlay st_ino for directories. - */ - stat->dev = dentry->d_sb->s_dev; - stat->ino = dentry->d_inode->i_ino; } + err = ovl_map_dev_ino(dentry, stat, lower_layer); + if (err) + goto out; + /* * It's probably not worth it to count subdirs to get the * correct link count. nlink=1 seems to pacify 'find' and @@ -383,24 +418,18 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) int ovl_update_time(struct inode *inode, struct timespec *ts, int flags) { - struct dentry *alias; - struct path upperpath; - - if (!(flags & S_ATIME)) - return 0; - - alias = d_find_any_alias(inode); - if (!alias) - return 0; - - ovl_path_upper(alias, &upperpath); - if (upperpath.dentry) { - touch_atime(&upperpath); - inode->i_atime = d_inode(upperpath.dentry)->i_atime; + if (flags & S_ATIME) { + struct ovl_fs *ofs = inode->i_sb->s_fs_info; + struct path upperpath = { + .mnt = ofs->upper_mnt, + .dentry = ovl_upperdentry_dereference(OVL_I(inode)), + }; + + if (upperpath.dentry) { + touch_atime(&upperpath); + inode->i_atime = d_inode(upperpath.dentry)->i_atime; + } } - - dput(alias); - return 0; } @@ -459,9 +488,27 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #endif } -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, + unsigned long ino, int fsid) { - inode->i_ino = get_next_ino(); + int xinobits = ovl_xino_bits(inode->i_sb); + + /* + * When NFS export is enabled and d_ino is consistent with st_ino + * (samefs or i_ino has enough bits to encode layer), set the same + * value used for d_ino to i_ino, because nfsd readdirplus compares + * d_ino values to i_ino values of child entries. When called from + * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real + * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). + */ + if (inode->i_sb->s_export_op && + (ovl_same_sb(inode->i_sb) || xinobits)) { + inode->i_ino = ino; + if (xinobits && fsid && !(ino >> (64 - xinobits))) + inode->i_ino |= (unsigned long)fsid << (64 - xinobits); + } else { + inode->i_ino = get_next_ino(); + } inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL @@ -597,7 +644,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode, rdev); + ovl_fill_inode(inode, mode, rdev, 0, 0); return inode; } @@ -703,13 +750,16 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, } struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *index, + struct ovl_path *lowerpath, struct dentry *index, unsigned int numlower) { struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; + struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); + int fsid = bylower ? lowerpath->layer->fsid : 0; bool is_dir; + unsigned long ino = 0; if (!realinode) realinode = d_inode(lowerdentry); @@ -748,18 +798,22 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, if (!is_dir) nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); + ino = key->i_ino; } else { /* Lower hardlink that will be broken on copy up */ inode = new_inode(sb); if (!inode) goto out_nomem; } - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); ovl_inode_init(inode, upperdentry, lowerdentry); if (upperdentry && ovl_is_impuredir(upperdentry)) ovl_set_flag(OVL_IMPURE, inode); + if (index) + ovl_set_flag(OVL_INDEX, inode); + /* Check for non-merge dir that may have whiteouts */ if (is_dir) { if (((upperdentry && lowerdentry) || numlower > 1) || diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 70fcfcc684cc..2dba29eadde6 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -56,6 +56,15 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, if (s == next) goto invalid; } + /* + * One of the ancestor path elements in an absolute path + * lookup in ovl_lookup_layer() could have been opaque and + * that will stop further lookup in lower layers (d->stop=true) + * But we have found an absolute redirect in decendant path + * element and that should force continue lookup in lower + * layers (reset d->stop). + */ + d->stop = false; } else { if (strchr(buf, '/') != NULL) goto invalid; @@ -171,7 +180,8 @@ invalid: goto out; } -struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) +struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt, + bool connected) { struct dentry *real; int bytes; @@ -186,7 +196,7 @@ struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) bytes = (fh->len - offsetof(struct ovl_fh, fid)); real = exportfs_decode_fh(mnt, (struct fid *)fh->fid, bytes >> 2, (int)fh->type, - ovl_acceptable, mnt); + connected ? ovl_acceptable : NULL, mnt); if (IS_ERR(real)) { /* * Treat stale file handle to lower file as "origin unknown". @@ -220,6 +230,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, { struct dentry *this; int err; + bool last_element = !post[0]; this = lookup_one_len_unlocked(name, base, namelen); if (IS_ERR(this)) { @@ -245,11 +256,23 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, d->stop = true; if (d->is_dir) goto put_and_out; + + /* + * NB: handle failure to lookup non-last element when non-dir + * redirects become possible + */ + WARN_ON(!last_element); goto out; } - d->is_dir = true; - if (!d->last && ovl_is_opaquedir(this)) { - d->stop = d->opaque = true; + if (last_element) + d->is_dir = true; + if (d->last) + goto out; + + if (ovl_is_opaquedir(this)) { + d->stop = true; + if (last_element) + d->opaque = true; goto out; } err = ovl_check_redirect(this, d, prelen, post); @@ -310,14 +333,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp) { struct dentry *origin = NULL; int i; for (i = 0; i < ofs->numlower; i++) { - origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt); + origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt, + connected); if (origin) break; } @@ -361,7 +385,7 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, if (IS_ERR_OR_NULL(fh)) return PTR_ERR(fh); - err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp); + err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp); kfree(fh); if (err) { @@ -415,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct ovl_fh *fh; int err; - fh = ovl_encode_fh(real, is_upper); + fh = ovl_encode_real_fh(real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; @@ -451,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) if (IS_ERR_OR_NULL(fh)) return ERR_CAST(fh); - upper = ovl_decode_fh(fh, ofs->upper_mnt); + upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true); kfree(fh); if (IS_ERR_OR_NULL(upper)) @@ -558,7 +582,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) /* Check if non-dir index is orphan and don't warn before cleaning it */ if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) { - err = ovl_check_origin_fh(ofs, fh, index, &stack); + err = ovl_check_origin_fh(ofs, fh, false, index, &stack); if (err) goto fail; @@ -619,7 +643,7 @@ int ovl_get_index_name(struct dentry *origin, struct qstr *name) struct ovl_fh *fh; int err; - fh = ovl_encode_fh(origin, false); + fh = ovl_encode_real_fh(origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); @@ -815,7 +839,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, .is_dir = false, .opaque = false, .stop = false, - .last = !poe->numlower, + .last = ofs->config.redirect_follow ? false : !poe->numlower, .redirect = NULL, }; @@ -873,7 +897,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, for (i = 0; !d.stop && i < poe->numlower; i++) { struct ovl_path lower = poe->lowerstack[i]; - d.last = i == poe->numlower - 1; + if (!ofs->config.redirect_follow) + d.last = i == poe->numlower - 1; + else + d.last = lower.layer->idx == roe->numlower; + err = ovl_lookup_layer(lower.dentry, &d, &this); if (err) goto out_put; @@ -976,17 +1004,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { - if (ctr) - origin = stack[0].dentry; - inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index, + inode = ovl_get_inode(dentry->d_sb, upperdentry, stack, index, ctr); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; + /* + * NB: handle redirected hard links when non-dir redirects + * become possible + */ + WARN_ON(OVL_I(inode)->redirect); OVL_I(inode)->redirect = upperredirect; - if (index) - ovl_set_flag(OVL_INDEX, inode); } revert_creds(old_cred); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 225ff1171147..e0b7de799f6b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -202,7 +202,7 @@ void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); struct super_block *ovl_same_sb(struct super_block *sb); -bool ovl_can_decode_fh(struct super_block *sb); +int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); @@ -215,6 +215,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); +struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); @@ -263,11 +264,19 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); } +static inline unsigned int ovl_xino_bits(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->xino_bits; +} + /* namei.c */ int ovl_check_fh_len(struct ovl_fh *fh, int fh_len); -struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt); -int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, +struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt, + bool connected); +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct dentry *dentry, const char *name, struct dentry *real, bool is_upper, bool set); @@ -329,7 +338,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *index, + struct ovl_path *lowerpath, struct dentry *index, unsigned int numlower); static inline void ovl_copyattr(struct inode *from, struct inode *to) { @@ -361,7 +370,7 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper); +struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper); int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index bfef6edcc111..41655a7d6894 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -18,13 +18,21 @@ struct ovl_config { const char *redirect_mode; bool index; bool nfs_export; + int xino; +}; + +struct ovl_sb { + struct super_block *sb; + dev_t pseudo_dev; }; struct ovl_layer { struct vfsmount *mnt; - dev_t pseudo_dev; - /* Index of this layer in fs root (upper == 0) */ + struct ovl_sb *fs; + /* Index of this layer in fs root (upper idx == 0) */ int idx; + /* One fsid per unique underlying sb (upper fsid == 0) */ + int fsid; }; struct ovl_path { @@ -35,8 +43,11 @@ struct ovl_path { /* private information held for overlayfs's superblock */ struct ovl_fs { struct vfsmount *upper_mnt; - unsigned numlower; + unsigned int numlower; + /* Number of unique lower sb that differ from upper sb */ + unsigned int numlowerfs; struct ovl_layer *lower_layers; + struct ovl_sb *lower_fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; /* workdir is the 'work' directory under workbasedir */ @@ -50,11 +61,11 @@ struct ovl_fs { const struct cred *creator_cred; bool tmpfile; bool noxattr; - /* sb common to all layers */ - struct super_block *same_sb; /* Did we take the inuse lock? */ bool upperdir_locked; bool workdir_locked; + /* Inode numbers in all layers do not use the high xino_bits */ + unsigned int xino_bits; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index c11f5c0906c3..ef1fe42ff7bb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -120,6 +120,10 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd, if (!rdd->dentry) return false; + /* Always recalc d_ino when remapping lower inode numbers */ + if (ovl_xino_bits(rdd->dentry->d_sb)) + return true; + /* Always recalc d_ino for parent */ if (strcmp(p->name, "..") == 0) return true; @@ -435,6 +439,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) return cache; } +/* Map inode number to lower fs unique range */ +static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, + const char *name, int namelen) +{ + if (ino >> (64 - xinobits)) { + pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + namelen, name, ino, xinobits); + return ino; + } + + return ino | ((u64)fsid) << (64 - xinobits); +} + /* * Set d_ino for upper entries. Non-upper entries should always report * the uppermost real inode ino and should not call this function. @@ -452,9 +469,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) struct dentry *this = NULL; enum ovl_path_type type; u64 ino = p->real_ino; + int xinobits = ovl_xino_bits(dir->d_sb); int err = 0; - if (!ovl_same_sb(dir->d_sb)) + if (!ovl_same_sb(dir->d_sb) && !xinobits) goto out; if (p->name[0] == '.') { @@ -491,6 +509,10 @@ get: WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); ino = stat.ino; + } else if (xinobits && !OVL_TYPE_UPPER(type)) { + ino = ovl_remap_lower_ino(ino, xinobits, + ovl_layer_lower(this)->fsid, + p->name, p->len); } out: @@ -618,6 +640,8 @@ struct ovl_readdir_translate { struct ovl_dir_cache *cache; struct dir_context ctx; u64 parent_ino; + int fsid; + int xinobits; }; static int ovl_fill_real(struct dir_context *ctx, const char *name, @@ -628,14 +652,17 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, container_of(ctx, struct ovl_readdir_translate, ctx); struct dir_context *orig_ctx = rdt->orig_ctx; - if (rdt->parent_ino && strcmp(name, "..") == 0) + if (rdt->parent_ino && strcmp(name, "..") == 0) { ino = rdt->parent_ino; - else if (rdt->cache) { + } else if (rdt->cache) { struct ovl_cache_entry *p; p = ovl_cache_entry_find(&rdt->cache->root, name, namelen); if (p) ino = p->ino; + } else if (rdt->xinobits) { + ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, + name, namelen); } return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); @@ -646,11 +673,16 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) int err; struct ovl_dir_file *od = file->private_data; struct dentry *dir = file->f_path.dentry; + struct ovl_layer *lower_layer = ovl_layer_lower(dir); struct ovl_readdir_translate rdt = { .ctx.actor = ovl_fill_real, .orig_ctx = ctx, + .xinobits = ovl_xino_bits(dir->d_sb), }; + if (rdt.xinobits && lower_layer) + rdt.fsid = lower_layer->fsid; + if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) { struct kstat stat; struct path statpath = file->f_path; @@ -693,9 +725,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) * dir is impure then need to adjust d_ino for copied up * entries. */ - if (ovl_same_sb(dentry->d_sb) && - (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || - OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) { + if (ovl_xino_bits(dentry->d_sb) || + (ovl_same_sb(dentry->d_sb) && + (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) || + OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) { return ovl_iterate_real(file, ctx); } return iterate_dir(od->realfile, ctx); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 7c24619ae7fc..e8551c97de51 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -17,6 +17,7 @@ #include <linux/statfs.h> #include <linux/seq_file.h> #include <linux/posix_acl_xattr.h> +#include <linux/exportfs.h> #include "overlayfs.h" MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); @@ -50,6 +51,11 @@ module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); MODULE_PARM_DESC(ovl_nfs_export_def, "Default to on or off for the NFS export feature"); +static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); +module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); +MODULE_PARM_DESC(ovl_xino_auto_def, + "Auto enable xino feature"); + static void ovl_entry_stack_free(struct ovl_entry *oe) { unsigned int i; @@ -236,11 +242,12 @@ static void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ofs->upper_mnt->mnt_root); mntput(ofs->upper_mnt); - for (i = 0; i < ofs->numlower; i++) { + for (i = 0; i < ofs->numlower; i++) mntput(ofs->lower_layers[i].mnt); - free_anon_bdev(ofs->lower_layers[i].pseudo_dev); - } + for (i = 0; i < ofs->numlowerfs; i++) + free_anon_bdev(ofs->lower_fs[i].pseudo_dev); kfree(ofs->lower_layers); + kfree(ofs->lower_fs); kfree(ofs->config.lowerdir); kfree(ofs->config.upperdir); @@ -325,6 +332,23 @@ static const char *ovl_redirect_mode_def(void) return ovl_redirect_dir_def ? "on" : "off"; } +enum { + OVL_XINO_OFF, + OVL_XINO_AUTO, + OVL_XINO_ON, +}; + +static const char * const ovl_xino_str[] = { + "off", + "auto", + "on", +}; + +static inline int ovl_xino_def(void) +{ + return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF; +} + /** * ovl_show_options * @@ -350,6 +374,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ofs->config.nfs_export != ovl_nfs_export_def) seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? "on" : "off"); + if (ofs->config.xino != ovl_xino_def()) + seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); return 0; } @@ -384,6 +410,9 @@ enum { OPT_INDEX_OFF, OPT_NFS_EXPORT_ON, OPT_NFS_EXPORT_OFF, + OPT_XINO_ON, + OPT_XINO_OFF, + OPT_XINO_AUTO, OPT_ERR, }; @@ -397,6 +426,9 @@ static const match_table_t ovl_tokens = { {OPT_INDEX_OFF, "index=off"}, {OPT_NFS_EXPORT_ON, "nfs_export=on"}, {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, + {OPT_XINO_ON, "xino=on"}, + {OPT_XINO_OFF, "xino=off"}, + {OPT_XINO_AUTO, "xino=auto"}, {OPT_ERR, NULL} }; @@ -511,6 +543,18 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->nfs_export = false; break; + case OPT_XINO_ON: + config->xino = OVL_XINO_ON; + break; + + case OPT_XINO_OFF: + config->xino = OVL_XINO_OFF; + break; + + case OPT_XINO_AUTO: + config->xino = OVL_XINO_AUTO; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -700,6 +744,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, static int ovl_lower_dir(const char *name, struct path *path, struct ovl_fs *ofs, int *stack_depth, bool *remote) { + int fh_type; int err; err = ovl_mount_dir_noesc(name, path); @@ -719,15 +764,19 @@ static int ovl_lower_dir(const char *name, struct path *path, * The inodes index feature and NFS export need to encode and decode * file handles, so they require that all layers support them. */ + fh_type = ovl_can_decode_fh(path->dentry->d_sb); if ((ofs->config.nfs_export || - (ofs->config.index && ofs->config.upperdir)) && - !ovl_can_decode_fh(path->dentry->d_sb)) { + (ofs->config.index && ofs->config.upperdir)) && !fh_type) { ofs->config.index = false; ofs->config.nfs_export = false; pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", name); } + /* Check if lower fs has 32bit inode numbers */ + if (fh_type != FILEID_INO32_GEN) + ofs->xino_bits = 0; + return 0; out_put: @@ -951,6 +1000,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) { struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; + int fh_type; int err; err = mnt_want_write(mnt); @@ -1000,12 +1050,16 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) } /* Check if upper/work fs supports file handles */ - if (ofs->config.index && - !ovl_can_decode_fh(ofs->workdir->d_sb)) { + fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); + if (ofs->config.index && !fh_type) { ofs->config.index = false; pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); } + /* Check if upper fs has 32bit inode numbers */ + if (fh_type != FILEID_INO32_GEN) + ofs->xino_bits = 0; + /* NFS export of r/w mount depends on index */ if (ofs->config.nfs_export && !ofs->config.index) { pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); @@ -1108,6 +1162,35 @@ out: return err; } +/* Get a unique fsid for the layer */ +static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb) +{ + unsigned int i; + dev_t dev; + int err; + + /* fsid 0 is reserved for upper fs even with non upper overlay */ + if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb) + return 0; + + for (i = 0; i < ofs->numlowerfs; i++) { + if (ofs->lower_fs[i].sb == sb) + return i + 1; + } + + err = get_anon_bdev(&dev); + if (err) { + pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); + return err; + } + + ofs->lower_fs[ofs->numlowerfs].sb = sb; + ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev; + ofs->numlowerfs++; + + return ofs->numlowerfs; +} + static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, unsigned int numlower) { @@ -1119,23 +1202,27 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, GFP_KERNEL); if (ofs->lower_layers == NULL) goto out; + + ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb), + GFP_KERNEL); + if (ofs->lower_fs == NULL) + goto out; + for (i = 0; i < numlower; i++) { struct vfsmount *mnt; - dev_t dev; + int fsid; - err = get_anon_bdev(&dev); - if (err) { - pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); + err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb); + if (err < 0) goto out; - } mnt = clone_private_mount(&stack[i]); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { pr_err("overlayfs: failed to clone lowerpath\n"); - free_anon_bdev(dev); goto out; } + /* * Make lower layers R/O. That way fchmod/fchown on lower file * will fail instead of modifying lower fs. @@ -1143,16 +1230,41 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; ofs->lower_layers[ofs->numlower].mnt = mnt; - ofs->lower_layers[ofs->numlower].pseudo_dev = dev; ofs->lower_layers[ofs->numlower].idx = i + 1; + ofs->lower_layers[ofs->numlower].fsid = fsid; + if (fsid) { + ofs->lower_layers[ofs->numlower].fs = + &ofs->lower_fs[fsid - 1]; + } ofs->numlower++; + } + + /* + * When all layers on same fs, overlay can use real inode numbers. + * With mount option "xino=on", mounter declares that there are enough + * free high bits in underlying fs to hold the unique fsid. + * If overlayfs does encounter underlying inodes using the high xino + * bits reserved for fsid, it emits a warning and uses the original + * inode number. + */ + if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) { + ofs->xino_bits = 0; + ofs->config.xino = OVL_XINO_OFF; + } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) { + /* + * This is a roundup of number of bits needed for numlowerfs+1 + * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for + * upper fs even with non upper overlay. + */ + BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); + ofs->xino_bits = ilog2(ofs->numlowerfs) + 1; + } - /* Check if all lower layers are on same sb */ - if (i == 0) - ofs->same_sb = mnt->mnt_sb; - else if (ofs->same_sb != mnt->mnt_sb) - ofs->same_sb = NULL; + if (ofs->xino_bits) { + pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n", + ofs->xino_bits); } + err = 0; out: return err; @@ -1263,6 +1375,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ofs->config.index = ovl_index_def; ofs->config.nfs_export = ovl_nfs_export_def; + ofs->config.xino = ovl_xino_def(); err = ovl_parse_opt((char *) data, &ofs->config); if (err) goto out_err; @@ -1276,6 +1389,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; + /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ + if (ofs->config.xino != OVL_XINO_OFF) + ofs->xino_bits = BITS_PER_LONG - 32; + if (ofs->config.upperdir) { if (!ofs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); @@ -1305,8 +1422,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ofs->upper_mnt) sb->s_flags |= SB_RDONLY; - else if (ofs->upper_mnt->mnt_sb != ofs->same_sb) - ofs->same_sb = NULL; if (!(ovl_force_readonly(ofs)) && ofs->config.index) { err = ovl_get_indexdir(ofs, oe, &upperpath); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 930784a26623..6f1078028c66 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -47,13 +47,29 @@ struct super_block *ovl_same_sb(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - return ofs->same_sb; + if (!ofs->numlowerfs) + return ofs->upper_mnt->mnt_sb; + else if (ofs->numlowerfs == 1 && !ofs->upper_mnt) + return ofs->lower_fs[0].sb; + else + return NULL; } -bool ovl_can_decode_fh(struct super_block *sb) +/* + * Check if underlying fs supports file handles and try to determine encoding + * type, in order to deduce maximum inode number used by fs. + * + * Return 0 if file handles are not supported. + * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding. + * Return -1 if fs uses a non default encoding with unknown inode size. + */ +int ovl_can_decode_fh(struct super_block *sb) { - return (sb->s_export_op && sb->s_export_op->fh_to_dentry && - !uuid_is_null(&sb->s_uuid)); + if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry || + uuid_is_null(&sb->s_uuid)) + return 0; + + return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; } struct dentry *ovl_indexdir(struct super_block *sb) @@ -172,6 +188,13 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry) return oe->numlower ? oe->lowerstack[0].dentry : NULL; } +struct ovl_layer *ovl_layer_lower(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->numlower ? oe->lowerstack[0].layer : NULL; +} + struct dentry *ovl_dentry_real(struct dentry *dentry) { return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); @@ -279,12 +302,16 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, struct dentry *lowerdentry) { + struct inode *realinode = d_inode(upperdentry ?: lowerdentry); + if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); - ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); + ovl_copyattr(realinode, inode); + if (!inode->i_ino) + inode->i_ino = realinode->i_ino; } void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) @@ -299,6 +326,8 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; if (inode_unhashed(inode)) { + if (!inode->i_ino) + inode->i_ino = upperinode->i_ino; inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } diff --git a/fs/proc/array.c b/fs/proc/array.c index 598803576e4c..ae2c807fd719 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk) return task_state_array[task_state_index(tsk)]; } -static inline int get_task_umask(struct task_struct *tsk) -{ - struct fs_struct *fs; - int umask = -ENOENT; - - task_lock(tsk); - fs = tsk->fs; - if (fs) - umask = fs->umask; - task_unlock(tsk); - return umask; -} - static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; - int g, umask; + int g, umask = -1; struct task_struct *tracer; const struct cred *cred; pid_t ppid, tpid = 0, tgid, ngid; @@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, ngid = task_numa_group_id(p); cred = get_task_cred(p); - umask = get_task_umask(p); - if (umask >= 0) - seq_printf(m, "Umask:\t%#04o\n", umask); - task_lock(p); + if (p->fs) + umask = p->fs->umask; if (p->files) max_fds = files_fdtable(p->files)->max_fds; task_unlock(p); rcu_read_unlock(); - seq_printf(m, "State:\t%s", get_task_state(p)); + if (umask >= 0) + seq_printf(m, "Umask:\t%#04o\n", umask); + seq_puts(m, "State:\t"); + seq_puts(m, get_task_state(p)); seq_put_decimal_ull(m, "\nTgid:\t", tgid); seq_put_decimal_ull(m, "\nNgid:\t", ngid); @@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_puts(m, header); CAP_FOR_EACH_U32(__capi) { - seq_printf(m, "%08x", - a->cap[CAP_LAST_U32 - __capi]); + seq_put_hex_ll(m, NULL, + a->cap[CAP_LAST_U32 - __capi], 8); } seq_putc(m, '\n'); } @@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) { - seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state); + seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state); + seq_putc(m, '\n'); } int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, @@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* convert nsec -> ticks */ start_time = nsec_to_clock_t(task->real_start_time); - seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); + seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); + seq_puts(m, " ("); + seq_puts(m, tcomm); + seq_puts(m, ") "); + seq_putc(m, state); seq_put_decimal_ll(m, " ", ppid); seq_put_decimal_ll(m, " ", pgid); seq_put_decimal_ll(m, " ", sid); diff --git a/fs/proc/base.c b/fs/proc/base.c index d53246863cfb..eafa39a3a88c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, unsigned long wchan; char symname[KSYM_NAME_LEN]; - wchan = get_wchan(task); + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + goto print0; - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) - && !lookup_symbol_name(wchan, symname)) - seq_printf(m, "%s", symname); - else - seq_putc(m, '0'); + wchan = get_wchan(task); + if (wchan && !lookup_symbol_name(wchan, symname)) { + seq_puts(m, symname); + return 0; + } +print0: + seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */ @@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry, unsigned long long sval, eval; unsigned int len; + if (str[0] == '0' && str[1] != '-') + return -EINVAL; len = _parse_integer(str, 16, &sval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; @@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry, return -EINVAL; str++; + if (str[0] == '0' && str[1]) + return -EINVAL; len = _parse_integer(str, 16, &eval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; @@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) } } up_read(&mm->mmap_sem); + mmput(mm); for (i = 0; i < nr_files; i++) { char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ @@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) } if (fa) flex_array_free(fa); - mmput(mm); out_put_task: put_task_struct(task); diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 403cbb12a6e9..8233e7af9389 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -6,7 +6,8 @@ static int cmdline_proc_show(struct seq_file *m, void *v) { - seq_printf(m, "%s\n", saved_command_line); + seq_puts(m, saved_command_line); + seq_putc(m, '\n'); return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5d709fa8f3a2..2078e70e1595 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -8,12 +8,14 @@ * Copyright (C) 1997 Theodore Ts'o */ +#include <linux/cache.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/namei.h> #include <linux/slab.h> #include <linux/printk.h> #include <linux/mount.h> @@ -28,6 +30,17 @@ static DEFINE_RWLOCK(proc_subdir_lock); +struct kmem_cache *proc_dir_entry_cache __ro_after_init; + +void pde_free(struct proc_dir_entry *pde) +{ + if (S_ISLNK(pde->mode)) + kfree(pde->data); + if (pde->name != pde->inline_name) + kfree(pde->name); + kmem_cache_free(proc_dir_entry_cache, pde); +} + static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) { if (len < de->namelen) @@ -40,8 +53,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) { - return rb_entry_safe(rb_first_cached(&dir->subdir), - struct proc_dir_entry, subdir_node); + return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, + subdir_node); } static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) @@ -54,7 +67,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, const char *name, unsigned int len) { - struct rb_node *node = dir->subdir.rb_root.rb_node; + struct rb_node *node = dir->subdir.rb_node; while (node) { struct proc_dir_entry *de = rb_entry(node, @@ -75,9 +88,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *de) { - struct rb_root_cached *root = &dir->subdir; - struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; - bool leftmost = true; + struct rb_root *root = &dir->subdir; + struct rb_node **new = &root->rb_node, *parent = NULL; /* Figure out where to put new node */ while (*new) { @@ -89,16 +101,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, parent = *new; if (result < 0) new = &(*new)->rb_left; - else if (result > 0) { + else if (result > 0) new = &(*new)->rb_right; - leftmost = false; - } else + else return false; } /* Add new node and rebalance tree. */ rb_link_node(&de->subdir_node, parent, new); - rb_insert_color_cached(&de->subdir_node, root, leftmost); + rb_insert_color(&de->subdir_node, root); return true; } @@ -207,6 +218,26 @@ void proc_free_inum(unsigned int inum) ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } +static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0) + return 0; /* revalidate */ + return 1; +} + +static int proc_misc_d_delete(const struct dentry *dentry) +{ + return atomic_read(&PDE(d_inode(dentry))->in_use) < 0; +} + +static const struct dentry_operations proc_misc_dentry_ops = { + .d_revalidate = proc_misc_d_revalidate, + .d_delete = proc_misc_d_delete, +}; + /* * Don't create negative dentries here, return -ENOENT by hand * instead. @@ -224,7 +255,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &simple_dentry_operations); + d_set_d_op(dentry, &proc_misc_dentry_ops); d_add(dentry, inode); return NULL; } @@ -354,6 +385,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "name len %u\n", qstr.len); return NULL; } + if (qstr.len == 1 && fn[0] == '.') { + WARN(1, "name '.'\n"); + return NULL; + } + if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') { + WARN(1, "name '..'\n"); + return NULL; + } if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; @@ -363,16 +402,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, return NULL; } - ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); + ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!ent) goto out; + if (qstr.len + 1 <= sizeof(ent->inline_name)) { + ent->name = ent->inline_name; + } else { + ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); + if (!ent->name) { + pde_free(ent); + return NULL; + } + } + memcpy(ent->name, fn, qstr.len + 1); ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; - ent->subdir = RB_ROOT_CACHED; - atomic_set(&ent->count, 1); + ent->subdir = RB_ROOT; + refcount_set(&ent->refcnt, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); @@ -395,12 +444,11 @@ struct proc_dir_entry *proc_symlink(const char *name, strcpy((char*)ent->data,dest); ent->proc_iops = &proc_link_inode_operations; if (proc_register(parent, ent) < 0) { - kfree(ent->data); - kfree(ent); + pde_free(ent); ent = NULL; } } else { - kfree(ent); + pde_free(ent); ent = NULL; } } @@ -423,7 +471,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->proc_iops = &proc_dir_inode_operations; parent->nlink++; if (proc_register(parent, ent) < 0) { - kfree(ent); + pde_free(ent); parent->nlink--; ent = NULL; } @@ -458,7 +506,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->proc_iops = NULL; parent->nlink++; if (proc_register(parent, ent) < 0) { - kfree(ent); + pde_free(ent); parent->nlink--; ent = NULL; } @@ -495,7 +543,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, goto out_free; return pde; out_free: - kfree(pde); + pde_free(pde); out: return NULL; } @@ -522,19 +570,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) } EXPORT_SYMBOL(proc_set_user); -static void free_proc_entry(struct proc_dir_entry *de) -{ - proc_free_inum(de->low_ino); - - if (S_ISLNK(de->mode)) - kfree(de->data); - kfree(de); -} - void pde_put(struct proc_dir_entry *pde) { - if (atomic_dec_and_test(&pde->count)) - free_proc_entry(pde); + if (refcount_dec_and_test(&pde->refcnt)) { + proc_free_inum(pde->low_ino); + pde_free(pde); + } } /* @@ -555,7 +596,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = pde_subdir_find(parent, fn, len); if (de) - rb_erase_cached(&de->subdir_node, &parent->subdir); + rb_erase(&de->subdir_node, &parent->subdir); write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -592,13 +633,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) write_unlock(&proc_subdir_lock); return -ENOENT; } - rb_erase_cached(&root->subdir_node, &parent->subdir); + rb_erase(&root->subdir_node, &parent->subdir); de = root; while (1) { next = pde_subdir_first(de); if (next) { - rb_erase_cached(&next->subdir_node, &de->subdir); + rb_erase(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6e8724958116..2cf3b74391ca 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode) } static struct kmem_cache *proc_inode_cachep __ro_after_init; +static struct kmem_cache *pde_opener_cache __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { @@ -92,7 +93,7 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -void __init proc_init_inodecache(void) +void __init proc_init_kmemcache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), @@ -100,6 +101,13 @@ void __init proc_init_inodecache(void) SLAB_MEM_SPREAD|SLAB_ACCOUNT| SLAB_PANIC), init_once); + pde_opener_cache = + kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, + SLAB_ACCOUNT|SLAB_PANIC, NULL); + proc_dir_entry_cache = kmem_cache_create_usercopy( + "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC, + offsetof(struct proc_dir_entry, inline_name), + sizeof_field(struct proc_dir_entry, inline_name), NULL); } static int proc_show_options(struct seq_file *seq, struct dentry *root) @@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde) complete(pde->pde_unload_completion); } -/* pde is locked */ +/* pde is locked on entry, unlocked on exit */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) { /* @@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) pdeo->c = &c; spin_unlock(&pde->pde_unload_lock); wait_for_completion(&c); - spin_lock(&pde->pde_unload_lock); } else { struct file *file; + struct completion *c; + pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; @@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); /* After ->release. */ list_del(&pdeo->lh); - if (unlikely(pdeo->c)) - complete(pdeo->c); - kfree(pdeo); + c = pdeo->c; + spin_unlock(&pde->pde_unload_lock); + if (unlikely(c)) + complete(c); + kmem_cache_free(pde_opener_cache, pdeo); } } @@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de) struct pde_opener *pdeo; pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); close_pdeo(de, pdeo); + spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); } @@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file) * * Save every "struct file" with custom ->release hook. */ - pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); - if (!pdeo) - return -ENOMEM; - - if (!use_pde(pde)) { - kfree(pdeo); + if (!use_pde(pde)) return -ENOENT; - } - open = pde->proc_fops->open; + release = pde->proc_fops->release; + if (release) { + pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); + if (!pdeo) { + rv = -ENOMEM; + goto out_unuse; + } + } + open = pde->proc_fops->open; if (open) rv = open(inode, file); - if (rv == 0 && release) { - /* To know what to release. */ - pdeo->file = file; - pdeo->closing = false; - pdeo->c = NULL; - spin_lock(&pde->pde_unload_lock); - list_add(&pdeo->lh, &pde->pde_openers); - spin_unlock(&pde->pde_unload_lock); - } else - kfree(pdeo); + if (release) { + if (rv == 0) { + /* To know what to release. */ + pdeo->file = file; + pdeo->closing = false; + pdeo->c = NULL; + spin_lock(&pde->pde_unload_lock); + list_add(&pdeo->lh, &pde->pde_openers); + spin_unlock(&pde->pde_unload_lock); + } else + kmem_cache_free(pde_opener_cache, pdeo); + } +out_unuse: unuse_pde(pde); return rv; } @@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file) list_for_each_entry(pdeo, &pde->pde_openers, lh) { if (pdeo->file == file) { close_pdeo(pde, pdeo); - break; + return 0; } } spin_unlock(&pde->pde_unload_lock); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d697c8ab0a14..0f1692e63cb6 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> #include <linux/proc_ns.h> +#include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> @@ -36,7 +37,7 @@ struct proc_dir_entry { * negative -> it's going away RSN */ atomic_t in_use; - atomic_t count; /* use count */ + refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; @@ -50,13 +51,22 @@ struct proc_dir_entry { kgid_t gid; loff_t size; struct proc_dir_entry *parent; - struct rb_root_cached subdir; + struct rb_root subdir; struct rb_node subdir_node; + char *name; umode_t mode; u8 namelen; - char name[]; +#ifdef CONFIG_64BIT +#define SIZEOF_PDE_INLINE_NAME (192-139) +#else +#define SIZEOF_PDE_INLINE_NAME (128-87) +#endif + char inline_name[SIZEOF_PDE_INLINE_NAME]; } __randomize_layout; +extern struct kmem_cache *proc_dir_entry_cache; +void pde_free(struct proc_dir_entry *pde); + union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, @@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry * static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { - atomic_inc(&pde->count); + refcount_inc(&pde->refcnt); return pde; } extern void pde_put(struct proc_dir_entry *); @@ -177,12 +187,12 @@ struct pde_opener { struct list_head lh; bool closing; struct completion *c; -}; +} __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; -extern void proc_init_inodecache(void); +void proc_init_kmemcache(void); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern int proc_fill_super(struct super_block *, void *data, int flags); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6bb20f864259..65a72ab57471 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) { - char v[32]; - static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '}; - int len; - - len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10)); - - seq_write(m, s, 16); - - if (len > 0) { - if (len < 8) - seq_write(m, blanks, 8 - len); - - seq_write(m, v, len); - } + seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8); seq_write(m, " kB\n", 4); } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 68c06ae7888c..1763f370489d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net) int err; err = -ENOMEM; - netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL); + netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!netd) goto out; - netd->subdir = RB_ROOT_CACHED; + netd->subdir = RB_ROOT; netd->data = net; netd->nlink = 2; netd->namelen = 3; netd->parent = &proc_root; + netd->name = netd->inline_name; memcpy(netd->name, "net", 4); uid = make_kuid(net->user_ns, 0); @@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net) return 0; free_net: - kfree(netd); + pde_free(netd); out: return err; } @@ -231,7 +232,7 @@ out: static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - kfree(net->proc_net); + pde_free(net->proc_net); } static struct pernet_operations __net_initdata proc_net_ns_ops = { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c41ab261397d..8989936f2995 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file, struct ctl_table *table) { bool ret = true; + head = sysctl_head_grab(head); + if (IS_ERR(head)) + return false; - if (S_ISLNK(table->mode)) { - /* It is not an error if we can not follow the link ignore it */ - int err = sysctl_follow_link(&head, &table); - if (err) - goto out; - } + /* It is not an error if we can not follow the link ignore it */ + if (sysctl_follow_link(&head, &table)) + goto out; ret = proc_sys_fill_cache(file, ctx, head, table); out: @@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table) if ((table->proc_handler == proc_douintvec) || (table->proc_handler == proc_douintvec_minmax)) { if (table->maxlen != sizeof(unsigned int)) - err |= sysctl_err(path, table, "array now allowed"); + err |= sysctl_err(path, table, "array not allowed"); } return err; diff --git a/fs/proc/root.c b/fs/proc/root.c index ede8e64974be..61b7340b357a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = { void __init proc_root_init(void) { - int err; - - proc_init_inodecache(); + proc_init_kmemcache(); set_proc_pid_nlink(); - err = register_filesystem(&proc_fs_type); - if (err) - return; - proc_self_init(); proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); - -#ifdef CONFIG_SYSVIPC - proc_mkdir("sysvipc", NULL); -#endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ @@ -150,6 +140,8 @@ void __init proc_root_init(void) proc_tty_init(); proc_mkdir("bus", NULL); proc_sys_init(); + + register_filesystem(&proc_fs_type); } static int proc_root_getattr(const struct path *path, struct kstat *stat, @@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = { .namelen = 5, .mode = S_IFDIR | S_IRUGO | S_IXUGO, .nlink = 2, - .count = ATOMIC_INIT(1), + .refcnt = REFCOUNT_INIT(1), .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, - .subdir = RB_ROOT_CACHED, - .name = "/proc", + .subdir = RB_ROOT, + .name = proc_root.inline_name, + .inline_name = "/proc", }; int pid_ns_prepare_proc(struct pid_namespace *ns) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ec6d2983a5cb..65ae54659833 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -24,6 +24,8 @@ #include <asm/tlbflush.h> #include "internal.h" +#define SEQ_PUT_DEC(str, val) \ + seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, swap, anon, file, shmem; @@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) lib = (mm->exec_vm << PAGE_SHIFT) - text; swap = get_mm_counter(mm, MM_SWAPENTS); - seq_printf(m, - "VmPeak:\t%8lu kB\n" - "VmSize:\t%8lu kB\n" - "VmLck:\t%8lu kB\n" - "VmPin:\t%8lu kB\n" - "VmHWM:\t%8lu kB\n" - "VmRSS:\t%8lu kB\n" - "RssAnon:\t%8lu kB\n" - "RssFile:\t%8lu kB\n" - "RssShmem:\t%8lu kB\n" - "VmData:\t%8lu kB\n" - "VmStk:\t%8lu kB\n" - "VmExe:\t%8lu kB\n" - "VmLib:\t%8lu kB\n" - "VmPTE:\t%8lu kB\n" - "VmSwap:\t%8lu kB\n", - hiwater_vm << (PAGE_SHIFT-10), - total_vm << (PAGE_SHIFT-10), - mm->locked_vm << (PAGE_SHIFT-10), - mm->pinned_vm << (PAGE_SHIFT-10), - hiwater_rss << (PAGE_SHIFT-10), - total_rss << (PAGE_SHIFT-10), - anon << (PAGE_SHIFT-10), - file << (PAGE_SHIFT-10), - shmem << (PAGE_SHIFT-10), - mm->data_vm << (PAGE_SHIFT-10), - mm->stack_vm << (PAGE_SHIFT-10), - text >> 10, - lib >> 10, - mm_pgtables_bytes(mm) >> 10, - swap << (PAGE_SHIFT-10)); + SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); + SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); + SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); + SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm); + SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); + SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); + SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); + SEQ_PUT_DEC(" kB\nRssFile:\t", file); + SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem); + SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm); + SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm); + seq_put_decimal_ull_width(m, + " kB\nVmExe:\t", text >> 10, 8); + seq_put_decimal_ull_width(m, + " kB\nVmLib:\t", lib >> 10, 8); + seq_put_decimal_ull_width(m, + " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); + SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); + seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } +#undef SEQ_PUT_DEC unsigned long task_vsize(struct mm_struct *mm) { @@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m, dev_t dev, unsigned long ino) { seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", - start, - end, - flags & VM_READ ? 'r' : '-', - flags & VM_WRITE ? 'w' : '-', - flags & VM_EXEC ? 'x' : '-', - flags & VM_MAYSHARE ? 's' : 'p', - pgoff, - MAJOR(dev), MINOR(dev), ino); + seq_put_hex_ll(m, NULL, start, 8); + seq_put_hex_ll(m, "-", end, 8); + seq_putc(m, ' '); + seq_putc(m, flags & VM_READ ? 'r' : '-'); + seq_putc(m, flags & VM_WRITE ? 'w' : '-'); + seq_putc(m, flags & VM_EXEC ? 'x' : '-'); + seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p'); + seq_put_hex_ll(m, " ", pgoff, 8); + seq_put_hex_ll(m, " ", MAJOR(dev), 2); + seq_put_hex_ll(m, ":", MINOR(dev), 2); + seq_put_decimal_ull(m, " ", ino); + seq_putc(m, ' '); } static void @@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) if (!mnemonics[i][0]) continue; if (vma->vm_flags & (1UL << i)) { - seq_printf(m, "%c%c ", - mnemonics[i][0], mnemonics[i][1]); + seq_putc(m, mnemonics[i][0]); + seq_putc(m, mnemonics[i][1]); + seq_putc(m, ' '); } } seq_putc(m, '\n'); @@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma) { } +#define SEQ_PUT_DEC(str, val) \ + seq_put_decimal_ull_width(m, str, (val) >> 10, 8) static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; @@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) ret = SEQ_SKIP; } - if (!rollup_mode) - seq_printf(m, - "Size: %8lu kB\n" - "KernelPageSize: %8lu kB\n" - "MMUPageSize: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - vma_kernel_pagesize(vma) >> 10, - vma_mmu_pagesize(vma) >> 10); - - - if (!rollup_mode || last_vma) - seq_printf(m, - "Rss: %8lu kB\n" - "Pss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n" - "Anonymous: %8lu kB\n" - "LazyFree: %8lu kB\n" - "AnonHugePages: %8lu kB\n" - "ShmemPmdMapped: %8lu kB\n" - "Shared_Hugetlb: %8lu kB\n" - "Private_Hugetlb: %7lu kB\n" - "Swap: %8lu kB\n" - "SwapPss: %8lu kB\n" - "Locked: %8lu kB\n", - mss->resident >> 10, - (unsigned long)(mss->pss >> (10 + PSS_SHIFT)), - mss->shared_clean >> 10, - mss->shared_dirty >> 10, - mss->private_clean >> 10, - mss->private_dirty >> 10, - mss->referenced >> 10, - mss->anonymous >> 10, - mss->lazyfree >> 10, - mss->anonymous_thp >> 10, - mss->shmem_thp >> 10, - mss->shared_hugetlb >> 10, - mss->private_hugetlb >> 10, - mss->swap >> 10, - (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)), - (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); + if (!rollup_mode) { + SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); + SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); + SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); + seq_puts(m, " kB\n"); + } + if (!rollup_mode || last_vma) { + SEQ_PUT_DEC("Rss: ", mss->resident); + SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); + SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); + SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); + SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty); + SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced); + SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous); + SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); + SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); + SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); + SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); + seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", + mss->private_hugetlb >> 10, 7); + SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); + SEQ_PUT_DEC(" kB\nSwapPss: ", + mss->swap_pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); + seq_puts(m, " kB\n"); + } if (!rollup_mode) { arch_show_smap(m, vma); show_smap_vma_flags(m, vma); @@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) m_cache_vma(m, vma); return ret; } +#undef SEQ_PUT_DEC static int show_pid_smap(struct seq_file *m, void *v) { diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 1143ef351c58..dc720573fd53 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -258,7 +258,7 @@ static int pstore_decompress(void *in, void *out, static void allocate_buf_for_compression(void) { - if (!zbackend) + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend) return; if (!crypto_has_comp(zbackend->name, 0, 0)) { @@ -287,7 +287,7 @@ static void allocate_buf_for_compression(void) static void free_buf_for_compression(void) { - if (!IS_ERR_OR_NULL(tfm)) + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && !IS_ERR_OR_NULL(tfm)) crypto_free_comp(tfm); kfree(big_oops_buf); big_oops_buf = NULL; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 70057359fbaf..23148c3ed675 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super, if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; - reiserfs_warning(super, + reiserfs_warning(super, "sh-457", "journal_init_dev: Cannot open '%s': %i", jdev_name, result); return result; diff --git a/fs/seq_file.c b/fs/seq_file.c index eea09f6d8830..c6c27f1f9c98 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -6,6 +6,7 @@ * initial implementation -- AV, Oct 2001. */ +#include <linux/cache.h> #include <linux/fs.h> #include <linux/export.h> #include <linux/seq_file.h> @@ -19,6 +20,8 @@ #include <linux/uaccess.h> #include <asm/page.h> +static struct kmem_cache *seq_file_cache __ro_after_init; + static void seq_set_overflow(struct seq_file *m) { m->count = m->size; @@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { - return kvmalloc(size, GFP_KERNEL); + return kvmalloc(size, GFP_KERNEL_ACCOUNT); } /** @@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op) WARN_ON(file->private_data); - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL); if (!p) return -ENOMEM; @@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; kvfree(m->buf); - kfree(m); + kmem_cache_free(seq_file_cache, m); return 0; } EXPORT_SYMBOL(seq_release); @@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v) int single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data) { - struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL); + struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT); int res = -ENOMEM; if (op) { @@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops, void *private; struct seq_file *seq; - private = kzalloc(psize, GFP_KERNEL); + private = kzalloc(psize, GFP_KERNEL_ACCOUNT); if (private == NULL) goto out; @@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s) } EXPORT_SYMBOL(seq_puts); -/* +/** * A helper routine for putting decimal numbers without rich format of printf(). * only 'unsigned long long' is supported. - * This routine will put strlen(delimiter) + number into seq_file. + * @m: seq_file identifying the buffer to which data should be written + * @delimiter: a string which is printed before the number + * @num: the number + * @width: a minimum field width + * + * This routine will put strlen(delimiter) + number into seq_filed. * This routine is very quick when you show lots of numbers. * In usual cases, it will be better to use seq_printf(). It's easier to read. */ -void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, - unsigned long long num) +void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, + unsigned long long num, unsigned int width) { int len; if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ goto overflow; - len = strlen(delimiter); - if (m->count + len >= m->size) - goto overflow; + if (delimiter && delimiter[0]) { + if (delimiter[1] == 0) + seq_putc(m, delimiter[0]); + else + seq_puts(m, delimiter); + } - memcpy(m->buf + m->count, delimiter, len); - m->count += len; + if (!width) + width = 1; - if (m->count + 1 >= m->size) + if (m->count + width >= m->size) goto overflow; if (num < 10) { @@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, return; } - len = num_to_str(m->buf + m->count, m->size - m->count, num); + len = num_to_str(m->buf + m->count, m->size - m->count, num, width); if (!len) goto overflow; @@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, overflow: seq_set_overflow(m); } + +void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, + unsigned long long num) +{ + return seq_put_decimal_ull_width(m, delimiter, num, 0); +} EXPORT_SYMBOL(seq_put_decimal_ull); +/** + * seq_put_hex_ll - put a number in hexadecimal notation + * @m: seq_file identifying the buffer to which data should be written + * @delimiter: a string which is printed before the number + * @v: the number + * @width: a minimum field width + * + * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v) + * + * This routine is very quick when you show lots of numbers. + * In usual cases, it will be better to use seq_printf(). It's easier to read. + */ +void seq_put_hex_ll(struct seq_file *m, const char *delimiter, + unsigned long long v, unsigned int width) +{ + unsigned int len; + int i; + + if (delimiter && delimiter[0]) { + if (delimiter[1] == 0) + seq_putc(m, delimiter[0]); + else + seq_puts(m, delimiter); + } + + /* If x is 0, the result of __builtin_clzll is undefined */ + if (v == 0) + len = 1; + else + len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4; + + if (len < width) + len = width; + + if (m->count + len > m->size) { + seq_set_overflow(m); + return; + } + + for (i = len - 1; i >= 0; i--) { + m->buf[m->count + i] = hex_asc[0xf & v]; + v = v >> 4; + } + m->count += len; +} + void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) { int len; @@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ goto overflow; - len = strlen(delimiter); - if (m->count + len >= m->size) - goto overflow; - - memcpy(m->buf + m->count, delimiter, len); - m->count += len; + if (delimiter && delimiter[0]) { + if (delimiter[1] == 0) + seq_putc(m, delimiter[0]); + else + seq_puts(m, delimiter); + } if (m->count + 2 >= m->size) goto overflow; @@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num return; } - len = num_to_str(m->buf + m->count, m->size - m->count, num); + len = num_to_str(m->buf + m->count, m->size - m->count, num, 0); if (!len) goto overflow; @@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write); void seq_pad(struct seq_file *m, char c) { int size = m->pad_until - m->count; - if (size > 0) - seq_printf(m, "%*s", size, ""); + if (size > 0) { + if (size + m->count > m->size) { + seq_set_overflow(m); + return; + } + memset(m->buf + m->count, ' ', size); + m->count += size; + } if (c) seq_putc(m, c); } @@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, return NULL; } EXPORT_SYMBOL(seq_hlist_next_percpu); + +void __init seq_file_init(void) +{ + seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC); +} diff --git a/fs/super.c b/fs/super.c index 672538ca9831..5fa9a8d8d865 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include <linux/user_namespace.h> #include "internal.h" +static int thaw_super_locked(struct super_block *sb); static LIST_HEAD(super_blocks); static DEFINE_SPINLOCK(sb_lock); @@ -574,6 +575,28 @@ void drop_super_exclusive(struct super_block *sb) } EXPORT_SYMBOL(drop_super_exclusive); +static void __iterate_supers(void (*f)(struct super_block *)) +{ + struct super_block *sb, *p = NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (hlist_unhashed(&sb->s_instances)) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + + f(sb); + + spin_lock(&sb_lock); + if (p) + __put_super(p); + p = sb; + } + if (p) + __put_super(p); + spin_unlock(&sb_lock); +} /** * iterate_supers - call function for all active superblocks * @f: function to call @@ -881,33 +904,22 @@ cancel_readonly: return retval; } -static void do_emergency_remount(struct work_struct *work) +static void do_emergency_remount_callback(struct super_block *sb) { - struct super_block *sb, *p = NULL; - - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (hlist_unhashed(&sb->s_instances)) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_write(&sb->s_umount); - if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && - !sb_rdonly(sb)) { - /* - * What lock protects sb->s_flags?? - */ - do_remount_sb(sb, SB_RDONLY, NULL, 1); - } - up_write(&sb->s_umount); - spin_lock(&sb_lock); - if (p) - __put_super(p); - p = sb; + down_write(&sb->s_umount); + if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) && + !sb_rdonly(sb)) { + /* + * What lock protects sb->s_flags?? + */ + do_remount_sb(sb, SB_RDONLY, NULL, 1); } - if (p) - __put_super(p); - spin_unlock(&sb_lock); + up_write(&sb->s_umount); +} + +static void do_emergency_remount(struct work_struct *work) +{ + __iterate_supers(do_emergency_remount_callback); kfree(work); printk("Emergency Remount complete\n"); } @@ -923,6 +935,40 @@ void emergency_remount(void) } } +static void do_thaw_all_callback(struct super_block *sb) +{ + down_write(&sb->s_umount); + if (sb->s_root && sb->s_flags & MS_BORN) { + emergency_thaw_bdev(sb); + thaw_super_locked(sb); + } else { + up_write(&sb->s_umount); + } +} + +static void do_thaw_all(struct work_struct *work) +{ + __iterate_supers(do_thaw_all_callback); + kfree(work); + printk(KERN_WARNING "Emergency Thaw complete\n"); +} + +/** + * emergency_thaw_all -- forcibly thaw every frozen filesystem + * + * Used for emergency unfreeze of all filesystems via SysRq + */ +void emergency_thaw_all(void) +{ + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_thaw_all); + schedule_work(work); + } +} + /* * Unnamed block devices are dummy devices used by virtual * filesystems which don't use real block-devices. -- jrs @@ -1492,11 +1538,10 @@ EXPORT_SYMBOL(freeze_super); * * Unlocks the filesystem and marks it writeable again after freeze_super(). */ -int thaw_super(struct super_block *sb) +static int thaw_super_locked(struct super_block *sb) { int error; - down_write(&sb->s_umount); if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) { up_write(&sb->s_umount); return -EINVAL; @@ -1527,4 +1572,10 @@ out: deactivate_locked_super(sb); return 0; } + +int thaw_super(struct super_block *sb) +{ + down_write(&sb->s_umount); + return thaw_super_locked(sb); +} EXPORT_SYMBOL(thaw_super); diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 2dcf3d473fec..9571616b5dda 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -632,7 +632,7 @@ static int scan_for_idx_cb(struct ubifs_info *c, */ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) { - struct ubifs_lprops *lprops; + const struct ubifs_lprops *lprops; struct scan_data data; int err; diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 6c3a1abd0e22..f5a46844340c 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -244,7 +244,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c, /** * lpt_heap_replace - replace lprops in a category heap. * @c: UBIFS file-system description object - * @old_lprops: LEB properties to replace * @new_lprops: LEB properties with which to replace * @cat: LEB category * @@ -254,7 +253,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c, * lprops. This function does that. */ static void lpt_heap_replace(struct ubifs_info *c, - struct ubifs_lprops *old_lprops, struct ubifs_lprops *new_lprops, int cat) { struct ubifs_lpt_heap *heap; @@ -362,7 +360,7 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, case LPROPS_DIRTY: case LPROPS_DIRTY_IDX: case LPROPS_FREE: - lpt_heap_replace(c, old_lprops, new_lprops, cat); + lpt_heap_replace(c, new_lprops, cat); break; case LPROPS_UNCAT: case LPROPS_EMPTY: diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index aab87340d3de..16f03d9929e5 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -175,7 +175,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, int lnum, int offs) { - lnum = lnum; dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); ubifs_assert(offs % c->min_io_size == 0); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index b16ef162344a..6c397a389105 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1737,8 +1737,11 @@ static void ubifs_remount_ro(struct ubifs_info *c) dbg_save_space_info(c); - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); @@ -1804,8 +1807,11 @@ static void ubifs_put_super(struct super_block *sb) int err; /* Synchronize write-buffers */ - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } /* * We are being cleanly unmounted which means the diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 39387bdd225d..4bcc095fe44a 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1947,7 +1947,7 @@ void xfs_alloc_compute_maxlevels( xfs_mount_t *mp) /* file system mount structure */ { - mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr, + mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, (mp->m_sb.sb_agblocks + 1) / 2); } @@ -1959,7 +1959,6 @@ xfs_alloc_compute_maxlevels( */ xfs_extlen_t xfs_alloc_longest_free_extent( - struct xfs_mount *mp, struct xfs_perag *pag, xfs_extlen_t need, xfs_extlen_t reserved) @@ -2038,8 +2037,7 @@ xfs_alloc_space_available( /* do we have enough contiguous free space for the allocation? */ alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; - longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, - reservation); + longest = xfs_alloc_longest_free_extent(pag, min_free, reservation); if (longest < alloc_len) return false; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index a311a2414a6b..cbf789ea5a4e 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -116,9 +116,8 @@ xfs_alloc_allow_busy_reuse(int datatype) unsigned int xfs_alloc_set_aside(struct xfs_mount *mp); unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); -xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, - struct xfs_perag *pag, xfs_extlen_t need, - xfs_extlen_t reserved); +xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_perag *pag, + xfs_extlen_t need, xfs_extlen_t reserved); unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3b03d886df66..6a7c2f03ea11 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3225,7 +3225,7 @@ xfs_bmap_longest_free_extent( } } - longest = xfs_alloc_longest_free_extent(mp, pag, + longest = xfs_alloc_longest_free_extent(pag, xfs_alloc_min_freelist(mp, pag), xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); if (*blen < longest) @@ -5667,7 +5667,6 @@ xfs_bmap_collapse_extents( xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, - xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops) { diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index f3be6416260b..2b766b37096d 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -228,7 +228,7 @@ void xfs_bmap_del_extent_cow(struct xfs_inode *ip, uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, - bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, + bool *done, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops); int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index edc0193358a5..ac7d66427e42 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4531,7 +4531,6 @@ xfs_btree_sblock_verify( */ uint xfs_btree_compute_maxlevels( - struct xfs_mount *mp, uint *limits, unsigned long len) { @@ -4839,7 +4838,6 @@ xfs_btree_query_all( */ xfs_extlen_t xfs_btree_calc_size( - struct xfs_mount *mp, uint *limits, unsigned long long len) { diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 58e30c0975c3..9227159a751e 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -481,10 +481,8 @@ xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); -uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, - unsigned long len); -xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, - unsigned long long len); +uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); +xfs_extlen_t xfs_btree_calc_size(uint *limits, unsigned long long len); /* return codes */ #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0e2cf5f0be1f..de627fa19168 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2406,7 +2406,7 @@ xfs_ialloc_compute_maxlevels( uint inodes; inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; - mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_inobt_mnr, + mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr, inodes); } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index a2dd7f4a2719..367e9a0726e6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -556,7 +556,7 @@ xfs_inobt_max_size( if (mp->m_inobt_mxr[0] == 0) return 0; - return xfs_btree_calc_size(mp, mp->m_inobt_mnr, + return xfs_btree_calc_size(mp->m_inobt_mnr, (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK); } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index bee68c23d612..560e28473024 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -351,7 +351,6 @@ xfs_refcount_merge_center_extents( struct xfs_refcount_irec *center, struct xfs_refcount_irec *right, unsigned long long extlen, - xfs_agblock_t *agbno, xfs_extlen_t *aglen) { int error; @@ -471,7 +470,6 @@ xfs_refcount_merge_right_extent( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, - xfs_agblock_t *agbno, xfs_extlen_t *aglen) { int error; @@ -749,7 +747,7 @@ xfs_refcount_merge_extents( ulen < MAXREFCEXTLEN) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, - &right, ulen, agbno, aglen); + &right, ulen, aglen); } /* Try to merge left and cleft. */ @@ -778,7 +776,7 @@ xfs_refcount_merge_extents( ulen < MAXREFCEXTLEN) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, - agbno, aglen); + aglen); } return error; @@ -1356,9 +1354,7 @@ xfs_refcount_adjust_cow_extents( struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, - enum xfs_refc_adjust_op adj, - struct xfs_defer_ops *dfops, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; @@ -1437,8 +1433,7 @@ xfs_refcount_adjust_cow( struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, - enum xfs_refc_adjust_op adj, - struct xfs_defer_ops *dfops) + enum xfs_refc_adjust_op adj) { bool shape_changed; int error; @@ -1465,8 +1460,7 @@ xfs_refcount_adjust_cow( goto out_error; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj, - dfops, NULL); + error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); if (error) goto out_error; @@ -1493,7 +1487,7 @@ __xfs_refcount_cow_alloc( /* Add refcount btree reservation */ return xfs_refcount_adjust_cow(rcur, agbno, aglen, - XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); + XFS_REFCOUNT_ADJUST_COW_ALLOC); } /* @@ -1511,7 +1505,7 @@ __xfs_refcount_cow_free( /* Remove refcount btree reservation */ return xfs_refcount_adjust_cow(rcur, agbno, aglen, - XFS_REFCOUNT_ADJUST_COW_FREE, dfops); + XFS_REFCOUNT_ADJUST_COW_FREE); } /* Record a CoW staging extent in the refcount btree. */ @@ -1568,7 +1562,7 @@ struct xfs_refcount_recovery { /* Stuff an extent on the recovery list. */ STATIC int xfs_refcount_recover_extent( - struct xfs_btree_cur *cur, + struct xfs_btree_cur *cur, union xfs_btree_rec *rec, void *priv) { diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 265fdcefcbae..375abfeb6267 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -373,7 +373,6 @@ xfs_refcountbt_init_cursor( */ int xfs_refcountbt_maxrecs( - struct xfs_mount *mp, int blocklen, bool leaf) { @@ -390,7 +389,7 @@ void xfs_refcountbt_compute_maxlevels( struct xfs_mount *mp) { - mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp, + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels( mp->m_refc_mnr, mp->m_sb.sb_agblocks); } @@ -400,7 +399,7 @@ xfs_refcountbt_calc_size( struct xfs_mount *mp, unsigned long long len) { - return xfs_btree_calc_size(mp, mp->m_refc_mnr, len); + return xfs_btree_calc_size(mp->m_refc_mnr, len); } /* diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 9db008b955b7..2bc4694ef146 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -60,8 +60,7 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno, struct xfs_defer_ops *dfops); -extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen, - bool leaf); +extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 79822cf6ebe3..fba8d2718017 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -376,7 +376,6 @@ xfs_rmap_free_check_owner( struct xfs_mount *mp, uint64_t ltoff, struct xfs_rmap_irec *rec, - xfs_fsblock_t bno, xfs_filblks_t len, uint64_t owner, uint64_t offset, @@ -519,7 +518,7 @@ xfs_rmap_unmap( bno + len, out_error); /* Check owner information. */ - error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, len, owner, offset, flags); if (error) goto out_error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 8b0d0de1cd11..d756e0b84abf 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -499,7 +499,6 @@ xfs_rmapbt_init_cursor( */ int xfs_rmapbt_maxrecs( - struct xfs_mount *mp, int blocklen, int leaf) { @@ -534,7 +533,7 @@ xfs_rmapbt_compute_maxlevels( if (xfs_sb_version_hasreflink(&mp->m_sb)) mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; else - mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp, + mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( mp->m_rmap_mnr, mp->m_sb.sb_agblocks); } @@ -544,7 +543,7 @@ xfs_rmapbt_calc_size( struct xfs_mount *mp, unsigned long long len) { - return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len); + return xfs_btree_calc_size(mp->m_rmap_mnr, len); } /* diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 19c08e933049..d68d96eed7ea 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -55,7 +55,7 @@ struct xfs_mount; struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, xfs_agnumber_t agno); -int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf); +int xfs_rmapbt_maxrecs(int blocklen, int leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 53433cc024fd..d9b94bd5f689 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -756,15 +756,13 @@ xfs_sb_mount_common( mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; - mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); + mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; - mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, - true); - mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, - false); + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5f17641f040f..3bccdf73e141 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -734,8 +734,7 @@ xfs_calc_clear_agi_bucket_reservation( * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) */ STATIC uint -xfs_calc_qm_setqlim_reservation( - struct xfs_mount *mp) +xfs_calc_qm_setqlim_reservation(void) { return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); } @@ -772,8 +771,7 @@ xfs_calc_qm_quotaoff_reservation( * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 */ STATIC uint -xfs_calc_qm_quotaoff_end_reservation( - struct xfs_mount *mp) +xfs_calc_qm_quotaoff_end_reservation(void) { return sizeof(struct xfs_qoff_logitem) * 2; } @@ -877,14 +875,14 @@ xfs_trans_resv_calc( * The following transactions are logged in logical format with * a default log count. */ - resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); + resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(); resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp); resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; resp->tr_qm_equotaoff.tr_logres = - xfs_calc_qm_quotaoff_end_reservation(mp); + xfs_calc_qm_quotaoff_end_reservation(); resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT; resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 31f1f10eecd1..0ab824f574ed 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1195,16 +1195,22 @@ xfs_vm_writepages( int ret; xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); - if (dax_mapping(mapping)) - return dax_writeback_mapping_range(mapping, - xfs_find_bdev_for_inode(mapping->host), wbc); - ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); if (wpc.ioend) ret = xfs_submit_ioend(wbc, wpc.ioend, ret); return ret; } +STATIC int +xfs_dax_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + return dax_writeback_mapping_range(mapping, + xfs_find_bdev_for_inode(mapping->host), wbc); +} + /* * Called to move a page into cleanable state - and from there * to be released. The page should already be clean. We always @@ -1367,17 +1373,6 @@ out_unlock: return error; } -STATIC ssize_t -xfs_vm_direct_IO( - struct kiocb *iocb, - struct iov_iter *iter) -{ - /* - * We just need the method present so that open/fcntl allow direct I/O. - */ - return -EINVAL; -} - STATIC sector_t xfs_vm_bmap( struct address_space *mapping, @@ -1472,19 +1467,8 @@ xfs_vm_set_page_dirty( newly_dirty = !TestSetPageDirty(page); spin_unlock(&mapping->private_lock); - if (newly_dirty) { - /* sigh - __set_page_dirty() is static, so copy it here, too */ - unsigned long flags; - - spin_lock_irqsave(&mapping->tree_lock, flags); - if (page->mapping) { /* Race with truncate? */ - WARN_ON_ONCE(!PageUptodate(page)); - account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - } - spin_unlock_irqrestore(&mapping->tree_lock, flags); - } + if (newly_dirty) + __set_page_dirty(page, mapping, 1); unlock_page_memcg(page); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -1500,8 +1484,15 @@ const struct address_space_operations xfs_address_space_operations = { .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .bmap = xfs_vm_bmap, - .direct_IO = xfs_vm_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; + +const struct address_space_operations xfs_dax_aops = { + .writepages = xfs_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 88c85ea63da0..69346d460dfa 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -54,6 +54,7 @@ struct xfs_ioend { }; extern const struct address_space_operations xfs_address_space_operations; +extern const struct address_space_operations xfs_dax_aops; int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e5fb008d75e8..2203465e63ea 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -53,6 +53,25 @@ xfs_bui_item_free( kmem_zone_free(xfs_bui_zone, buip); } +/* + * Freeing the BUI requires that we remove it from the AIL if it has already + * been placed there. However, the BUI may not yet have been placed in the AIL + * when called by xfs_bui_release() from BUD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the BUI. + */ +void +xfs_bui_release( + struct xfs_bui_log_item *buip) +{ + ASSERT(atomic_read(&buip->bui_refcount) > 0); + if (atomic_dec_and_test(&buip->bui_refcount)) { + xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); + xfs_bui_item_free(buip); + } +} + + STATIC void xfs_bui_item_size( struct xfs_log_item *lip, @@ -142,7 +161,7 @@ xfs_bui_item_unlock( struct xfs_log_item *lip) { if (lip->li_flags & XFS_LI_ABORTED) - xfs_bui_item_free(BUI_ITEM(lip)); + xfs_bui_release(BUI_ITEM(lip)); } /* @@ -206,24 +225,6 @@ xfs_bui_init( return buip; } -/* - * Freeing the BUI requires that we remove it from the AIL if it has already - * been placed there. However, the BUI may not yet have been placed in the AIL - * when called by xfs_bui_release() from BUD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the reference - * count to ensure only the last caller frees the BUI. - */ -void -xfs_bui_release( - struct xfs_bui_log_item *buip) -{ - ASSERT(atomic_read(&buip->bui_refcount) > 0); - if (atomic_dec_and_test(&buip->bui_refcount)) { - xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_bui_item_free(buip); - } -} - static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_bud_log_item, bud_item); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 05dee8fdd895..8cd8c412f52d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1326,7 +1326,6 @@ xfs_collapse_file_space( int error; struct xfs_defer_ops dfops; xfs_fsblock_t first_block; - xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); @@ -1361,7 +1360,7 @@ xfs_collapse_file_space( xfs_defer_init(&dfops, &first_block); error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, - &done, stop_fsb, &first_block, &dfops); + &done, &first_block, &dfops); if (error) goto out_bmap_cancel; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ac669a10c62f..55661cbdb51b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1754,7 +1754,6 @@ xfs_buftarg_shrink_count( void xfs_free_buftarg( - struct xfs_mount *mp, struct xfs_buftarg *btp) { unregister_shrinker(&btp->bt_shrinker); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 2f4c91452861..edced162a674 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -388,7 +388,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, struct block_device *, struct dax_device *); -extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); +extern void xfs_free_buftarg(struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index b2cde5426182..7b68e6c9a474 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -50,19 +50,19 @@ xfs_trim_extents( pag = xfs_perag_get(mp, agno); - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); - if (error || !agbp) - goto out_put_perag; - - cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); - /* * Force out the log. This means any transactions that might have freed - * space before we took the AGF buffer lock are now on disk, and the + * space before we take the AGF buffer lock are now on disk, and the * volatile disk cache is flushed. */ xfs_log_force(mp, XFS_LOG_SYNC); + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error || !agbp) + goto out_put_perag; + + cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); + /* * Look up the longest btree in the AGF and start with it. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 64da90655e95..b5b1e567b9f4 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -51,6 +51,24 @@ xfs_efi_item_free( } /* + * Freeing the efi requires that we remove it from the AIL if it has already + * been placed there. However, the EFI may not yet have been placed in the AIL + * when called by xfs_efi_release() from EFD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the EFI. + */ +void +xfs_efi_release( + struct xfs_efi_log_item *efip) +{ + ASSERT(atomic_read(&efip->efi_refcount) > 0); + if (atomic_dec_and_test(&efip->efi_refcount)) { + xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); + xfs_efi_item_free(efip); + } +} + +/* * This returns the number of iovecs needed to log the given efi item. * We only need 1 iovec for an efi item. It just logs the efi_log_format * structure. @@ -151,7 +169,7 @@ xfs_efi_item_unlock( struct xfs_log_item *lip) { if (lip->li_flags & XFS_LI_ABORTED) - xfs_efi_item_free(EFI_ITEM(lip)); + xfs_efi_release(EFI_ITEM(lip)); } /* @@ -279,24 +297,6 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) return -EFSCORRUPTED; } -/* - * Freeing the efi requires that we remove it from the AIL if it has already - * been placed there. However, the EFI may not yet have been placed in the AIL - * when called by xfs_efi_release() from EFD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the reference - * count to ensure only the last caller frees the EFI. - */ -void -xfs_efi_release( - struct xfs_efi_log_item *efip) -{ - ASSERT(atomic_read(&efip->efi_refcount) > 0); - if (atomic_dec_and_test(&efip->efi_refcount)) { - xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); - xfs_efi_item_free(efip); - } -} - static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_efd_log_item, efd_item); diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 043ca3808ea2..3f8722e51dbe 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -34,7 +34,6 @@ struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; - struct xfs_inode *ip; xfs_agnumber_t ag; /* AG in use for this directory */ }; @@ -122,14 +121,15 @@ xfs_filestream_put_ag( static void xfs_fstrm_free_func( + void *data, struct xfs_mru_cache_elem *mru) { + struct xfs_mount *mp = data; struct xfs_fstrm_item *item = container_of(mru, struct xfs_fstrm_item, mru); - xfs_filestream_put_ag(item->ip->i_mount, item->ag); - - trace_xfs_filestream_free(item->ip, item->ag); + xfs_filestream_put_ag(mp, item->ag); + trace_xfs_filestream_free(mp, mru->key, item->ag); kmem_free(item); } @@ -165,7 +165,7 @@ xfs_filestream_pick_ag( trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { - trace_xfs_filestream_scan(ip, ag); + trace_xfs_filestream_scan(mp, ip->i_ino, ag); pag = xfs_perag_get(mp, ag); @@ -198,7 +198,7 @@ xfs_filestream_pick_ag( goto next_ag; } - longest = xfs_alloc_longest_free_extent(mp, pag, + longest = xfs_alloc_longest_free_extent(pag, xfs_alloc_min_freelist(mp, pag), xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); if (((minlen && longest >= minlen) || @@ -265,7 +265,6 @@ next_ag: goto out_put_ag; item->ag = *agp; - item->ip = ip; err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); if (err) { @@ -333,7 +332,7 @@ xfs_filestream_lookup_ag( ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; xfs_mru_cache_done(mp->m_filestream); - trace_xfs_filestream_lookup(ip, ag); + trace_xfs_filestream_lookup(mp, ip->i_ino, ag); goto out; } @@ -399,7 +398,7 @@ xfs_filestream_new_ag( * Only free the item here so we skip over the old AG earlier. */ if (mru) - xfs_fstrm_free_func(mru); + xfs_fstrm_free_func(mp, mru); IRELE(pip); exit: @@ -426,8 +425,8 @@ xfs_filestream_mount( * timer tunable to within about 10 percent. This requires at least 10 * groups. */ - return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, - 10, xfs_fstrm_free_func); + return xfs_mru_cache_create(&mp->m_filestream, mp, + xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); } void diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3e3aab3888fa..2b70c8b4cee2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -972,10 +972,8 @@ xfs_dir_ialloc( xfs_nlink_t nlink, dev_t rdev, prid_t prid, /* project id */ - xfs_inode_t **ipp, /* pointer to inode; it will be + xfs_inode_t **ipp) /* pointer to inode; it will be locked. */ - int *committed) - { xfs_trans_t *tp; xfs_inode_t *ip; @@ -1050,8 +1048,6 @@ xfs_dir_ialloc( } code = xfs_trans_roll(&tp); - if (committed != NULL) - *committed = 1; /* * Re-attach the quota info that we detached from prev trx. @@ -1088,9 +1084,6 @@ xfs_dir_ialloc( } ASSERT(!ialloc_context && ip); - } else { - if (committed != NULL) - *committed = 0; } *ipp = ip; @@ -1217,8 +1210,7 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip, - NULL); + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip); if (error) goto out_trans_cancel; @@ -1309,7 +1301,6 @@ xfs_create( int xfs_create_tmpfile( struct xfs_inode *dp, - struct dentry *dentry, umode_t mode, struct xfs_inode **ipp) { @@ -1351,7 +1342,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip); if (error) goto out_trans_cancel; @@ -1611,13 +1602,15 @@ xfs_itruncate_extents( goto out; } - /* Remove all pending CoW reservations. */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block, true); - if (error) - goto out; + if (whichfork == XFS_DATA_FORK) { + /* Remove all pending CoW reservations. */ + error = xfs_reflink_cancel_cow_blocks(ip, &tp, + first_unmap_block, last_block, true); + if (error) + goto out; - xfs_itruncate_clear_reflink_flags(ip); + xfs_itruncate_clear_reflink_flags(ip); + } /* * Always re-log the inode so that our permanent transaction can keep @@ -2903,7 +2896,7 @@ xfs_rename_alloc_whiteout( struct xfs_inode *tmpfile; int error; - error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile); + error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 132d8aa2afc4..1eebc53df7d7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -393,8 +393,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode, dev_t rdev, struct xfs_inode **ipp); -int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, - umode_t mode, struct xfs_inode **ipp); +int xfs_create_tmpfile(struct xfs_inode *dp, umode_t mode, + struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, @@ -431,7 +431,7 @@ xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, xfs_nlink_t, dev_t, prid_t, - struct xfs_inode **, int *); + struct xfs_inode **); /* from xfs_file.c */ enum xfs_prealloc_flags { diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e0307fbff911..a3ed3c811dfa 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -177,7 +177,7 @@ xfs_generic_create( if (!tmpfile) { error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); } else { - error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); + error = xfs_create_tmpfile(XFS_I(dir), mode, &ip); } if (unlikely(error)) goto out_free_acl; @@ -1285,7 +1285,10 @@ xfs_setup_iops( case S_IFREG: inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &xfs_dax_aops; + else + inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b9c9c848146b..2fcd9ed5d075 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -560,7 +560,6 @@ xfs_log_done( */ int xfs_log_notify( - struct xfs_mount *mp, struct xlog_in_core *iclog, xfs_log_callback_t *cb) { diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 7e2d62922a16..fa8ad31d587f 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -141,8 +141,7 @@ int xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); -int xfs_log_notify(struct xfs_mount *mp, - struct xlog_in_core *iclog, +int xfs_log_notify(struct xlog_in_core *iclog, struct xfs_log_callback *callback_entry); int xfs_log_release_iclog(struct xfs_mount *mp, struct xlog_in_core *iclog); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cb376ac8a595..4668403b1741 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -848,7 +848,7 @@ restart: /* attach all the transactions w/ busy extents to iclog */ ctx->log_cb.cb_func = xlog_cil_committed; ctx->log_cb.cb_arg = ctx; - error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb); + error = xfs_log_notify(commit_iclog, &ctx->log_cb); if (error) goto out_abort; diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index f8a674d7f092..70eea7ae2876 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -112,6 +112,7 @@ struct xfs_mru_cache { xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ struct delayed_work work; /* Workqueue data for reaping. */ unsigned int queued; /* work has been queued */ + void *data; }; static struct workqueue_struct *xfs_mru_reap_wq; @@ -259,7 +260,7 @@ _xfs_mru_cache_clear_reap_list( list_for_each_entry_safe(elem, next, &tmp, list_node) { list_del_init(&elem->list_node); - mru->free_func(elem); + mru->free_func(mru->data, elem); } spin_lock(&mru->lock); @@ -326,6 +327,7 @@ xfs_mru_cache_uninit(void) int xfs_mru_cache_create( struct xfs_mru_cache **mrup, + void *data, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func) @@ -369,7 +371,7 @@ xfs_mru_cache_create( mru->grp_time = grp_time; mru->free_func = free_func; - + mru->data = data; *mrup = mru; exit: @@ -492,7 +494,7 @@ xfs_mru_cache_delete( elem = xfs_mru_cache_remove(mru, key); if (elem) - mru->free_func(elem); + mru->free_func(mru->data, elem); } /* diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index fb5245ba5ff7..b3f3fbdfcc47 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -26,13 +26,13 @@ struct xfs_mru_cache_elem { }; /* Function pointer type for callback to free a client's data pointer. */ -typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem); +typedef void (*xfs_mru_cache_free_func_t)(void *, struct xfs_mru_cache_elem *); int xfs_mru_cache_init(void); void xfs_mru_cache_uninit(void); -int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, - unsigned int grp_count, - xfs_mru_cache_free_func_t free_func); +int xfs_mru_cache_create(struct xfs_mru_cache **mrup, void *data, + unsigned int lifetime_ms, unsigned int grp_count, + xfs_mru_cache_free_func_t free_func); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, struct xfs_mru_cache_elem *elem); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5b848f4b637f..ec39ae274c78 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -748,7 +748,6 @@ xfs_qm_qino_alloc( { xfs_trans_t *tp; int error; - int committed; bool need_alloc = true; *ip = NULL; @@ -788,8 +787,7 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip, - &committed); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip); if (error) { xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7a39f40645f7..15c9393dd7a7 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -52,6 +52,25 @@ xfs_cui_item_free( kmem_zone_free(xfs_cui_zone, cuip); } +/* + * Freeing the CUI requires that we remove it from the AIL if it has already + * been placed there. However, the CUI may not yet have been placed in the AIL + * when called by xfs_cui_release() from CUD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the CUI. + */ +void +xfs_cui_release( + struct xfs_cui_log_item *cuip) +{ + ASSERT(atomic_read(&cuip->cui_refcount) > 0); + if (atomic_dec_and_test(&cuip->cui_refcount)) { + xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); + xfs_cui_item_free(cuip); + } +} + + STATIC void xfs_cui_item_size( struct xfs_log_item *lip, @@ -141,7 +160,7 @@ xfs_cui_item_unlock( struct xfs_log_item *lip) { if (lip->li_flags & XFS_LI_ABORTED) - xfs_cui_item_free(CUI_ITEM(lip)); + xfs_cui_release(CUI_ITEM(lip)); } /* @@ -211,24 +230,6 @@ xfs_cui_init( return cuip; } -/* - * Freeing the CUI requires that we remove it from the AIL if it has already - * been placed there. However, the CUI may not yet have been placed in the AIL - * when called by xfs_cui_release() from CUD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the reference - * count to ensure only the last caller frees the CUI. - */ -void -xfs_cui_release( - struct xfs_cui_log_item *cuip) -{ - ASSERT(atomic_read(&cuip->cui_refcount) > 0); - if (atomic_dec_and_test(&cuip->cui_refcount)) { - xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_cui_item_free(cuip); - } -} - static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_cud_log_item, cud_item); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 49d3124863a8..06a07846c9b3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -52,6 +52,24 @@ xfs_rui_item_free( kmem_zone_free(xfs_rui_zone, ruip); } +/* + * Freeing the RUI requires that we remove it from the AIL if it has already + * been placed there. However, the RUI may not yet have been placed in the AIL + * when called by xfs_rui_release() from RUD processing due to the ordering of + * committed vs unpin operations in bulk insert operations. Hence the reference + * count to ensure only the last caller frees the RUI. + */ +void +xfs_rui_release( + struct xfs_rui_log_item *ruip) +{ + ASSERT(atomic_read(&ruip->rui_refcount) > 0); + if (atomic_dec_and_test(&ruip->rui_refcount)) { + xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); + xfs_rui_item_free(ruip); + } +} + STATIC void xfs_rui_item_size( struct xfs_log_item *lip, @@ -141,7 +159,7 @@ xfs_rui_item_unlock( struct xfs_log_item *lip) { if (lip->li_flags & XFS_LI_ABORTED) - xfs_rui_item_free(RUI_ITEM(lip)); + xfs_rui_release(RUI_ITEM(lip)); } /* @@ -233,24 +251,6 @@ xfs_rui_copy_format( return 0; } -/* - * Freeing the RUI requires that we remove it from the AIL if it has already - * been placed there. However, the RUI may not yet have been placed in the AIL - * when called by xfs_rui_release() from RUD processing due to the ordering of - * committed vs unpin operations in bulk insert operations. Hence the reference - * count to ensure only the last caller frees the RUI. - */ -void -xfs_rui_release( - struct xfs_rui_log_item *ruip) -{ - ASSERT(atomic_read(&ruip->rui_refcount) > 0); - if (atomic_dec_and_test(&ruip->rui_refcount)) { - xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); - xfs_rui_item_free(ruip); - } -} - static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_rud_log_item, rud_item); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 612c1d5348b3..d71424052917 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -722,7 +722,7 @@ xfs_close_devices( struct block_device *logdev = mp->m_logdev_targp->bt_bdev; struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev; - xfs_free_buftarg(mp, mp->m_logdev_targp); + xfs_free_buftarg(mp->m_logdev_targp); xfs_blkdev_put(logdev); fs_put_dax(dax_logdev); } @@ -730,11 +730,11 @@ xfs_close_devices( struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev; - xfs_free_buftarg(mp, mp->m_rtdev_targp); + xfs_free_buftarg(mp->m_rtdev_targp); xfs_blkdev_put(rtdev); fs_put_dax(dax_rtdev); } - xfs_free_buftarg(mp, mp->m_ddev_targp); + xfs_free_buftarg(mp->m_ddev_targp); fs_put_dax(dax_ddev); } @@ -808,9 +808,9 @@ xfs_open_devices( out_free_rtdev_targ: if (mp->m_rtdev_targp) - xfs_free_buftarg(mp, mp->m_rtdev_targp); + xfs_free_buftarg(mp->m_rtdev_targp); out_free_ddev_targ: - xfs_free_buftarg(mp, mp->m_ddev_targp); + xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: xfs_blkdev_put(rtdev); fs_put_dax(dax_rtdev); @@ -1247,7 +1247,6 @@ xfs_quiesce_attr( STATIC int xfs_test_remount_options( struct super_block *sb, - struct xfs_mount *mp, char *options) { int error = 0; @@ -1278,7 +1277,7 @@ xfs_fs_remount( int error; /* First, check for complete junk; i.e. invalid options */ - error = xfs_test_remount_options(sb, mp, options); + error = xfs_test_remount_options(sb, options); if (error) return error; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 2e9e793a8f9d..5b66ac12913c 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -264,7 +264,7 @@ xfs_symlink( * Allocate an inode for the symlink. */ error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, - prid, &ip, NULL); + prid, &ip); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a982c0b623d0..8955254b900e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -506,8 +506,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DECLARE_EVENT_CLASS(xfs_filestream_class, - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), - TP_ARGS(ip, agno), + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), + TP_ARGS(mp, ino, agno), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) @@ -515,10 +515,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, __field(int, streams) ), TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; + __entry->dev = mp->m_super->s_dev; + __entry->ino = ino; __entry->agno = agno; - __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + __entry->streams = xfs_filestream_peek_ag(mp, agno); ), TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -528,8 +528,8 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, ) #define DEFINE_FILESTREAM_EVENT(name) \ DEFINE_EVENT(xfs_filestream_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ - TP_ARGS(ip, agno)) + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \ + TP_ARGS(mp, ino, agno)) DEFINE_FILESTREAM_EVENT(xfs_filestream_free); DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); |