Diffstat (limited to 'fs'): 37 files changed, 6444 insertions(+), 5277 deletions(-)
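
The hunks below replace the old vlocation/vnode machinery with refcounted address lists (struct afs_addr_list) and an address cursor (struct afs_addr_cursor) that rotates through a server's addresses starting at the last index that responded. As a reading aid, here is a minimal userspace sketch of that rotation logic only; the types and names (addr_list, addr_cursor, iterate_addresses, end_cursor) are simplified stand-ins rather than the kernel structures, and the refcounting, RCU and sockaddr_rxrpc plumbing from the patch are deliberately omitted.

/*
 * Illustrative sketch, not kernel code: models the behaviour of
 * afs_iterate_addresses()/afs_end_cursor() from the diff below with
 * simplified types.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct addr_list {
	unsigned int nr_addrs;		/* number of entries in addrs[] */
	unsigned int index;		/* preferred start (last responder) */
	const char *addrs[8];		/* stand-in for sockaddr_rxrpc entries */
};

struct addr_cursor {
	struct addr_list *alist;
	unsigned int start;		/* where this iteration began */
	unsigned int index;		/* current slot */
	bool begun;
	bool responded;
	int error;
};

/* Begin at the list's preferred index; each call advances and wraps,
 * giving up once every address has been offered exactly once. */
static bool iterate_addresses(struct addr_cursor *ac)
{
	if (!ac->alist)
		return false;
	if (ac->begun) {
		if (++ac->index == ac->alist->nr_addrs)
			ac->index = 0;
		if (ac->index == ac->start) {
			ac->error = -EDESTADDRREQ; /* nothing responded */
			return false;
		}
	}
	ac->begun = true;
	ac->responded = false;
	return true;
}

/* If a different address responded, remember it as the new preferred index. */
static int end_cursor(struct addr_cursor *ac)
{
	if (ac->responded && ac->index != ac->start)
		ac->alist->index = ac->index;
	ac->alist = NULL;
	return ac->error;
}

int main(void)
{
	struct addr_list alist = {
		.nr_addrs = 3, .index = 1,
		.addrs = { "192.0.2.1", "192.0.2.2", "192.0.2.3" },
	};
	struct addr_cursor ac = {
		.alist = &alist, .start = alist.index, .index = alist.index,
	};

	while (iterate_addresses(&ac)) {
		printf("trying %s\n", alist.addrs[ac.index]);
		if (ac.index == 2) {	/* pretend the third address answered */
			ac.responded = true;
			break;
		}
	}
	return end_cursor(&ac) ? 1 : 0;
}
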
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 641148208e90..45b7fc405fa6 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o kafs-objs := \ $(afs-cache-y) \ + addr_list.o \ callback.o \ cell.o \ cmservice.o \ @@ -19,14 +20,14 @@ kafs-objs := \ misc.o \ mntpt.o \ proc.o \ + rotate.o \ rxrpc.o \ security.o \ server.o \ + server_list.o \ super.o \ netdevices.o \ vlclient.o \ - vlocation.o \ - vnode.o \ volume.o \ write.o \ xattr.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c new file mode 100644 index 000000000000..a537368ba0db --- /dev/null +++ b/fs/afs/addr_list.c @@ -0,0 +1,381 @@ +/* Server address list management + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/dns_resolver.h> +#include <linux/inet.h> +#include <keys/rxrpc-type.h> +#include "internal.h" +#include "afs_fs.h" + +//#define AFS_MAX_ADDRESSES +// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / +// sizeof(struct sockaddr_rxrpc))) +#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) + +/* + * Release an address list. + */ +void afs_put_addrlist(struct afs_addr_list *alist) +{ + if (alist && refcount_dec_and_test(&alist->usage)) + call_rcu(&alist->rcu, (rcu_callback_t)kfree); +} + +/* + * Allocate an address list. + */ +struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, + unsigned short service, + unsigned short port) +{ + struct afs_addr_list *alist; + unsigned int i; + + _enter("%u,%u,%u", nr, service, port); + + alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr, + GFP_KERNEL); + if (!alist) + return NULL; + + refcount_set(&alist->usage, 1); + + for (i = 0; i < nr; i++) { + struct sockaddr_rxrpc *srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->srx_service = service; + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = htons(port); + } + + return alist; +} + +/* + * Parse a text string consisting of delimited addresses. 
+ */ +struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) +{ + struct afs_addr_list *alist; + const char *p, *end = text + len; + unsigned int nr = 0; + + _enter("%*.*s,%c", (int)len, (int)len, text, delim); + + if (!len) + return ERR_PTR(-EDESTADDRREQ); + + if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) + delim = ','; + + /* Count the addresses */ + p = text; + do { + if (!*p) + return ERR_PTR(-EINVAL); + if (*p == delim) + continue; + nr++; + if (*p == '[') { + p++; + if (p == end) + return ERR_PTR(-EINVAL); + p = memchr(p, ']', end - p); + if (!p) + return ERR_PTR(-EINVAL); + p++; + if (p >= end) + break; + } + + p = memchr(p, delim, end - p); + if (!p) + break; + p++; + } while (p < end); + + _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); + if (nr > AFS_MAX_ADDRESSES) + nr = AFS_MAX_ADDRESSES; + + alist = afs_alloc_addrlist(nr, service, port); + if (!alist) + return ERR_PTR(-ENOMEM); + + /* Extract the addresses */ + p = text; + do { + struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; + char tdelim = delim; + + if (*p == delim) { + p++; + continue; + } + + if (*p == '[') { + p++; + tdelim = ']'; + } + + if (in4_pton(p, end - p, + (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], + tdelim, &p)) { + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + } else if (in6_pton(p, end - p, + srx->transport.sin6.sin6_addr.s6_addr, + tdelim, &p)) { + /* Nothing to do */ + } else { + goto bad_address; + } + + if (tdelim == ']') { + if (p == end || *p != ']') + goto bad_address; + p++; + } + + if (p < end) { + if (*p == '+') { + /* Port number specification "+1234" */ + unsigned int xport = 0; + p++; + if (p >= end || !isdigit(*p)) + goto bad_address; + do { + xport *= 10; + xport += *p - '0'; + if (xport > 65535) + goto bad_address; + p++; + } while (p < end && isdigit(*p)); + srx->transport.sin6.sin6_port = htons(xport); + } else if (*p == delim) { + p++; + } else { + goto bad_address; + } + } + + alist->nr_addrs++; + } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES); + + _leave(" = [nr %u]", alist->nr_addrs); + return alist; + +bad_address: + kfree(alist); + return ERR_PTR(-EINVAL); +} + +/* + * Compare old and new address lists to see if there's been any change. + * - How to do this in better than O(Nlog(N)) time? + * - We don't really want to sort the address list, but would rather take the + * list as we got it so as not to undo record rotation by the DNS server. + */ +#if 0 +static int afs_cmp_addr_list(const struct afs_addr_list *a1, + const struct afs_addr_list *a2) +{ +} +#endif + +/* + * Perform a DNS query for VL servers and build a up an address list. + */ +struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +{ + struct afs_addr_list *alist; + char *vllist = NULL; + int ret; + + _enter("%s", cell->name); + + ret = dns_query("afsdb", cell->name, cell->name_len, + "ipv4", &vllist, _expiry); + if (ret < 0) + return ERR_PTR(ret); + + alist = afs_parse_text_addrs(vllist, strlen(vllist), ',', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(alist)) { + kfree(vllist); + if (alist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data\n"); + return alist; + } + + kfree(vllist); + return alist; +} + +/* + * Merge an IPv4 entry into a fileserver address list. 
+ */ +void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) +{ + struct sockaddr_in6 *a; + __be16 xport = htons(port); + int i; + + for (i = 0; i < alist->nr_ipv4; i++) { + a = &alist->addrs[i].transport.sin6; + if (xdr == a->sin6_addr.s6_addr32[3] && + xport == a->sin6_port) + return; + if (xdr == a->sin6_addr.s6_addr32[3] && + xport < a->sin6_port) + break; + if (xdr < a->sin6_addr.s6_addr32[3]) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + a = &alist->addrs[i].transport.sin6; + a->sin6_port = xport; + a->sin6_addr.s6_addr32[0] = 0; + a->sin6_addr.s6_addr32[1] = 0; + a->sin6_addr.s6_addr32[2] = htonl(0xffff); + a->sin6_addr.s6_addr32[3] = xdr; + alist->nr_ipv4++; + alist->nr_addrs++; +} + +/* + * Merge an IPv6 entry into a fileserver address list. + */ +void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) +{ + struct sockaddr_in6 *a; + __be16 xport = htons(port); + int i, diff; + + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + a = &alist->addrs[i].transport.sin6; + diff = memcmp(xdr, &a->sin6_addr, 16); + if (diff == 0 && + xport == a->sin6_port) + return; + if (diff == 0 && + xport < a->sin6_port) + break; + if (diff < 0) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + a = &alist->addrs[i].transport.sin6; + a->sin6_port = xport; + a->sin6_addr.s6_addr32[0] = xdr[0]; + a->sin6_addr.s6_addr32[1] = xdr[1]; + a->sin6_addr.s6_addr32[2] = xdr[2]; + a->sin6_addr.s6_addr32[3] = xdr[3]; + alist->nr_addrs++; +} + +/* + * Get an address to try. + */ +bool afs_iterate_addresses(struct afs_addr_cursor *ac) +{ + _enter("%hu+%hd", ac->start, (short)ac->index); + + if (!ac->alist) + return false; + + if (ac->begun) { + ac->index++; + if (ac->index == ac->alist->nr_addrs) + ac->index = 0; + + if (ac->index == ac->start) { + ac->error = -EDESTADDRREQ; + return false; + } + } + + ac->begun = true; + ac->responded = false; + ac->addr = &ac->alist->addrs[ac->index]; + return true; +} + +/* + * Release an address list cursor. + */ +int afs_end_cursor(struct afs_addr_cursor *ac) +{ + if (ac->responded && ac->index != ac->start) + WRITE_ONCE(ac->alist->index, ac->index); + + afs_put_addrlist(ac->alist); + ac->alist = NULL; + return ac->error; +} + +/* + * Set the address cursor for iterating over VL servers. 
+ */ +int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell) +{ + struct afs_addr_list *alist; + int ret; + + if (!rcu_access_pointer(cell->vl_addrs)) { + ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, + TASK_INTERRUPTIBLE); + if (ret < 0) + return ret; + + if (!rcu_access_pointer(cell->vl_addrs) && + ktime_get_real_seconds() < cell->dns_expiry) + return cell->error; + } + + read_lock(&cell->vl_addrs_lock); + alist = rcu_dereference_protected(cell->vl_addrs, + lockdep_is_held(&cell->vl_addrs_lock)); + if (alist->nr_addrs > 0) + afs_get_addrlist(alist); + else + alist = NULL; + read_unlock(&cell->vl_addrs_lock); + + if (!alist) + return -EDESTADDRREQ; + + ac->alist = alist; + ac->addr = NULL; + ac->start = READ_ONCE(alist->index); + ac->index = ac->start; + ac->error = 0; + ac->begun = false; + return 0; +} diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 3c462ff6db63..b94d0edc2b78 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -14,11 +14,14 @@ #include <linux/in.h> -#define AFS_MAXCELLNAME 64 /* maximum length of a cell name */ -#define AFS_MAXVOLNAME 64 /* maximum length of a volume name */ -#define AFSNAMEMAX 256 /* maximum length of a filename plus NUL */ -#define AFSPATHMAX 1024 /* maximum length of a pathname plus NUL */ -#define AFSOPAQUEMAX 1024 /* maximum length of an opaque field */ +#define AFS_MAXCELLNAME 64 /* Maximum length of a cell name */ +#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ +#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ +#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ +#define AFS_MAXTYPES 3 /* Maximum number of volume types */ +#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */ +#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ +#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ typedef unsigned afs_volid_t; typedef unsigned afs_vnodeid_t; @@ -72,6 +75,15 @@ struct afs_callback { #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ +struct afs_uuid { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ + __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __s8 clock_seq_low; /* clock seq low */ + __s8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + /* * AFS volume information */ @@ -124,7 +136,6 @@ struct afs_file_status { afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ - struct afs_fid parent; /* parent dir ID for non-dirs only */ time_t mtime_client; /* last time client changed data */ time_t mtime_server; /* last time server changed data */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ @@ -167,4 +178,16 @@ struct afs_volume_status { #define AFS_BLOCK_SIZE 1024 +/* + * XDR encoding of UUID in AFS. 
+ */ +struct afs_uuid__xdr { + __be32 time_low; + __be32 time_mid; + __be32 time_hi_and_version; + __be32 clock_seq_hi_and_reserved; + __be32 clock_seq_low; + __be32 node[6]; +}; + #endif /* AFS_H */ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index eb647323d8f0..d47b6d01e4c0 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -37,9 +37,12 @@ enum AFS_FS_Operations { FSLOOKUP = 161, /* AFS lookup file in directory */ FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ + FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ + FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */ }; enum AFS_FS_Errors { + VRESTARTING = -100, /* Server is restarting */ VSALVAGE = 101, /* volume needs salvaging */ VNOVNODE = 102, /* no such file/dir (vnode) */ VNOVOL = 103, /* no such volume or volume unavailable */ @@ -51,6 +54,9 @@ enum AFS_FS_Errors { VOVERQUOTA = 109, /* volume's maximum quota exceeded */ VBUSY = 110, /* volume is temporarily unavailable */ VMOVED = 111, /* volume moved to new server - ask this FS where */ + VIO = 112, /* I/O error in volume */ + VSALVAGING = 113, /* Volume is being salvaged */ + VRESTRICTED = 120, /* Volume is restricted from using */ }; #endif /* AFS_FS_H */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 800f607ffaf5..e3c4688f573b 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -16,11 +16,17 @@ #define AFS_VL_PORT 7003 /* volume location service port */ #define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ +#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */ enum AFSVL_Operations { - VLGETENTRYBYID = 503, /* AFS Get Cache Entry By ID operation ID */ - VLGETENTRYBYNAME = 504, /* AFS Get Cache Entry By Name operation ID */ - VLPROBE = 514, /* AFS Probe Volume Location Service operation ID */ + VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ + VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */ + VLPROBE = 514, /* AFS probe VL service */ + VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */ + VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ + VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ + YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */ + VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */ }; enum AFSVL_Errors { @@ -54,6 +60,19 @@ enum AFSVL_Errors { AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ }; +enum { + YFS_SERVER_INDEX = 0, + YFS_SERVER_UUID = 1, + YFS_SERVER_ENDPOINT = 2, +}; + +enum { + YFS_ENDPOINT_IPV4 = 0, + YFS_ENDPOINT_IPV6 = 1, +}; + +#define YFS_MAXENDPOINTS 16 + /* * maps to "struct vldbentry" in vvl-spec.pdf */ @@ -74,11 +93,57 @@ struct afs_vldbentry { struct in_addr addr; /* server address */ unsigned partition; /* partition ID on this server */ unsigned flags; /* server specific flags */ -#define AFS_VLSF_NEWREPSITE 0x0001 /* unused */ +#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */ #define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */ #define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */ #define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */ +#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */ +#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */ } servers[8]; }; +#define AFS_VLDB_MAXNAMELEN 65 + + +struct 
afs_ListAddrByAttributes__xdr { + __be32 Mask; +#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */ +#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */ +#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */ + __be32 ipaddr; + __be32 index; + __be32 spare; + struct afs_uuid__xdr uuid; +}; + +struct afs_uvldbentry__xdr { + __be32 name[AFS_VLDB_MAXNAMELEN]; + __be32 nServers; + struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS]; + __be32 serverUnique[AFS_NMAXNSERVERS]; + __be32 serverPartition[AFS_NMAXNSERVERS]; + __be32 serverFlags[AFS_NMAXNSERVERS]; + __be32 volumeId[AFS_MAXTYPES]; + __be32 cloneId; + __be32 flags; + __be32 spares1; + __be32 spares2; + __be32 spares3; + __be32 spares4; + __be32 spares5; + __be32 spares6; + __be32 spares7; + __be32 spares8; + __be32 spares9; +}; + +struct afs_address_list { + refcount_t usage; + unsigned int version; + unsigned int nr_addrs; + struct sockaddr_rxrpc addrs[]; +}; + +extern void afs_put_address_list(struct afs_address_list *alist); + #endif /* AFS_VL_H */ diff --git a/fs/afs/cache.c b/fs/afs/cache.c index 1fe855191261..f62ff71d28c9 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -14,19 +14,6 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t buflen); -static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen); - -static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t buflen); -static enum fscache_checkaux afs_vlocation_cache_check_aux( - void *cookie_netfs_data, const void *buffer, uint16_t buflen); - static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t buflen); @@ -42,23 +29,13 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, struct fscache_netfs afs_cache_netfs = { .name = "afs", - .version = 0, + .version = 1, }; struct fscache_cookie_def afs_cell_cache_index_def = { .name = "AFS.cell", .type = FSCACHE_COOKIE_TYPE_INDEX, .get_key = afs_cell_cache_get_key, - .get_aux = afs_cell_cache_get_aux, - .check_aux = afs_cell_cache_check_aux, -}; - -struct fscache_cookie_def afs_vlocation_cache_index_def = { - .name = "AFS.vldb", - .type = FSCACHE_COOKIE_TYPE_INDEX, - .get_key = afs_vlocation_cache_get_key, - .get_aux = afs_vlocation_cache_get_aux, - .check_aux = afs_vlocation_cache_check_aux, }; struct fscache_cookie_def afs_volume_cache_index_def = { @@ -95,150 +72,26 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, return klen; } -/* - * provide new auxiliary cache data - */ -static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_cell *cell = cookie_netfs_data; - uint16_t dlen; - - _enter("%p,%p,%u", cell, buffer, bufmax); - - dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); - dlen = min(dlen, bufmax); - dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); - - memcpy(buffer, cell->vl_addrs, dlen); - return dlen; -} - -/* - * check that the auxiliary data indicates that the entry is still valid - */ -static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen) -{ - _leave(" = OKAY"); - return FSCACHE_CHECKAUX_OKAY; -} - 
-/*****************************************************************************/ -/* - * set the key for the index entry - */ -static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t klen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - - klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); - if (klen > bufmax) - return 0; - - memcpy(buffer, vlocation->vldb.name, klen); - - _leave(" = %u", klen); - return klen; -} - -/* - * provide new auxiliary cache data - */ -static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) -{ - const struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t dlen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); - - dlen = sizeof(struct afs_cache_vlocation); - dlen -= offsetof(struct afs_cache_vlocation, nservers); - if (dlen > bufmax) - return 0; - - memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); - - _leave(" = %u", dlen); - return dlen; -} - -/* - * check that the auxiliary data indicates that the entry is still valid - */ -static -enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen) -{ - const struct afs_cache_vlocation *cvldb; - struct afs_vlocation *vlocation = cookie_netfs_data; - uint16_t dlen; - - _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); - - /* check the size of the data is what we're expecting */ - dlen = sizeof(struct afs_cache_vlocation); - dlen -= offsetof(struct afs_cache_vlocation, nservers); - if (dlen != buflen) - return FSCACHE_CHECKAUX_OBSOLETE; - - cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); - - /* if what's on disk is more valid than what's in memory, then use the - * VL record from the cache */ - if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { - memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); - vlocation->valid = 1; - _leave(" = SUCCESS [c->m]"); - return FSCACHE_CHECKAUX_OKAY; - } - - /* need to update the cache if the cached info differs */ - if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { - /* delete if the volume IDs for this name differ */ - if (memcmp(&vlocation->vldb.vid, &cvldb->vid, - sizeof(cvldb->vid)) != 0 - ) { - _leave(" = OBSOLETE"); - return FSCACHE_CHECKAUX_OBSOLETE; - } - - _leave(" = UPDATE"); - return FSCACHE_CHECKAUX_NEEDS_UPDATE; - } - - _leave(" = OKAY"); - return FSCACHE_CHECKAUX_OKAY; -} - /*****************************************************************************/ /* * set the key for the volume index entry */ static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, - void *buffer, uint16_t bufmax) + void *buffer, uint16_t bufmax) { const struct afs_volume *volume = cookie_netfs_data; - uint16_t klen; + struct { + u64 volid; + } __packed key; _enter("{%u},%p,%u", volume->type, buffer, bufmax); - klen = sizeof(volume->type); - if (klen > bufmax) + if (bufmax < sizeof(key)) return 0; - memcpy(buffer, &volume->type, sizeof(volume->type)); - - _leave(" = %u", klen); - return klen; - + key.volid = volume->vid; + memcpy(buffer, &key, sizeof(key)); + return sizeof(key); } /*****************************************************************************/ @@ -249,20 +102,25 @@ static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) { const struct afs_vnode *vnode = cookie_netfs_data; - 
uint16_t klen; + struct { + u32 vnode_id[3]; + } __packed key; _enter("{%x,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, bufmax); - klen = sizeof(vnode->fid.vnode); - if (klen > bufmax) - return 0; + /* Allow for a 96-bit key */ + memset(&key, 0, sizeof(key)); + key.vnode_id[0] = vnode->fid.vnode; + key.vnode_id[1] = 0; + key.vnode_id[2] = 0; - memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); + if (sizeof(key) > bufmax) + return 0; - _leave(" = %u", klen); - return klen; + memcpy(buffer, &key, sizeof(key)); + return sizeof(key); } /* @@ -280,6 +138,11 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, *size = vnode->status.size; } +struct afs_vnode_cache_aux { + u64 data_version; + u32 fid_unique; +} __packed; + /* * provide new auxiliary cache data */ @@ -287,23 +150,21 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) { const struct afs_vnode *vnode = cookie_netfs_data; - uint16_t dlen; + struct afs_vnode_cache_aux aux; _enter("{%x,%x,%Lx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, bufmax); - dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); - if (dlen > bufmax) - return 0; + memset(&aux, 0, sizeof(aux)); + aux.data_version = vnode->status.data_version; + aux.fid_unique = vnode->fid.unique; - memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); - buffer += sizeof(vnode->fid.unique); - memcpy(buffer, &vnode->status.data_version, - sizeof(vnode->status.data_version)); + if (bufmax < sizeof(aux)) + return 0; - _leave(" = %u", dlen); - return dlen; + memcpy(buffer, &aux, sizeof(aux)); + return sizeof(aux); } /* @@ -314,43 +175,29 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, uint16_t buflen) { struct afs_vnode *vnode = cookie_netfs_data; - uint16_t dlen; + struct afs_vnode_cache_aux aux; _enter("{%x,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, buflen); + memcpy(&aux, buffer, sizeof(aux)); + /* check the size of the data is what we're expecting */ - dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); - if (dlen != buflen) { - _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); + if (buflen != sizeof(aux)) { + _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux)); return FSCACHE_CHECKAUX_OBSOLETE; } - if (memcmp(buffer, - &vnode->fid.unique, - sizeof(vnode->fid.unique) - ) != 0) { - unsigned unique; - - memcpy(&unique, buffer, sizeof(unique)); - + if (vnode->fid.unique != aux.fid_unique) { _leave(" = OBSOLETE [uniq %x != %x]", - unique, vnode->fid.unique); + aux.fid_unique, vnode->fid.unique); return FSCACHE_CHECKAUX_OBSOLETE; } - if (memcmp(buffer + sizeof(vnode->fid.unique), - &vnode->status.data_version, - sizeof(vnode->status.data_version) - ) != 0) { - afs_dataversion_t version; - - memcpy(&version, buffer + sizeof(vnode->fid.unique), - sizeof(version)); - + if (vnode->status.data_version != aux.data_version) { _leave(" = OBSOLETE [vers %llx != %llx]", - version, vnode->status.data_version); + aux.data_version, vnode->status.data_version); return FSCACHE_CHECKAUX_OBSOLETE; } diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 25d404d22cae..f4291b576054 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -20,118 +20,151 @@ #include <linux/sched.h> #include "internal.h" -#if 0 -unsigned afs_vnode_update_timeout = 10; -#endif /* 0 */ - -#define afs_breakring_space(server) \ - 
CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \ - ARRAY_SIZE((server)->cb_break)) - -//static void afs_callback_updater(struct work_struct *); - -static struct workqueue_struct *afs_callback_update_worker; - /* - * allow the fileserver to request callback state (re-)initialisation + * Set up an interest-in-callbacks record for a volume on a server and + * register it with the server. + * - Called with volume->server_sem held. */ -void afs_init_callback_state(struct afs_server *server) +int afs_register_server_cb_interest(struct afs_vnode *vnode, + struct afs_server_entry *entry) { - struct afs_vnode *vnode; - - _enter("{%p}", server); + struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server *server = entry->server; + +again: + vcbi = vnode->cb_interest; + if (vcbi) { + if (vcbi == cbi) + return 0; + + if (cbi && vcbi->server == cbi->server) { + write_seqlock(&vnode->cb_lock); + vnode->cb_interest = afs_get_cb_interest(cbi); + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); + return 0; + } - spin_lock(&server->cb_lock); + if (!cbi && vcbi->server == server) { + afs_get_cb_interest(vcbi); + x = cmpxchg(&entry->cb_interest, cbi, vcbi); + if (x != cbi) { + cbi = x; + afs_put_cb_interest(afs_v2net(vnode), vcbi); + goto again; + } + return 0; + } + } - /* kill all the promises on record from this server */ - while (!RB_EMPTY_ROOT(&server->cb_promises)) { - vnode = rb_entry(server->cb_promises.rb_node, - struct afs_vnode, cb_promise); - _debug("UNPROMISE { vid=%x:%u uq=%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; + if (!cbi) { + new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + if (!new) + return -ENOMEM; + + refcount_set(&new->usage, 1); + new->sb = vnode->vfs_inode.i_sb; + new->vid = vnode->volume->vid; + new->server = afs_get_server(server); + INIT_LIST_HEAD(&new->cb_link); + + write_lock(&server->cb_break_lock); + list_add_tail(&new->cb_link, &server->cb_interests); + write_unlock(&server->cb_break_lock); + + x = cmpxchg(&entry->cb_interest, cbi, new); + if (x == cbi) { + cbi = new; + } else { + cbi = x; + afs_put_cb_interest(afs_v2net(vnode), new); + } } - spin_unlock(&server->cb_lock); - _leave(""); + ASSERT(cbi); + + /* Change the server the vnode is using. This entails scrubbing any + * interest the vnode had in the previous server it was using. + */ + write_seqlock(&vnode->cb_lock); + + vnode->cb_interest = afs_get_cb_interest(cbi); + vnode->cb_s_break = cbi->server->cb_s_break; + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + write_sequnlock(&vnode->cb_lock); + return 0; } /* - * handle the data invalidation side of a callback being broken + * Set a vnode's interest on a server. 
*/ -void afs_broken_callback_work(struct work_struct *work) +void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi) { - struct afs_vnode *vnode = - container_of(work, struct afs_vnode, cb_broken_work); + struct afs_cb_interest *old_cbi = NULL; - _enter(""); - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + if (vnode->cb_interest == cbi) return; - /* we're only interested in dealing with a broken callback on *this* - * vnode and only if no-one else has dealt with it yet */ - if (!mutex_trylock(&vnode->validate_lock)) - return; /* someone else is dealing with it */ - - if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { - if (S_ISDIR(vnode->vfs_inode.i_mode)) - afs_clear_permits(vnode); - - if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0) - goto out; - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto out; - - /* if the vnode's data version number changed then its contents - * are different */ - if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) - afs_zap_data(vnode); + write_seqlock(&vnode->cb_lock); + if (vnode->cb_interest != cbi) { + afs_get_cb_interest(cbi); + old_cbi = vnode->cb_interest; + vnode->cb_interest = cbi; } + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); +} -out: - mutex_unlock(&vnode->validate_lock); - - /* avoid the potential race whereby the mutex_trylock() in this - * function happens again between the clear_bit() and the - * mutex_unlock() */ - if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { - _debug("requeue"); - queue_work(afs_callback_update_worker, &vnode->cb_broken_work); +/* + * Remove an interest on a server. + */ +void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) +{ + if (cbi && refcount_dec_and_test(&cbi->usage)) { + if (!list_empty(&cbi->cb_link)) { + write_lock(&cbi->server->cb_break_lock); + list_del_init(&cbi->cb_link); + write_unlock(&cbi->server->cb_break_lock); + afs_put_server(net, cbi->server); + } + kfree(cbi); } - _leave(""); +} + +/* + * allow the fileserver to request callback state (re-)initialisation + */ +void afs_init_callback_state(struct afs_server *server) +{ + if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) + server->cb_s_break++; } /* * actually break a callback */ -static void afs_break_callback(struct afs_server *server, - struct afs_vnode *vnode) +void afs_break_callback(struct afs_vnode *vnode) { _enter(""); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); + write_seqlock(&vnode->cb_lock); + + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + vnode->cb_break++; + afs_clear_permits(vnode); - if (vnode->cb_promised) { spin_lock(&vnode->lock); _debug("break callback"); - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&server->cb_lock); - - queue_work(afs_callback_update_worker, &vnode->cb_broken_work); if (list_empty(&vnode->granted_locks) && !list_empty(&vnode->pending_locks)) afs_lock_may_be_available(vnode); spin_unlock(&vnode->lock); } + + write_sequnlock(&vnode->cb_lock); } /* @@ -143,49 +176,31 @@ static void afs_break_callback(struct afs_server *server, static void afs_break_one_callback(struct afs_server *server, struct afs_fid *fid) { + struct afs_cb_interest *cbi; + struct afs_iget_data data; struct afs_vnode *vnode; - struct rb_node *p; - - _debug("find"); - spin_lock(&server->fs_lock); - p = server->fs_vnodes.rb_node; - while (p) { - vnode = rb_entry(p, struct afs_vnode, server_rb); - 
if (fid->vid < vnode->fid.vid) - p = p->rb_left; - else if (fid->vid > vnode->fid.vid) - p = p->rb_right; - else if (fid->vnode < vnode->fid.vnode) - p = p->rb_left; - else if (fid->vnode > vnode->fid.vnode) - p = p->rb_right; - else if (fid->unique < vnode->fid.unique) - p = p->rb_left; - else if (fid->unique > vnode->fid.unique) - p = p->rb_right; - else - goto found; - } - - /* not found so we just ignore it (it may have moved to another - * server) */ -not_available: - _debug("not avail"); - spin_unlock(&server->fs_lock); - _leave(""); - return; + struct inode *inode; -found: - _debug("found"); - ASSERTCMP(server, ==, vnode->server); + read_lock(&server->cb_break_lock); - if (!igrab(AFS_VNODE_TO_I(vnode))) - goto not_available; - spin_unlock(&server->fs_lock); + /* Step through all interested superblocks. There may be more than one + * because of cell aliasing. + */ + list_for_each_entry(cbi, &server->cb_interests, cb_link) { + if (cbi->vid != fid->vid) + continue; + + data.volume = NULL; + data.fid = *fid; + inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode); + iput(inode); + } + } - afs_break_callback(server, vnode); - iput(&vnode->vfs_inode); - _leave(""); + read_unlock(&server->cb_break_lock); } /* @@ -216,261 +231,14 @@ void afs_break_callbacks(struct afs_server *server, size_t count, } /* - * record the callback for breaking - * - the caller must hold server->cb_lock + * Clear the callback interests in a server list. */ -static void afs_do_give_up_callback(struct afs_server *server, - struct afs_vnode *vnode) +void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist) { - struct afs_callback *cb; - - _enter("%p,%p", server, vnode); - - cb = &server->cb_break[server->cb_break_head]; - cb->fid = vnode->fid; - cb->version = vnode->cb_version; - cb->expiry = vnode->cb_expiry; - cb->type = vnode->cb_type; - smp_wmb(); - server->cb_break_head = - (server->cb_break_head + 1) & - (ARRAY_SIZE(server->cb_break) - 1); - - /* defer the breaking of callbacks to try and collect as many as - * possible to ship in one operation */ - switch (atomic_inc_return(&server->cb_break_n)) { - case 1 ... 
AFSCBMAX - 1: - queue_delayed_work(afs_callback_update_worker, - &server->cb_break_work, HZ * 2); - break; - case AFSCBMAX: - afs_flush_callback_breaks(server); - break; - default: - break; - } - - ASSERT(server->cb_promises.rb_node != NULL); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - _leave(""); -} - -/* - * discard the callback on a deleted item - */ -void afs_discard_callback_on_delete(struct afs_vnode *vnode) -{ - struct afs_server *server = vnode->server; + int i; - _enter("%d", vnode->cb_promised); - - if (!vnode->cb_promised) { - _leave(" [not promised]"); - return; - } - - ASSERT(server != NULL); - - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - ASSERT(server->cb_promises.rb_node != NULL); - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + slist->servers[i].cb_interest = NULL; } - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * give up the callback registered for a vnode on the file server when the - * inode is being cleared - */ -void afs_give_up_callback(struct afs_vnode *vnode) -{ - struct afs_server *server = vnode->server; - - DECLARE_WAITQUEUE(myself, current); - - _enter("%d", vnode->cb_promised); - - _debug("GIVE UP INODE %p", &vnode->vfs_inode); - - if (!vnode->cb_promised) { - _leave(" [not promised]"); - return; - } - - ASSERT(server != NULL); - - spin_lock(&server->cb_lock); - if (vnode->cb_promised && afs_breakring_space(server) == 0) { - add_wait_queue(&server->cb_break_waitq, &myself); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!vnode->cb_promised || - afs_breakring_space(server) != 0) - break; - spin_unlock(&server->cb_lock); - schedule(); - spin_lock(&server->cb_lock); - } - remove_wait_queue(&server->cb_break_waitq, &myself); - __set_current_state(TASK_RUNNING); - } - - /* of course, it's always possible for the server to break this vnode's - * callback first... 
*/ - if (vnode->cb_promised) - afs_do_give_up_callback(server, vnode); - - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * dispatch a deferred give up callbacks operation - */ -void afs_dispatch_give_up_callbacks(struct work_struct *work) -{ - struct afs_server *server = - container_of(work, struct afs_server, cb_break_work.work); - - _enter(""); - - /* tell the fileserver to discard the callback promises it has - * - in the event of ENOMEM or some other error, we just forget that we - * had callbacks entirely, and the server will call us later to break - * them - */ - afs_fs_give_up_callbacks(server, true); -} - -/* - * flush the outstanding callback breaks on a server - */ -void afs_flush_callback_breaks(struct afs_server *server) -{ - mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0); -} - -#if 0 -/* - * update a bunch of callbacks - */ -static void afs_callback_updater(struct work_struct *work) -{ - struct afs_server *server; - struct afs_vnode *vnode, *xvnode; - time64_t now; - long timeout; - int ret; - - server = container_of(work, struct afs_server, updater); - - _enter(""); - - now = ktime_get_real_seconds(); - - /* find the first vnode to update */ - spin_lock(&server->cb_lock); - for (;;) { - if (RB_EMPTY_ROOT(&server->cb_promises)) { - spin_unlock(&server->cb_lock); - _leave(" [nothing]"); - return; - } - - vnode = rb_entry(rb_first(&server->cb_promises), - struct afs_vnode, cb_promise); - if (atomic_read(&vnode->usage) > 0) - break; - rb_erase(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = false; - } - - timeout = vnode->update_at - now; - if (timeout > 0) { - queue_delayed_work(afs_vnode_update_worker, - &afs_vnode_update, timeout * HZ); - spin_unlock(&server->cb_lock); - _leave(" [nothing]"); - return; - } - - list_del_init(&vnode->update); - atomic_inc(&vnode->usage); - spin_unlock(&server->cb_lock); - - /* we can now perform the update */ - _debug("update %s", vnode->vldb.name); - vnode->state = AFS_VL_UPDATING; - vnode->upd_rej_cnt = 0; - vnode->upd_busy_cnt = 0; - - ret = afs_vnode_update_record(vl, &vldb); - switch (ret) { - case 0: - afs_vnode_apply_update(vl, &vldb); - vnode->state = AFS_VL_UPDATING; - break; - case -ENOMEDIUM: - vnode->state = AFS_VL_VOLUME_DELETED; - break; - default: - vnode->state = AFS_VL_UNCERTAIN; - break; - } - - /* and then reschedule */ - _debug("reschedule"); - vnode->update_at = ktime_get_real_seconds() + - afs_vnode_update_timeout; - - spin_lock(&server->cb_lock); - - if (!list_empty(&server->cb_promises)) { - /* next update in 10 minutes, but wait at least 1 second more - * than the newest record already queued so that we don't spam - * the VL server suddenly with lots of requests - */ - xvnode = list_entry(server->cb_promises.prev, - struct afs_vnode, update); - if (vnode->update_at <= xvnode->update_at) - vnode->update_at = xvnode->update_at + 1; - xvnode = list_entry(server->cb_promises.next, - struct afs_vnode, update); - timeout = xvnode->update_at - now; - if (timeout < 0) - timeout = 0; - } else { - timeout = afs_vnode_update_timeout; - } - - list_add_tail(&vnode->update, &server->cb_promises); - - _debug("timeout %ld", timeout); - queue_delayed_work(afs_vnode_update_worker, - &afs_vnode_update, timeout * HZ); - spin_unlock(&server->cb_lock); - afs_put_vnode(vl); -} -#endif - -/* - * initialise the callback update process - */ -int __init afs_callback_update_init(void) -{ - afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd", - WQ_MEM_RECLAIM); - return 
afs_callback_update_worker ? 0 : -ENOMEM; -} - -/* - * shut down the callback update process - */ -void afs_callback_update_kill(void) -{ - destroy_workqueue(afs_callback_update_worker); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ca0a3cf93791..1858c91169e4 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -1,6 +1,6 @@ /* AFS cell and server record management * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -9,213 +9,296 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/module.h> #include <linux/slab.h> #include <linux/key.h> #include <linux/ctype.h> #include <linux/dns_resolver.h> #include <linux/sched.h> +#include <linux/inet.h> #include <keys/rxrpc-type.h> #include "internal.h" -DECLARE_RWSEM(afs_proc_cells_sem); -LIST_HEAD(afs_proc_cells); +unsigned __read_mostly afs_cell_gc_delay = 10; -static LIST_HEAD(afs_cells); -static DEFINE_RWLOCK(afs_cells_lock); -static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ -static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); -static struct afs_cell *afs_cell_root; +static void afs_manage_cell(struct work_struct *); + +static void afs_dec_cells_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->cells_outstanding)) + wake_up_atomic_t(&net->cells_outstanding); +} /* - * allocate a cell record and fill in its name, VL server address list and - * allocate an anonymous key + * Set the cell timer to fire after a given delay, assuming it's not already + * set for an earlier time. */ -static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, - char *vllist) +static void afs_set_cell_timer(struct afs_net *net, time64_t delay) { - struct afs_cell *cell; - struct key *key; - char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; - char *dvllist = NULL, *_vllist = NULL; - char delimiter = ':'; - int ret; + if (net->live) { + atomic_inc(&net->cells_outstanding); + if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) + afs_dec_cells_outstanding(net); + } +} - _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); +/* + * Look up and get an activation reference on a cell record under RCU + * conditions. The caller must hold the RCU read lock. + */ +struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, + const char *name, unsigned int namesz) +{ + struct afs_cell *cell = NULL; + struct rb_node *p; + int n, seq = 0, ret = 0; - BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ + _enter("%*.*s", namesz, namesz, name); - if (namelen > AFS_MAXCELLNAME) { - _leave(" = -ENAMETOOLONG"); + if (name && namesz == 0) + return ERR_PTR(-EINVAL); + if (namesz > AFS_MAXCELLNAME) return ERR_PTR(-ENAMETOOLONG); - } - /* allocate and initialise a cell record */ - cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); - if (!cell) { - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); - } + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. 
+ */ + if (cell) + afs_put_cell(net, cell); + cell = NULL; + ret = -ENOENT; - memcpy(cell->name, name, namelen); - cell->name[namelen] = 0; - - atomic_set(&cell->usage, 1); - INIT_LIST_HEAD(&cell->link); - rwlock_init(&cell->servers_lock); - INIT_LIST_HEAD(&cell->servers); - init_rwsem(&cell->vl_sem); - INIT_LIST_HEAD(&cell->vl_list); - spin_lock_init(&cell->vl_lock); - - /* if the ip address is invalid, try dns query */ - if (!vllist || strlen(vllist) < 7) { - ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); - if (ret < 0) { - if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) - /* translate these errors into something - * userspace might understand */ - ret = -EDESTADDRREQ; - _leave(" = %d", ret); - return ERR_PTR(ret); + read_seqbegin_or_lock(&net->cells_lock, &seq); + + if (!name) { + cell = rcu_dereference_raw(net->ws_cell); + if (cell) { + afs_get_cell(cell); + continue; + } + ret = -EDESTADDRREQ; + continue; } - _vllist = dvllist; - /* change the delimiter for user-space reply */ - delimiter = ','; + p = rcu_dereference_raw(net->cells.rb_node); + while (p) { + cell = rb_entry(p, struct afs_cell, net_node); + + n = strncasecmp(cell->name, name, + min_t(size_t, cell->name_len, namesz)); + if (n == 0) + n = cell->name_len - namesz; + if (n < 0) { + p = rcu_dereference_raw(p->rb_left); + } else if (n > 0) { + p = rcu_dereference_raw(p->rb_right); + } else { + if (atomic_inc_not_zero(&cell->usage)) { + ret = 0; + break; + } + /* We want to repeat the search, this time with + * the lock properly locked. + */ + } + cell = NULL; + } - } else { - _vllist = vllist; - } + } while (need_seqretry(&net->cells_lock, seq)); - /* fill in the VL server list from the rest of the string */ - do { - unsigned a, b, c, d; + done_seqretry(&net->cells_lock, seq); - next = strchr(_vllist, delimiter); - if (next) - *next++ = 0; + return ret == 0 ? cell : ERR_PTR(ret); +} - if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) - goto bad_address; +/* + * Set up a cell record and fill in its name, VL server address list and + * allocate an anonymous key + */ +static struct afs_cell *afs_alloc_cell(struct afs_net *net, + const char *name, unsigned int namelen, + const char *vllist) +{ + struct afs_cell *cell; + int i, ret; - if (a > 255 || b > 255 || c > 255 || d > 255) - goto bad_address; + ASSERT(name); + if (namelen == 0) + return ERR_PTR(-EINVAL); + if (namelen > AFS_MAXCELLNAME) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } - cell->vl_addrs[cell->vl_naddrs++].s_addr = - htonl((a << 24) | (b << 16) | (c << 8) | d); + _enter("%*.*s,%s", namelen, namelen, name, vllist); - } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); + cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); + if (!cell) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } - /* create a key to represent an anonymous user */ - memcpy(keyname, "afs@", 4); - dp = keyname + 4; - cp = cell->name; - do { - *dp++ = toupper(*cp); - } while (*cp++); + cell->net = net; + cell->name_len = namelen; + for (i = 0; i < namelen; i++) + cell->name[i] = tolower(name[i]); + + atomic_set(&cell->usage, 2); + INIT_WORK(&cell->manager, afs_manage_cell); + cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | + (1 << AFS_CELL_FL_NO_LOOKUP_YET)); + INIT_LIST_HEAD(&cell->proc_volumes); + rwlock_init(&cell->proc_lock); + rwlock_init(&cell->vl_addrs_lock); + + /* Fill in the VL server list if we were given a list of addresses to + * use. 
+ */ + if (vllist) { + struct afs_addr_list *alist; + + alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(alist)) { + ret = PTR_ERR(alist); + goto parse_failed; + } - key = rxrpc_get_null_key(keyname); - if (IS_ERR(key)) { - _debug("no key"); - ret = PTR_ERR(key); - goto error; + rcu_assign_pointer(cell->vl_addrs, alist); + cell->dns_expiry = TIME64_MAX; } - cell->anonymous_key = key; - - _debug("anon key %p{%x}", - cell->anonymous_key, key_serial(cell->anonymous_key)); _leave(" = %p", cell); return cell; -bad_address: - printk(KERN_ERR "kAFS: bad VL server IP address\n"); - ret = -EINVAL; -error: - key_put(cell->anonymous_key); - kfree(dvllist); +parse_failed: + if (ret == -EINVAL) + printk(KERN_ERR "kAFS: bad VL server IP address\n"); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); } /* - * afs_cell_crate() - create a cell record - * @name: is the name of the cell. - * @namsesz: is the strlen of the cell name. - * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. - * @retref: is T to return the cell reference when the cell exists. + * afs_lookup_cell - Look up or create a cell record. + * @net: The network namespace + * @name: The name of the cell. + * @namesz: The strlen of the cell name. + * @vllist: A colon/comma separated list of numeric IP addresses or NULL. + * @excl: T if an error should be given if the cell name already exists. + * + * Look up a cell record by name and query the DNS for VL server addresses if + * needed. Note that that actual DNS query is punted off to the manager thread + * so that this function can return immediately if interrupted whilst allowing + * cell records to be shared even if not yet fully constructed. */ -struct afs_cell *afs_cell_create(const char *name, unsigned namesz, - char *vllist, bool retref) +struct afs_cell *afs_lookup_cell(struct afs_net *net, + const char *name, unsigned int namesz, + const char *vllist, bool excl) { - struct afs_cell *cell; - int ret; - - _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); + struct afs_cell *cell, *candidate, *cursor; + struct rb_node *parent, **pp; + int ret, n; + + _enter("%s,%s", name, vllist); + + if (!excl) { + rcu_read_lock(); + cell = afs_lookup_cell_rcu(net, name, namesz); + rcu_read_unlock(); + if (!IS_ERR(cell)) { + if (excl) { + afs_put_cell(net, cell); + return ERR_PTR(-EEXIST); + } + goto wait_for_cell; + } + } - down_write(&afs_cells_sem); - read_lock(&afs_cells_lock); - list_for_each_entry(cell, &afs_cells, link) { - if (strncasecmp(cell->name, name, namesz) == 0) - goto duplicate_name; + /* Assume we're probably going to create a cell and preallocate and + * mostly set up a candidate record. We can then use this to stash the + * name, the net namespace and VL server addresses. + * + * We also want to do this before we hold any locks as it may involve + * upcalling to userspace to make DNS queries. + */ + candidate = afs_alloc_cell(net, name, namesz, vllist); + if (IS_ERR(candidate)) { + _leave(" = %ld", PTR_ERR(candidate)); + return candidate; } - read_unlock(&afs_cells_lock); - cell = afs_cell_alloc(name, namesz, vllist); - if (IS_ERR(cell)) { - _leave(" = %ld", PTR_ERR(cell)); - up_write(&afs_cells_sem); - return cell; + /* Find the insertion point and check to see if someone else added a + * cell whilst we were allocating. 
+ */ + write_seqlock(&net->cells_lock); + + pp = &net->cells.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + cursor = rb_entry(parent, struct afs_cell, net_node); + + n = strncasecmp(cursor->name, name, + min_t(size_t, cursor->name_len, namesz)); + if (n == 0) + n = cursor->name_len - namesz; + if (n < 0) + pp = &(*pp)->rb_left; + else if (n > 0) + pp = &(*pp)->rb_right; + else + goto cell_already_exists; } - /* add a proc directory for this cell */ - ret = afs_proc_cell_setup(cell); - if (ret < 0) - goto error; + cell = candidate; + candidate = NULL; + rb_link_node_rcu(&cell->net_node, parent, pp); + rb_insert_color(&cell->net_node, &net->cells); + atomic_inc(&net->cells_outstanding); + write_sequnlock(&net->cells_lock); -#ifdef CONFIG_AFS_FSCACHE - /* put it up for caching (this never returns an error) */ - cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, - &afs_cell_cache_index_def, - cell, true); -#endif + queue_work(afs_wq, &cell->manager); - /* add to the cell lists */ - write_lock(&afs_cells_lock); - list_add_tail(&cell->link, &afs_cells); - write_unlock(&afs_cells_lock); +wait_for_cell: + _debug("wait_for_cell"); + ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE); + smp_rmb(); - down_write(&afs_proc_cells_sem); - list_add_tail(&cell->proc_link, &afs_proc_cells); - up_write(&afs_proc_cells_sem); - up_write(&afs_cells_sem); + switch (READ_ONCE(cell->state)) { + case AFS_CELL_FAILED: + ret = cell->error; + goto error; + default: + _debug("weird %u %d", cell->state, cell->error); + goto error; + case AFS_CELL_ACTIVE: + break; + } - _leave(" = %p", cell); + _leave(" = %p [cell]", cell); return cell; +cell_already_exists: + _debug("cell exists"); + cell = cursor; + if (excl) { + ret = -EEXIST; + } else { + afs_get_cell(cursor); + ret = 0; + } + write_sequnlock(&net->cells_lock); + kfree(candidate); + if (ret == 0) + goto wait_for_cell; + goto error_noput; error: - up_write(&afs_cells_sem); - key_put(cell->anonymous_key); - kfree(cell); - _leave(" = %d", ret); + afs_put_cell(net, cell); +error_noput: + _leave(" = %d [error]", ret); return ERR_PTR(ret); - -duplicate_name: - if (retref && !IS_ERR(cell)) - afs_get_cell(cell); - - read_unlock(&afs_cells_lock); - up_write(&afs_cells_sem); - - if (retref) { - _leave(" = %p", cell); - return cell; - } - - _leave(" = -EEXIST"); - return ERR_PTR(-EEXIST); } /* @@ -223,10 +306,11 @@ duplicate_name: * - can be called with a module parameter string * - can be called from a write to /proc/fs/afs/rootcell */ -int afs_cell_init(char *rootcell) +int afs_cell_init(struct afs_net *net, const char *rootcell) { struct afs_cell *old_root, *new_root; - char *cp; + const char *cp, *vllist; + size_t len; _enter(""); @@ -239,222 +323,453 @@ int afs_cell_init(char *rootcell) } cp = strchr(rootcell, ':'); - if (!cp) + if (!cp) { _debug("kAFS: no VL server IP addresses specified"); - else - *cp++ = 0; + vllist = NULL; + len = strlen(rootcell); + } else { + vllist = cp + 1; + len = cp - rootcell; + } /* allocate a cell record for the root cell */ - new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); + new_root = afs_lookup_cell(net, rootcell, len, vllist, false); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); return PTR_ERR(new_root); } + set_bit(AFS_CELL_FL_NO_GC, &new_root->flags); + afs_get_cell(new_root); + /* install the new cell */ - write_lock(&afs_cells_lock); - old_root = afs_cell_root; - afs_cell_root = new_root; - write_unlock(&afs_cells_lock); - afs_put_cell(old_root); + 
write_seqlock(&net->cells_lock); + old_root = net->ws_cell; + net->ws_cell = new_root; + write_sequnlock(&net->cells_lock); + afs_put_cell(net, old_root); _leave(" = 0"); return 0; } /* - * lookup a cell record + * Update a cell's VL server address list from the DNS. */ -struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, - bool dns_cell) +static void afs_update_cell(struct afs_cell *cell) { - struct afs_cell *cell; - - _enter("\"%*.*s\",", namesz, namesz, name ?: ""); - - down_read(&afs_cells_sem); - read_lock(&afs_cells_lock); - - if (name) { - /* if the cell was named, look for it in the cell record list */ - list_for_each_entry(cell, &afs_cells, link) { - if (strncmp(cell->name, name, namesz) == 0) { - afs_get_cell(cell); - goto found; - } + struct afs_addr_list *alist, *old; + time64_t now, expiry; + + _enter("%s", cell->name); + + alist = afs_dns_query(cell, &expiry); + if (IS_ERR(alist)) { + switch (PTR_ERR(alist)) { + case -ENODATA: + /* The DNS said that the cell does not exist */ + set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 61; + break; + + case -EAGAIN: + case -ECONNREFUSED: + default: + set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + cell->dns_expiry = ktime_get_real_seconds() + 10; + break; } - cell = ERR_PTR(-ENOENT); - if (dns_cell) - goto create_cell; - found: - ; + + cell->error = -EDESTADDRREQ; } else { - cell = afs_cell_root; - if (!cell) { - /* this should not happen unless user tries to mount - * when root cell is not set. Return an impossibly - * bizarre errno to alert the user. Things like - * ENOENT might be "more appropriate" but they happen - * for other reasons. - */ - cell = ERR_PTR(-EDESTADDRREQ); - } else { - afs_get_cell(cell); - } + clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); + clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); + + /* Exclusion on changing vl_addrs is achieved by a + * non-reentrant work item. + */ + old = rcu_dereference_protected(cell->vl_addrs, true); + rcu_assign_pointer(cell->vl_addrs, alist); + cell->dns_expiry = expiry; + if (old) + afs_put_addrlist(old); } - read_unlock(&afs_cells_lock); - up_read(&afs_cells_sem); - _leave(" = %p", cell); - return cell; + if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) + wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET); -create_cell: - read_unlock(&afs_cells_lock); - up_read(&afs_cells_sem); + now = ktime_get_real_seconds(); + afs_set_cell_timer(cell->net, cell->dns_expiry - now); + _leave(""); +} - cell = afs_cell_create(name, namesz, NULL, true); +/* + * Destroy a cell record + */ +static void afs_cell_destroy(struct rcu_head *rcu) +{ + struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); - _leave(" = %p", cell); - return cell; + _enter("%p{%s}", cell, cell->name); + + ASSERTCMP(atomic_read(&cell->usage), ==, 0); + + afs_put_addrlist(cell->vl_addrs); + key_put(cell->anonymous_key); + kfree(cell); + + _leave(" [destroyed]"); } -#if 0 /* - * try and get a cell record + * Queue the cell manager. 
*/ -struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell) +static void afs_queue_cell_manager(struct afs_net *net) { - write_lock(&afs_cells_lock); + int outstanding = atomic_inc_return(&net->cells_outstanding); - if (cell && !list_empty(&cell->link)) - afs_get_cell(cell); - else - cell = NULL; + _enter("%d", outstanding); - write_unlock(&afs_cells_lock); - return cell; + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); } -#endif /* 0 */ /* - * destroy a cell record + * Cell management timer. We have an increment on cells_outstanding that we + * need to pass along to the work item. */ -void afs_put_cell(struct afs_cell *cell) +void afs_cells_timer(struct timer_list *timer) { - if (!cell) - return; + struct afs_net *net = container_of(timer, struct afs_net, cells_timer); - _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); + _enter(""); + if (!queue_work(afs_wq, &net->cells_manager)) + afs_dec_cells_outstanding(net); +} - ASSERTCMP(atomic_read(&cell->usage), >, 0); +/* + * Get a reference on a cell record. + */ +struct afs_cell *afs_get_cell(struct afs_cell *cell) +{ + atomic_inc(&cell->usage); + return cell; +} - /* to prevent a race, the decrement and the dequeue must be effectively - * atomic */ - write_lock(&afs_cells_lock); +/* + * Drop a reference on a cell record. + */ +void afs_put_cell(struct afs_net *net, struct afs_cell *cell) +{ + time64_t now, expire_delay; - if (likely(!atomic_dec_and_test(&cell->usage))) { - write_unlock(&afs_cells_lock); - _leave(""); + if (!cell) return; - } - ASSERT(list_empty(&cell->servers)); - ASSERT(list_empty(&cell->vl_list)); + _enter("%s", cell->name); - write_unlock(&afs_cells_lock); + now = ktime_get_real_seconds(); + cell->last_inactive = now; + expire_delay = 0; + if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && + !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + expire_delay = afs_cell_gc_delay; - wake_up(&afs_cells_freeable_wq); + if (atomic_dec_return(&cell->usage) > 1) + return; - _leave(" [unused]"); + /* 'cell' may now be garbage collected. */ + afs_set_cell_timer(net, expire_delay); } /* - * destroy a cell record - * - must be called with the afs_cells_sem write-locked - * - cell->link should have been broken by the caller + * Allocate a key to use as a placeholder for anonymous user security. */ -static void afs_cell_destroy(struct afs_cell *cell) +static int afs_alloc_anon_key(struct afs_cell *cell) { - _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); + struct key *key; + char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp; - ASSERTCMP(atomic_read(&cell->usage), >=, 0); - ASSERT(list_empty(&cell->link)); + /* Create a key to represent an anonymous user. */ + memcpy(keyname, "afs@", 4); + dp = keyname + 4; + cp = cell->name; + do { + *dp++ = tolower(*cp); + } while (*cp++); - /* wait for everyone to stop using the cell */ - if (atomic_read(&cell->usage) > 0) { - DECLARE_WAITQUEUE(myself, current); + key = rxrpc_get_null_key(keyname); + if (IS_ERR(key)) + return PTR_ERR(key); - _debug("wait for cell %s", cell->name); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&afs_cells_freeable_wq, &myself); + cell->anonymous_key = key; - while (atomic_read(&cell->usage) > 0) { - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } + _debug("anon key %p{%x}", + cell->anonymous_key, key_serial(cell->anonymous_key)); + return 0; +} - remove_wait_queue(&afs_cells_freeable_wq, &myself); - set_current_state(TASK_RUNNING); +/* + * Activate a cell. 
+ */ +static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) +{ + int ret; + + if (!cell->anonymous_key) { + ret = afs_alloc_anon_key(cell); + if (ret < 0) + return ret; } - _debug("cell dead"); - ASSERTCMP(atomic_read(&cell->usage), ==, 0); - ASSERT(list_empty(&cell->servers)); - ASSERT(list_empty(&cell->vl_list)); +#ifdef CONFIG_AFS_FSCACHE + cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell, true); +#endif + ret = afs_proc_cell_setup(net, cell); + if (ret < 0) + return ret; + spin_lock(&net->proc_cells_lock); + list_add_tail(&cell->proc_link, &net->proc_cells); + spin_unlock(&net->proc_cells_lock); + return 0; +} - afs_proc_cell_remove(cell); +/* + * Deactivate a cell. + */ +static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) +{ + _enter("%s", cell->name); + + afs_proc_cell_remove(net, cell); - down_write(&afs_proc_cells_sem); + spin_lock(&net->proc_cells_lock); list_del_init(&cell->proc_link); - up_write(&afs_proc_cells_sem); + spin_unlock(&net->proc_cells_lock); #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(cell->cache, 0); + cell->cache = NULL; #endif - key_put(cell->anonymous_key); - kfree(cell); - _leave(" [destroyed]"); + _leave(""); } /* - * purge in-memory cell database on module unload or afs_init() failure - * - the timeout daemon is stopped before calling this + * Manage a cell record, initialising and destroying it, maintaining its DNS + * records. */ -void afs_cell_purge(void) +static void afs_manage_cell(struct work_struct *work) { - struct afs_cell *cell; + struct afs_cell *cell = container_of(work, struct afs_cell, manager); + struct afs_net *net = cell->net; + bool deleted; + int ret, usage; + + _enter("%s", cell->name); + +again: + _debug("state %u", cell->state); + switch (cell->state) { + case AFS_CELL_INACTIVE: + case AFS_CELL_FAILED: + write_seqlock(&net->cells_lock); + usage = 1; + deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); + if (deleted) + rb_erase(&cell->net_node, &net->cells); + write_sequnlock(&net->cells_lock); + if (deleted) + goto final_destruction; + if (cell->state == AFS_CELL_FAILED) + goto done; + cell->state = AFS_CELL_UNSET; + goto again; + + case AFS_CELL_UNSET: + cell->state = AFS_CELL_ACTIVATING; + goto again; + + case AFS_CELL_ACTIVATING: + ret = afs_activate_cell(net, cell); + if (ret < 0) + goto activation_failed; + + cell->state = AFS_CELL_ACTIVE; + smp_wmb(); + clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + goto again; + + case AFS_CELL_ACTIVE: + if (atomic_read(&cell->usage) > 1) { + time64_t now = ktime_get_real_seconds(); + if (cell->dns_expiry <= now && net->live) + afs_update_cell(cell); + goto done; + } + cell->state = AFS_CELL_DEACTIVATING; + goto again; + + case AFS_CELL_DEACTIVATING: + set_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + if (atomic_read(&cell->usage) > 1) + goto reverse_deactivation; + afs_deactivate_cell(net, cell); + cell->state = AFS_CELL_INACTIVE; + goto again; + + default: + break; + } + _debug("bad state %u", cell->state); + BUG(); /* Unhandled state */ + +activation_failed: + cell->error = ret; + afs_deactivate_cell(net, cell); + + cell->state = AFS_CELL_FAILED; + smp_wmb(); + if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags)) + wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + goto again; + +reverse_deactivation: + cell->state = AFS_CELL_ACTIVE; + smp_wmb(); + clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags); + 
wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY); + _leave(" [deact->act]"); + return; + +done: + _leave(" [done %u]", cell->state); + return; + +final_destruction: + call_rcu(&cell->rcu, afs_cell_destroy); + afs_dec_cells_outstanding(net); + _leave(" [destruct %d]", atomic_read(&net->cells_outstanding)); +} + +/* + * Manage the records of cells known to a network namespace. This includes + * updating the DNS records and garbage collecting unused cells that were + * automatically added. + * + * Note that constructed cell records may only be removed from net->cells by + * this work item, so it is safe for this work item to stash a cursor pointing + * into the tree and then return to caller (provided it skips cells that are + * still under construction). + * + * Note also that we were given an increment on net->cells_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_cells(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, cells_manager); + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; _enter(""); - afs_put_cell(afs_cell_root); + /* Trawl the cell database looking for cells that have expired from + * lack of use and cells whose DNS results have expired and dispatch + * their managers. + */ + read_seqlock_excl(&net->cells_lock); - down_write(&afs_cells_sem); + for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { + struct afs_cell *cell = + rb_entry(cursor, struct afs_cell, net_node); + unsigned usage; + bool sched_cell = false; - while (!list_empty(&afs_cells)) { - cell = NULL; + usage = atomic_read(&cell->usage); + _debug("manage %s %u", cell->name, usage); + + ASSERTCMP(usage, >=, 1); - /* remove the next cell from the front of the list */ - write_lock(&afs_cells_lock); + if (purging) { + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) + usage = atomic_dec_return(&cell->usage); + ASSERTCMP(usage, ==, 1); + } + + if (usage == 1) { + time64_t expire_at = cell->last_inactive; + + if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) && + !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags)) + expire_at += afs_cell_gc_delay; + if (purging || expire_at <= now) + sched_cell = true; + else if (expire_at < next_manage) + next_manage = expire_at; + } - if (!list_empty(&afs_cells)) { - cell = list_entry(afs_cells.next, - struct afs_cell, link); - list_del_init(&cell->link); + if (!purging) { + if (cell->dns_expiry <= now) + sched_cell = true; + else if (cell->dns_expiry <= next_manage) + next_manage = cell->dns_expiry; } - write_unlock(&afs_cells_lock); + if (sched_cell) + queue_work(afs_wq, &cell->manager); + } + + read_sequnlock_excl(&net->cells_lock); - if (cell) { - _debug("PURGING CELL %s (%d)", - cell->name, atomic_read(&cell->usage)); + /* Update the timer on the way out. We have to pass an increment on + * cells_outstanding in the namespace that we are in to the timer or + * the work scheduler. + */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); - /* now the cell should be left with no references */ - afs_cell_destroy(cell); + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->cells_manager)) + atomic_inc(&net->cells_outstanding); + } else { + afs_set_cell_timer(net, next_manage - now); } } - up_write(&afs_cells_sem); + afs_dec_cells_outstanding(net); + _leave(" [%d]", atomic_read(&net->cells_outstanding)); +} + +/* + * Purge in-memory cell database. 
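The cells_outstanding count threaded through afs_queue_cell_manager(), afs_cells_timer() and the tail of afs_manage_cells() above is what afs_cell_purge() below waits on: each successful queue_work() hands one count to the work item, a failed queue_work() means a run was already pending and already owns a count (so the extra one is dropped straight away), and the work item gives its count back when it finishes. A self-contained sketch of that handoff; toy_dec_outstanding() stands in for afs_dec_cells_outstanding(), whose body is not part of this diff but which presumably wakes the waiter once the counter reaches zero:

#include <linux/workqueue.h>
#include <linux/atomic.h>
#include <linux/wait.h>

/* Illustrative sketch: hand an "outstanding work" count to a work item
 * so that a shutdown path can wait for every pending run to drain.
 */
static atomic_t toy_outstanding = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(toy_drain_wq);

static void toy_manager_fn(struct work_struct *work);
static DECLARE_WORK(toy_manager, toy_manager_fn);

/* Stand-in for afs_dec_cells_outstanding(): drop one count and wake the
 * drain waiter once nothing is outstanding any more.
 */
static void toy_dec_outstanding(void)
{
        if (atomic_dec_and_test(&toy_outstanding))
                wake_up(&toy_drain_wq);
}

/* Queue the manager and hand it a count.  queue_work() returning false
 * means the work was already pending and that earlier queueing already
 * owns a count, so give ours back immediately.
 */
static void toy_queue_manager(void)
{
        atomic_inc(&toy_outstanding);
        if (!queue_work(system_wq, &toy_manager))
                toy_dec_outstanding();
}

static void toy_manager_fn(struct work_struct *work)
{
        /* ... do one management pass, possibly requeueing itself ... */
        toy_dec_outstanding();  /* consume the count we were handed */
}

/* Shutdown: wait until every queued pass and timer kick has drained. */
static void toy_wait_for_drain(void)
{
        wait_event(toy_drain_wq, atomic_read(&toy_outstanding) == 0);
}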
+ */ +void afs_cell_purge(struct afs_net *net) +{ + struct afs_cell *ws; + + _enter(""); + + write_seqlock(&net->cells_lock); + ws = net->ws_cell; + net->ws_cell = NULL; + write_sequnlock(&net->cells_lock); + afs_put_cell(net, ws); + + _debug("del timer"); + if (del_timer_sync(&net->cells_timer)) + atomic_dec(&net->cells_outstanding); + + _debug("kick mgr"); + afs_queue_cell_manager(net); + + _debug("wait"); + wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait, + TASK_UNINTERRUPTIBLE); _leave(""); } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 782d4d05a53b..41e277f57b20 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -41,7 +41,6 @@ static CM_NAME(CallBack); static const struct afs_call_type afs_SRXCBCallBack = { .name = afs_SRXCBCallBack_name, .deliver = afs_deliver_cb_callback, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, }; @@ -53,7 +52,6 @@ static CM_NAME(InitCallBackState); static const struct afs_call_type afs_SRXCBInitCallBackState = { .name = afs_SRXCBInitCallBackState_name, .deliver = afs_deliver_cb_init_call_back_state, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, }; @@ -65,7 +63,6 @@ static CM_NAME(InitCallBackState3); static const struct afs_call_type afs_SRXCBInitCallBackState3 = { .name = afs_SRXCBInitCallBackState3_name, .deliver = afs_deliver_cb_init_call_back_state3, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, }; @@ -77,7 +74,6 @@ static CM_NAME(Probe); static const struct afs_call_type afs_SRXCBProbe = { .name = afs_SRXCBProbe_name, .deliver = afs_deliver_cb_probe, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_Probe, }; @@ -89,7 +85,6 @@ static CM_NAME(ProbeUuid); static const struct afs_call_type afs_SRXCBProbeUuid = { .name = afs_SRXCBProbeUuid_name, .deliver = afs_deliver_cb_probe_uuid, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_ProbeUuid, }; @@ -101,7 +96,6 @@ static CM_NAME(TellMeAboutYourself); static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { .name = afs_SRXCBTellMeAboutYourself_name, .deliver = afs_deliver_cb_tell_me_about_yourself, - .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, .work = SRXAFSCB_TellMeAboutYourself, }; @@ -127,6 +121,9 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; + case CBProbeUuid: + call->type = &afs_SRXCBProbeUuid; + return true; case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; @@ -147,18 +144,16 @@ static void afs_cm_destructor(struct afs_call *call) * afs_deliver_cb_callback(). */ if (call->unmarshall == 5) { - ASSERT(call->server && call->count && call->request); - afs_break_callbacks(call->server, call->count, call->request); + ASSERT(call->cm_server && call->count && call->request); + afs_break_callbacks(call->cm_server, call->count, call->request); } - afs_put_server(call->server); - call->server = NULL; kfree(call->buffer); call->buffer = NULL; } /* - * allow the fileserver to see if the cache manager is still alive + * The server supplied a list of callbacks that it wanted to break. 
*/ static void SRXAFSCB_CallBack(struct work_struct *work) { @@ -173,7 +168,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * yet */ afs_send_empty_reply(call); - afs_break_callbacks(call->server, call->count, call->request); + afs_break_callbacks(call->cm_server, call->count, call->request); afs_put_call(call); _leave(""); } @@ -193,7 +188,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) switch (call->unmarshall) { case 0: - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -286,14 +280,16 @@ static int afs_deliver_cb_callback(struct afs_call *call) break; } - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } @@ -305,9 +301,9 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - _enter("{%p}", call->server); + _enter("{%p}", call->cm_server); - afs_init_callback_state(call->server); + afs_init_callback_state(call->cm_server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); @@ -324,21 +320,18 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) _enter(""); - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; - /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } @@ -357,8 +350,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) _enter(""); - rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - _enter("{%u}", call->unmarshall); switch (call->unmarshall) { @@ -402,15 +393,16 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) break; } - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(&srx); + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + server = afs_find_server(call->net, &srx); if (!server) return -ENOTCONN; - call->server = server; + call->cm_server = server; return afs_queue_call_work(call); } @@ -441,8 +433,8 @@ static int afs_deliver_cb_probe(struct afs_call *call) if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } @@ -461,7 +453,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) _enter(""); - if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) reply.match = htonl(0); else reply.match = htonl(1); @@ -524,7 +516,8 @@ static int afs_deliver_cb_probe_uuid(struct 
afs_call *call) break; } - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } @@ -568,13 +561,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) memset(&reply, 0, sizeof(reply)); reply.ia.nifs = htonl(nifs); - reply.ia.uuid[0] = afs_uuid.time_low; - reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid)); - reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version)); - reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved); - reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low); + reply.ia.uuid[0] = call->net->uuid.time_low; + reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid)); + reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version)); + reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved); + reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low); for (loop = 0; loop < 6; loop++) - reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]); + reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]); if (ifs) { for (loop = 0; loop < nifs; loop++) { @@ -605,8 +598,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) if (ret < 0) return ret; - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return -EIO; return afs_queue_call_work(call); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 613a77058263..ab618d32554c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -130,10 +130,11 @@ struct afs_lookup_cookie { /* * check that a directory page is valid */ -static inline bool afs_dir_check_page(struct inode *dir, struct page *page) +bool afs_dir_check_page(struct inode *dir, struct page *page) { struct afs_dir_page *dbuf; - loff_t latter; + struct afs_vnode *vnode = AFS_FS_I(dir); + loff_t latter, i_size, off; int tmp, qty; #if 0 @@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page) } #endif - /* determine how many magic numbers there should be in this page */ - latter = dir->i_size - page_offset(page); + /* Determine how many magic numbers there should be in this page, but + * we must take care because the directory may change size under us. 
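The comment this hunk adds is the important part: i_size is re-read and the page may now lie entirely beyond it, because the directory may shrink while pages from the old size are still sitting in the page cache. The snippet below is just that bounds computation on its own, with a hypothetical union toy_dir_block standing in for the real union afs_dir_block (AFS directories are laid out as 2048-byte blocks):

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Hypothetical stand-in for union afs_dir_block; only its size matters. */
union toy_dir_block { char space[2048]; };

/* How many directory-block magic numbers should be checked in @page,
 * given that the directory may have shrunk since the page was read.
 */
static unsigned int toy_blocks_to_check(struct inode *dir, struct page *page)
{
        loff_t i_size = i_size_read(dir);
        loff_t off = page_offset(page);
        loff_t latter;

        if (i_size <= off)
                return 0;       /* page now lies wholly beyond EOF */

        latter = i_size - off;
        if (latter > PAGE_SIZE)
                latter = PAGE_SIZE;

        return latter / sizeof(union toy_dir_block);
}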
+ */ + off = page_offset(page); + i_size = i_size_read(dir); + if (i_size <= off) + goto checked; + + latter = i_size - off; if (latter >= PAGE_SIZE) qty = PAGE_SIZE; else @@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page) dbuf = page_address(page); for (tmp = 0; tmp < qty; tmp++) { if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { - printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", + printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", __func__, dir->i_ino, tmp, qty, ntohs(dbuf->blocks[tmp].pagehdr.magic)); + trace_afs_dir_check_failed(vnode, off, i_size); goto error; } } +checked: SetPageChecked(page); return true; @@ -183,6 +193,7 @@ error: static inline void afs_dir_put_page(struct page *page) { kunmap(page); + unlock_page(page); put_page(page); } @@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); if (!IS_ERR(page)) { + lock_page(page); kmap(page); if (unlikely(!PageChecked(page))) { - if (PageError(page) || !afs_dir_check_page(dir, page)) + if (PageError(page)) goto fail; } } @@ -384,8 +396,7 @@ out: */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), - ctx, file->private_data); + return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file)); } /* @@ -553,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; /* instantiate the dentry */ - inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL); + inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL); key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); @@ -581,6 +592,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; + struct inode *inode; struct key *key; void *dir_version; int ret; @@ -588,30 +600,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; - vnode = AFS_FS_I(d_inode(dentry)); - - if (d_really_is_positive(dentry)) + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", vnode->fid.vid, vnode->fid.vnode, dentry, vnode->flags); - else + } else { _enter("{neg n=%pd}", dentry); + } key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell); if (IS_ERR(key)) key = NULL; + if (d_really_is_positive(dentry)) { + inode = d_inode(dentry); + if (inode) { + vnode = AFS_FS_I(inode); + afs_validate(vnode, key); + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + goto out_bad; + } + } + /* lock down the parent dentry so we can peer at it */ parent = dget_parent(dentry); dir = AFS_FS_I(d_inode(parent)); /* validate the parent directory */ - if (test_bit(AFS_VNODE_MODIFIED, &dir->flags)) - afs_validate(dir, key); + afs_validate(dir, key); if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); - goto out_bad; + goto out_bad_parent; } dir_version = (void *) (unsigned long) dir->status.data_version; @@ -626,13 +647,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) case 0: /* the filename maps to something */ if (d_really_is_negative(dentry)) - goto out_bad; - if (is_bad_inode(d_inode(dentry))) { + goto out_bad_parent; + inode = d_inode(dentry); + if (is_bad_inode(inode)) { printk("kAFS: 
afs_d_revalidate: %pd2 has bad inode\n", dentry); - goto out_bad; + goto out_bad_parent; } + vnode = AFS_FS_I(inode); + /* if the vnode ID has changed, then the dirent points to a * different file */ if (fid.vnode != vnode->fid.vnode) { @@ -649,10 +673,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) _debug("%pd: file deleted (uq %u -> %u I:%u)", dentry, fid.unique, vnode->fid.unique, - d_inode(dentry)->i_generation); - spin_lock(&vnode->lock); + vnode->vfs_inode.i_generation); + write_seqlock(&vnode->cb_lock); set_bit(AFS_VNODE_DELETED, &vnode->flags); - spin_unlock(&vnode->lock); + write_sequnlock(&vnode->cb_lock); goto not_found; } goto out_valid; @@ -667,7 +691,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) default: _debug("failed to iterate dir %pd: %d", parent, ret); - goto out_bad; + goto out_bad_parent; } out_valid: @@ -683,9 +707,10 @@ not_found: dentry->d_flags |= DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); -out_bad: +out_bad_parent: _debug("dropping dentry %pd2", dentry); dput(parent); +out_bad: key_put(key); _leave(" = 0 [bad]"); @@ -727,20 +752,48 @@ static void afs_d_release(struct dentry *dentry) } /* + * Create a new inode for create/mkdir/symlink + */ +static void afs_vnode_new_inode(struct afs_fs_cursor *fc, + struct dentry *new_dentry, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct inode *inode; + + if (fc->ac.error < 0) + return; + + inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, + newfid, newstatus, newcb, fc->cbi); + if (IS_ERR(inode)) { + /* ENOMEM or EINTR at a really inconvenient time - just abandon + * the new directory on the server. + */ + fc->ac.error = PTR_ERR(inode); + return; + } + + d_instantiate(new_dentry, inode); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); +} + +/* * create a directory on an AFS filesystem */ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_file_status newstatus; + struct afs_fs_cursor fc; + struct afs_callback newcb; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFDIR; _enter("{%x:%u},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); @@ -751,40 +804,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error; } - mode |= S_IFDIR; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto mkdir_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - 
spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(server); -mkdir_error: +error_key: key_put(key); error: d_drop(dentry); @@ -793,16 +833,29 @@ error: } /* + * Remove a subdir from a directory. + */ +static void afs_dir_remove_subdir(struct dentry *dentry) +{ + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + clear_nlink(&vnode->vfs_inode); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } +} + +/* * remove a directory from an AFS filesystem */ static int afs_rmdir(struct inode *dir, struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); @@ -812,45 +865,69 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) goto error; } - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true); - if (ret < 0) - goto rmdir_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, true); + } - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); - clear_nlink(&vnode->vfs_inode); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - afs_discard_callback_on_delete(vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + afs_dir_remove_subdir(dentry); } key_put(key); - _leave(" = 0"); - return 0; - -rmdir_error: - key_put(key); error: - _leave(" = %d", ret); return ret; } /* - * remove a file from an AFS filesystem + * Remove a link to a file or symlink from a directory. + * + * If the file was not deleted due to excess hard links, the fileserver will + * break the callback promise on the file - if it had one - before it returns + * to us, and if it was deleted, it won't + * + * However, if we didn't have a callback promise outstanding, or it was + * outstanding on a different server, then it won't break it either... + */ +static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +{ + int ret = 0; + + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + } + + return ret; +} + +/* + * Remove a file or symlink from an AFS filesystem. 
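afs_mkdir() and afs_rmdir() above, and afs_unlink(), afs_create(), afs_link(), afs_symlink() and afs_rename() further on, all now share one shape: begin a vnode operation, loop while afs_select_fileserver() offers a server to try, snapshot the vnode's callback-break counters into fc.cb_break before each attempt, then commit the returned status and end the operation. The skeleton below is that shape on its own; afs_fs_frobnicate() is a made-up stand-in for whichever FS RPC wrapper the caller wants, the other calls are used exactly as they appear in this patch, and the retry/abort policy hidden inside afs_select_fileserver() is not part of this hunk:

/* Sketch of the common vnode-operation shape used by the directory ops
 * in this patch.  Assumes the declarations in fs/afs/internal.h.
 */
void afs_fs_frobnicate(struct afs_fs_cursor *fc);       /* hypothetical RPC wrapper */

static int afs_frobnicate(struct afs_vnode *dvnode, struct key *key)
{
        struct afs_fs_cursor fc;
        int ret;

        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
                        /* Snapshot the break counters so the reply decoder
                         * can tell whether the callback promise it carries
                         * is still current when it arrives.
                         */
                        fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
                        afs_fs_frobnicate(&fc);
                }

                afs_check_for_remote_deletion(&fc, fc.vnode);
                afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
                ret = afs_end_vnode_operation(&fc);
        }

        return ret;
}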
*/ static int afs_unlink(struct inode *dir, struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) - goto error; + return -ENAMETOOLONG; key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -858,44 +935,28 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) goto error; } + /* Try to make sure we have a callback promise on the victim. */ if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); - - /* make sure we have a callback promise on the victim */ ret = afs_validate(vnode, key); if (ret < 0) - goto error; + goto error_key; } - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false); - if (ret < 0) - goto remove_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, false); + } - if (d_really_is_positive(dentry)) { - /* if the file wasn't deleted due to excess hard links, the - * fileserver will break the callback promise on the file - if - * it had one - before it returns to us, and if it was deleted, - * it won't - * - * however, if we didn't have a callback promise outstanding, - * or it was outstanding on a different server, then it won't - * break it either... - */ - vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - _debug("AFS_VNODE_DELETED"); - if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) - _debug("AFS_VNODE_CB_BROKEN"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - ret = afs_validate(vnode, key); - _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + ret = afs_dir_remove_link(dentry, key); } - key_put(key); - _leave(" = 0"); - return 0; - -remove_error: +error_key: key_put(key); error: _leave(" = %d", ret); @@ -908,60 +969,50 @@ error: static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_callback newcb; + struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFREG; _enter("{%x:%u},{%pd},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - mode |= S_IFREG; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server 
*/ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -975,6 +1026,7 @@ error: static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { + struct afs_fs_cursor fc; struct afs_vnode *dvnode, *vnode; struct key *key; int ret; @@ -987,23 +1039,45 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode->fid.vid, dvnode->fid.vnode, dentry); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name); - if (ret < 0) - goto link_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + afs_fs_link(&fc, vnode, dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break_2); + ihold(&vnode->vfs_inode); + d_instantiate(dentry, &vnode->vfs_inode); + + mutex_unlock(&vnode->io_lock); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } - ihold(&vnode->vfs_inode); - d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); return 0; -link_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1017,20 +1091,21 @@ error: static int afs_symlink(struct inode *dir, struct dentry *dentry, const char *content) { - struct afs_file_status status; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry, content); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + ret = -EINVAL; if (strlen(content) >= AFSPATHMAX) goto error; @@ -1041,39 +1116,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error; } - ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content, - &fid, &status, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_symlink(&fc, dentry->d_name.name, content, + &newfid, &newstatus); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, NULL); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * 
directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1088,6 +1151,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct key *key; int ret; @@ -1111,16 +1175,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error; } - ret = afs_vnode_rename(orig_dvnode, new_dvnode, key, - old_dentry->d_name.name, - new_dentry->d_name.name); - if (ret < 0) - goto rename_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { + if (orig_dvnode != new_dvnode) { + if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + } + while (afs_select_fileserver(&fc)) { + fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; + fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + afs_fs_rename(&fc, old_dentry->d_name.name, + new_dvnode, new_dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2); + if (orig_dvnode != new_dvnode) + mutex_unlock(&new_dvnode->io_lock); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } + key_put(key); _leave(" = 0"); return 0; -rename_error: +error_key: key_put(key); error: d_drop(new_dentry); diff --git a/fs/afs/file.c b/fs/afs/file.c index 510cba15fa56..a39192ced99e 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,11 +19,11 @@ #include <linux/task_io_accounting_ops.h> #include "internal.h" +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length); static int afs_releasepage(struct page *page, gfp_t gfp_flags); -static int afs_launder_page(struct page *page); static int afs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); @@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = afs_file_write, - .mmap = generic_file_readonly_mmap, + .mmap = afs_file_mmap, .splice_read = generic_file_splice_read, .fsync = afs_fsync, .lock = afs_lock, @@ -62,12 +62,63 @@ const struct address_space_operations afs_fs_aops = { .writepages = afs_writepages, }; +static const struct vm_operations_struct afs_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = afs_page_mkwrite, +}; + +/* + * Discard a pin on a writeback key. 
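One detail of afs_link() and afs_rename() above that is easy to read past: when a second vnode is involved, its io_lock is taken with mutex_lock_interruptible_nested(..., 1). Both locks belong to the same lock class, so without the subclass annotation lockdep would flag the second acquisition as a possible recursive deadlock, presumably because the first vnode's io_lock is already held by the operation at that point. A tiny, self-contained illustration of the annotation (toy type, not the real struct afs_vnode):

#include <linux/mutex.h>
#include <linux/errno.h>

struct toy_dir {
        struct mutex io_lock;
};

/* Lock two objects of the same lock class.  The second acquisition uses
 * subclass 1 so that lockdep can tell it apart from the first and does
 * not report a false recursive-locking deadlock.
 */
static int toy_lock_pair(struct toy_dir *a, struct toy_dir *b)
{
        if (mutex_lock_interruptible(&a->io_lock) < 0)
                return -ERESTARTSYS;

        if (a != b && mutex_lock_interruptible_nested(&b->io_lock, 1) < 0) {
                mutex_unlock(&a->io_lock);
                return -ERESTARTSYS;
        }

        return 0;
}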
+ */ +void afs_put_wb_key(struct afs_wb_key *wbk) +{ + if (refcount_dec_and_test(&wbk->usage)) { + key_put(wbk->key); + kfree(wbk); + } +} + +/* + * Cache key for writeback. + */ +int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af) +{ + struct afs_wb_key *wbk, *p; + + wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL); + if (!wbk) + return -ENOMEM; + refcount_set(&wbk->usage, 2); + wbk->key = af->key; + + spin_lock(&vnode->wb_lock); + list_for_each_entry(p, &vnode->wb_keys, vnode_link) { + if (p->key == wbk->key) + goto found; + } + + key_get(wbk->key); + list_add_tail(&wbk->vnode_link, &vnode->wb_keys); + spin_unlock(&vnode->wb_lock); + af->wb = wbk; + return 0; + +found: + refcount_inc(&p->usage); + spin_unlock(&vnode->wb_lock); + af->wb = p; + kfree(wbk); + return 0; +} + /* * open an AFS file or directory and attach a key to it */ int afs_open(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af; struct key *key; int ret; @@ -75,19 +126,38 @@ int afs_open(struct inode *inode, struct file *file) key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - return PTR_ERR(key); + ret = PTR_ERR(key); + goto error; } - ret = afs_validate(vnode, key); - if (ret < 0) { - _leave(" = %d [val]", ret); - return ret; + af = kzalloc(sizeof(*af), GFP_KERNEL); + if (!af) { + ret = -ENOMEM; + goto error_key; } + af->key = key; + + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_af; - file->private_data = key; + if (file->f_mode & FMODE_WRITE) { + ret = afs_cache_wb_key(vnode, af); + if (ret < 0) + goto error_af; + } + + file->private_data = af; _leave(" = 0"); return 0; + +error_af: + kfree(af); +error_key: + key_put(key); +error: + _leave(" = %d", ret); + return ret; } /* @@ -96,10 +166,16 @@ int afs_open(struct inode *inode, struct file *file) int afs_release(struct inode *inode, struct file *file) { struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af = file->private_data; _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); - key_put(file->private_data); + file->private_data = NULL; + if (af->wb) + afs_put_wb_key(af->wb); + key_put(af->key); + kfree(af); + afs_prune_wb_keys(vnode); _leave(" = 0"); return 0; } @@ -138,6 +214,37 @@ static void afs_file_readpage_read_complete(struct page *page, #endif /* + * Fetch file data from the volume. 
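file->private_data is no longer a bare struct key: afs_open() above now attaches a small struct afs_file carrying the request key plus, for writable opens, the pinned writeback key, and the afs_file_key() calls that replace file->private_data throughout the rest of this patch are just the accessor for it. Neither definition appears in this diff (they live in fs/afs/internal.h), so the lines below are an inferred minimal shape based on how afs_open() and afs_release() use them, not a quote of the header:

#include <linux/fs.h>
#include <linux/key.h>

struct afs_wb_key;

/* Inferred from afs_open()/afs_release() above - see fs/afs/internal.h
 * for the real definition.
 */
struct afs_file {
        struct key              *key;   /* key the file was opened with */
        struct afs_wb_key       *wb;    /* writeback key record, if writable */
};

static inline struct key *afs_file_key(struct file *file)
{
        struct afs_file *af = file->private_data;

        return af->key;
}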
+ */ +int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,,,", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_data(&fc, desc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* * read page from file, directory or symlink, given a key to use */ int afs_page_filler(void *data, struct page *page) @@ -199,8 +306,13 @@ int afs_page_filler(void *data, struct page *page) /* read the contents of the file from the server into the * page */ - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); + + if (ret >= 0 && S_ISDIR(inode->i_mode) && + !afs_dir_check_page(inode, page)) + ret = -EIO; + if (ret < 0) { if (ret == -ENOENT) { _debug("got NOENT from server" @@ -259,12 +371,12 @@ static int afs_readpage(struct file *file, struct page *page) int ret; if (file) { - key = file->private_data; + key = afs_file_key(file); ASSERT(key != NULL); ret = afs_page_filler(key, page); } else { struct inode *inode = page->mapping->host; - key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); + key = afs_request_key(AFS_FS_S(inode->i_sb)->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); } else { @@ -281,7 +393,7 @@ static int afs_readpage(struct file *file, struct page *page) static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; #endif struct page *page = req->pages[req->index]; @@ -310,7 +422,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, struct afs_read *req; struct list_head *p; struct page *first, *page; - struct key *key = file->private_data; + struct key *key = afs_file_key(file); pgoff_t index; int ret, n, i; @@ -369,7 +481,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, return 0; } - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); if (ret < 0) goto error; @@ -406,7 +518,7 @@ error: static int afs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct key *key = file->private_data; + struct key *key = afs_file_key(file); struct afs_vnode *vnode; int ret = 0; @@ -464,16 +576,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping, } /* - * write back a dirty page - */ -static int afs_launder_page(struct page *page) -{ - _enter("{%lu}", page->index); - - return 0; -} - -/* * invalidate part or all of a page * - release a page and clean up its private data if offset is 0 (indicating * the entire page) @@ -481,7 +583,8 @@ static int afs_launder_page(struct page *page) static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); + struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; _enter("{%lu},%u,%u", page->index, offset, length); @@ -498,13 +601,11 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, 
#endif if (PagePrivate(page)) { - if (wb && !PageWriteback(page)) { - set_page_private(page, 0); - afs_put_writeback(wb); - } - - if (!page_private(page)) - ClearPagePrivate(page); + priv = page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("inval"), + page->index, priv); + set_page_private(page, 0); + ClearPagePrivate(page); } } @@ -517,8 +618,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, */ static int afs_releasepage(struct page *page, gfp_t gfp_flags) { - struct afs_writeback *wb = (struct afs_writeback *) page_private(page); struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); + unsigned long priv; _enter("{{%x:%u}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, @@ -534,10 +635,10 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) #endif if (PagePrivate(page)) { - if (wb) { - set_page_private(page, 0); - afs_put_writeback(wb); - } + priv = page_private(page); + trace_afs_page_dirty(vnode, tracepoint_string("rel"), + page->index, priv); + set_page_private(page, 0); ClearPagePrivate(page); } @@ -545,3 +646,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) _leave(" = T"); return 1; } + +/* + * Handle setting up a memory mapping on an AFS file. + */ +static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret; + + ret = generic_file_mmap(file, vma); + if (ret == 0) + vma->vm_ops = &afs_vm_ops; + return ret; +} diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 3191dff2c156..7571a5dfd5a3 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -14,48 +14,17 @@ #define AFS_LOCK_GRANTED 0 #define AFS_LOCK_PENDING 1 +struct workqueue_struct *afs_lock_manager; + static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); static void afs_fl_release_private(struct file_lock *fl); -static struct workqueue_struct *afs_lock_manager; -static DEFINE_MUTEX(afs_lock_manager_mutex); - static const struct file_lock_operations afs_lock_ops = { .fl_copy_lock = afs_fl_copy_lock, .fl_release_private = afs_fl_release_private, }; /* - * initialise the lock manager thread if it isn't already running - */ -static int afs_init_lock_manager(void) -{ - int ret; - - ret = 0; - if (!afs_lock_manager) { - mutex_lock(&afs_lock_manager_mutex); - if (!afs_lock_manager) { - afs_lock_manager = alloc_workqueue("kafs_lockd", - WQ_MEM_RECLAIM, 0); - if (!afs_lock_manager) - ret = -ENOMEM; - } - mutex_unlock(&afs_lock_manager_mutex); - } - return ret; -} - -/* - * destroy the lock manager thread if it's running - */ -void __exit afs_kill_lock_manager(void) -{ - if (afs_lock_manager) - destroy_workqueue(afs_lock_manager); -} - -/* * if the callback is broken on this vnode, then the lock may now be available */ void afs_lock_may_be_available(struct afs_vnode *vnode) @@ -99,6 +68,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl) } /* + * Get a lock on a file + */ +static int afs_set_lock(struct afs_vnode *vnode, struct key *key, + afs_lock_type_t type) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,%u", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key), type); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_set_lock(&fc, type); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = 
afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Extend a lock on a file + */ +static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_extend_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Release a lock on a file + */ +static int afs_release_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_release_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* * do work for a lock, including: * - probing for a lock we're waiting on but didn't get immediately * - extending a lock that's close to timing out @@ -122,7 +185,7 @@ void afs_lock_work(struct work_struct *work) /* attempt to release the server lock; if it fails, we just * wait 5 minutes and it'll time out anyway */ - ret = afs_vnode_release_lock(vnode, vnode->unlock_key); + ret = afs_release_lock(vnode, vnode->unlock_key); if (ret < 0) printk(KERN_WARNING "AFS:" " Failed to release lock on {%x:%x} error %d\n", @@ -143,10 +206,10 @@ void afs_lock_work(struct work_struct *work) BUG(); fl = list_entry(vnode->granted_locks.next, struct file_lock, fl_u.afs.link); - key = key_get(fl->fl_file->private_data); + key = key_get(afs_file_key(fl->fl_file)); spin_unlock(&vnode->lock); - ret = afs_vnode_extend_lock(vnode, key); + ret = afs_extend_lock(vnode, key); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); key_put(key); switch (ret) { @@ -177,12 +240,12 @@ void afs_lock_work(struct work_struct *work) BUG(); fl = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - key = key_get(fl->fl_file->private_data); + key = key_get(afs_file_key(fl->fl_file)); type = (fl->fl_type == F_RDLCK) ? 
AFS_LOCK_READ : AFS_LOCK_WRITE; spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case -EWOULDBLOCK: @@ -213,7 +276,7 @@ void afs_lock_work(struct work_struct *work) clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); spin_unlock(&vnode->lock); - afs_vnode_release_lock(vnode, key); + afs_release_lock(vnode, key); if (!list_empty(&vnode->pending_locks)) afs_lock_may_be_available(vnode); } @@ -255,7 +318,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); afs_lock_type_t type; - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret; _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); @@ -264,10 +327,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) return -EINVAL; - ret = afs_init_lock_manager(); - if (ret < 0) - return ret; - fl->fl_ops = &afs_lock_ops; INIT_LIST_HEAD(&fl->fl_u.afs.link); fl->fl_u.afs.state = AFS_LOCK_PENDING; @@ -278,7 +337,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) /* make sure we've got a callback on this file and that our view of the * data version is up to date */ - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_validate(vnode, key); if (ret < 0) goto error; @@ -315,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) set_bit(AFS_VNODE_LOCKING, &vnode->flags); spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case 0: @@ -418,7 +477,7 @@ given_lock: /* again, make sure we've got a callback on this file and, again, make * sure that our view of the data version is up to date (we ignore * errors incurred here and deal with the consequences elsewhere) */ - afs_vnode_fetch_status(vnode, NULL, key); + afs_validate(vnode, key); error: spin_unlock(&inode->i_lock); @@ -441,7 +500,7 @@ vfs_rejected_lock: static int afs_do_unlk(struct file *file, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret; _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); @@ -476,7 +535,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl) static int afs_do_getlk(struct file *file, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); int ret, lock_count; _enter(""); @@ -490,7 +549,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto error; lock_count = vnode->status.lock_count; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 19f76ae36982..b90ef39ae914 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,12 +16,19 @@ #include "internal.h" #include "afs_fs.h" +static const struct afs_fid afs_zero_fid; + /* * We need somewhere to discard into in case the server helpfully returns more * than we asked for in FS.FetchData{,64}. 
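The fsclient.c changes that follow close the loop on the cb_break snapshots taken before each call: xdr_decode_AFSCallBack() only installs the callback promise carried in a reply if call->cb_break still equals the vnode's break counter plus the server's cb_s_break, i.e. if nothing broke a callback while the RPC was in flight. A stripped-down sketch of that compare-then-install step, with toy fields in place of the real vnode and server layouts:

#include <linux/types.h>

/* Illustrative only: a break that lands between issuing the call and
 * decoding its reply bumps one of these counters, so the pre-call
 * snapshot no longer matches and the stale promise is discarded.
 */
struct toy_vnode {
        unsigned int    cb_break;       /* breaks against this vnode */
        bool            cb_promised;
};

struct toy_server {
        unsigned int    cb_s_break;     /* breaks against the whole server */
};

static void toy_install_callback(struct toy_vnode *vnode,
                                 struct toy_server *server,
                                 unsigned int snapshot) /* taken pre-call */
{
        if (snapshot == vnode->cb_break + server->cb_s_break) {
                /* Nothing intervened: record version/expiry/type and
                 * mark the promise as held.
                 */
                vnode->cb_promised = true;
        } else {
                /* Someone broke a callback while we were waiting, so the
                 * promise in this reply may already be invalid - drop it.
                 */
        }
}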
*/ static u8 afs_discard_buffer[64]; +static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) +{ + call->cbi = afs_get_cb_interest(cbi); +} + /* * decode an AFSFid block */ @@ -47,14 +54,18 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, const __be32 *bp = *_bp; umode_t mode; u64 data_version, size; - u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ + bool changed = false; kuid_t owner; kgid_t group; + if (vnode) + write_seqlock(&vnode->cb_lock); + #define EXTRACT(DST) \ do { \ u32 x = ntohl(*bp++); \ - changed |= DST - x; \ + if (DST != x) \ + changed |= true; \ DST = x; \ } while (0) @@ -70,8 +81,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, EXTRACT(status->caller_access); /* call ticket dependent */ EXTRACT(status->anon_access); EXTRACT(status->mode); - EXTRACT(status->parent.vnode); - EXTRACT(status->parent.unique); + bp++; /* parent.vnode */ + bp++; /* parent.unique */ bp++; /* seg size */ status->mtime_client = ntohl(*bp++); status->mtime_server = ntohl(*bp++); @@ -95,7 +106,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, status->mtime_client, status->mtime_server); if (vnode) { - status->parent.vid = vnode->fid.vid; if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { _debug("vnode changed"); i_size_write(&vnode->vfs_inode, size); @@ -127,25 +137,47 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, _debug("vnode modified %llx on {%x:%u}", (unsigned long long) data_version, vnode->fid.vid, vnode->fid.vnode); - set_bit(AFS_VNODE_MODIFIED, &vnode->flags); + set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } } else if (store_version) { status->data_version = data_version; } + + if (vnode) + write_sequnlock(&vnode->cb_lock); } /* * decode an AFSCallBack block */ -static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) +static void xdr_decode_AFSCallBack(struct afs_call *call, + struct afs_vnode *vnode, + const __be32 **_bp) { + struct afs_cb_interest *old, *cbi = call->cbi; const __be32 *bp = *_bp; + u32 cb_expiry; + + write_seqlock(&vnode->cb_lock); + + if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { + vnode->cb_version = ntohl(*bp++); + cb_expiry = ntohl(*bp++); + vnode->cb_type = ntohl(*bp++); + vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + old = vnode->cb_interest; + if (old != call->cbi) { + vnode->cb_interest = cbi; + cbi = old; + } + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } else { + bp += 3; + } - vnode->cb_version = ntohl(*bp++); - vnode->cb_expiry = ntohl(*bp++); - vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); + write_sequnlock(&vnode->cb_lock); + call->cbi = cbi; *_bp = bp; } @@ -243,22 +275,22 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, */ static int afs_deliver_fs_fetch_status(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; - _enter(""); - ret = afs_transfer_reply(call); if (ret < 0) return ret; + _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack(&bp, vnode); - if (call->reply2) - xdr_decode_AFSVolSync(&bp, call->reply2); + xdr_decode_AFSCallBack(call, vnode, &bp); + if (call->reply[1]) + xdr_decode_AFSVolSync(&bp, 
call->reply[1]); _leave(" = 0 [done]"); return 0; @@ -269,35 +301,33 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) */ static const struct afs_call_type afs_RXFSFetchStatus = { .name = "FS.FetchStatus", + .op = afs_FS_FetchStatus, .deliver = afs_deliver_fs_fetch_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - struct afs_volsync *volsync, - bool async) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); - if (!call) + call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); + if (!call) { + fc->ac.error = -ENOMEM; return -ENOMEM; + } - call->key = key; - call->reply = vnode; - call->reply2 = volsync; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = volsync; /* marshall the parameters */ bp = call->request; @@ -306,7 +336,10 @@ int afs_fs_fetch_file_status(struct afs_server *server, bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -314,8 +347,8 @@ int afs_fs_fetch_file_status(struct afs_server *server, */ static int afs_deliver_fs_fetch_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; - struct afs_read *req = call->reply3; + struct afs_vnode *vnode = call->reply[0]; + struct afs_read *req = call->reply[2]; const __be32 *bp; unsigned int size; void *buffer; @@ -431,9 +464,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack(&bp, vnode); - if (call->reply2) - xdr_decode_AFSVolSync(&bp, call->reply2); + xdr_decode_AFSCallBack(call, vnode, &bp); + if (call->reply[1]) + xdr_decode_AFSVolSync(&bp, call->reply[1]); call->offset = 0; call->unmarshall++; @@ -457,7 +490,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) static void afs_fetch_data_destructor(struct afs_call *call) { - struct afs_read *req = call->reply3; + struct afs_read *req = call->reply[2]; afs_put_read(req); afs_flat_call_destructor(call); @@ -468,43 +501,38 @@ static void afs_fetch_data_destructor(struct afs_call *call) */ static const struct afs_call_type afs_RXFSFetchData = { .name = "FS.FetchData", + .op = afs_FS_FetchData, .deliver = afs_deliver_fs_fetch_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_fetch_data_destructor, }; static const struct afs_call_type afs_RXFSFetchData64 = { .name = "FS.FetchData64", + .op = afs_FS_FetchData64, .deliver = afs_deliver_fs_fetch_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_fetch_data_destructor, }; /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) 
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->reply2 = NULL; /* volsync */ - call->reply3 = req; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); - call->operation_ID = FSFETCHDATA64; + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; /* marshall the parameters */ bp = call->request; @@ -518,39 +546,37 @@ static int afs_fs_fetch_data64(struct afs_server *server, bp[7] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) +int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(server, key, vnode, req, async); + return afs_fs_fetch_data64(fc, req); _enter(""); - call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->reply2 = NULL; /* volsync */ - call->reply3 = req; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); - call->operation_ID = FSFETCHDATA; + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; /* marshall the parameters */ bp = call->request; @@ -562,90 +588,10 @@ int afs_fs_fetch_data(struct afs_server *server, bp[5] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - return afs_make_call(&server->addr, call, GFP_NOFS, async); -} - -/* - * deliver reply data to an FS.GiveUpCallBacks - */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) -{ - _enter(""); - - /* shouldn't be any reply data */ - return afs_extract_data(call, NULL, 0, false); -} - -/* - * FS.GiveUpCallBacks operation type - */ -static const struct afs_call_type afs_RXFSGiveUpCallBacks = { - .name = "FS.GiveUpCallBacks", - .deliver = afs_deliver_fs_give_up_callbacks, - .abort_to_error = afs_abort_to_error, - .destructor = afs_flat_call_destructor, -}; - -/* - * give up a set of callbacks - * - the callbacks are held in the server->cb_break ring - */ -int afs_fs_give_up_callbacks(struct afs_server *server, - bool async) -{ - struct afs_call *call; - size_t ncallbacks; - __be32 *bp, *tp; - int loop; - - ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail, - ARRAY_SIZE(server->cb_break)); - - _enter("{%zu},", ncallbacks); - - if (ncallbacks == 0) - return 0; - if (ncallbacks > AFSCBMAX) - ncallbacks = AFSCBMAX; - - _debug("break %zu callbacks", ncallbacks); - - call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks, - 12 + 
ncallbacks * 6 * 4, 0); - if (!call) - return -ENOMEM; - - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); - - /* marshall the parameters */ - bp = call->request; - tp = bp + 2 + ncallbacks * 3; - *bp++ = htonl(FSGIVEUPCALLBACKS); - *bp++ = htonl(ncallbacks); - *tp++ = htonl(ncallbacks); - - atomic_sub(ncallbacks, &server->cb_break_n); - for (loop = ncallbacks; loop > 0; loop--) { - struct afs_callback *cb = - &server->cb_break[server->cb_break_tail]; - - *bp++ = htonl(cb->fid.vid); - *bp++ = htonl(cb->fid.vnode); - *bp++ = htonl(cb->fid.unique); - *tp++ = htonl(cb->version); - *tp++ = htonl(cb->expiry); - *tp++ = htonl(cb->type); - smp_mb(); - server->cb_break_tail = - (server->cb_break_tail + 1) & - (ARRAY_SIZE(server->cb_break) - 1); - } - - ASSERT(ncallbacks > 0); - wake_up_nr(&server->cb_break_waitq, ncallbacks); - - return afs_make_call(&server->addr, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -653,7 +599,7 @@ int afs_fs_give_up_callbacks(struct afs_server *server, */ static int afs_deliver_fs_create_vnode(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -665,11 +611,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFid(&bp, call->reply[1]); + xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - xdr_decode_AFSCallBack_raw(&bp, call->reply4); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -678,27 +624,33 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* * FS.CreateFile and FS.MakeDir operation type */ -static const struct afs_call_type afs_RXFSCreateXXXX = { - .name = "FS.CreateXXXX", +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "FS.CreateFile", + .op = afs_FS_CreateFile, + .deliver = afs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSMakeDir = { + .name = "FS.MakeDir", + .op = afs_FS_MakeDir, .deliver = afs_deliver_fs_create_vnode, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * create a file or make a directory */ -int afs_fs_create(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, +int afs_fs_create(struct afs_fs_cursor *fc, const char *name, umode_t mode, struct afs_fid *newfid, struct afs_file_status *newstatus, - struct afs_callback *newcb, - bool async) + struct afs_callback *newcb) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -708,18 +660,17 @@ int afs_fs_create(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (6 * 4); - call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz, - (3 + 21 + 21 + 3 + 6) * 4); + call = afs_alloc_flat_call( + net, S_ISDIR(mode) ? 
&afs_RXFSMakeDir : &afs_RXFSCreateFile, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->reply2 = newfid; - call->reply3 = newstatus; - call->reply4 = newcb; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; /* marshall the parameters */ bp = call->request; @@ -741,7 +692,9 @@ int afs_fs_create(struct afs_server *server, *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -749,7 +702,7 @@ int afs_fs_create(struct afs_server *server, */ static int afs_deliver_fs_remove(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -762,7 +715,7 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -771,24 +724,28 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* * FS.RemoveDir/FS.RemoveFile operation type */ -static const struct afs_call_type afs_RXFSRemoveXXXX = { - .name = "FS.RemoveXXXX", +static const struct afs_call_type afs_RXFSRemoveFile = { + .name = "FS.RemoveFile", + .op = afs_FS_RemoveFile, + .deliver = afs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type afs_RXFSRemoveDir = { + .name = "FS.RemoveDir", + .op = afs_FS_RemoveDir, .deliver = afs_deliver_fs_remove, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * remove a file or directory */ -int afs_fs_remove(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - const char *name, - bool isdir, - bool async) +int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -798,14 +755,14 @@ int afs_fs_remove(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz; - call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4); + call = afs_alloc_flat_call( + net, isdir ? 
&afs_RXFSRemoveDir : &afs_RXFSRemoveFile, + reqsz, (21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -821,7 +778,9 @@ int afs_fs_remove(struct afs_server *server, bp = (void *) bp + padsz; } - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -829,7 +788,7 @@ int afs_fs_remove(struct afs_server *server, */ static int afs_deliver_fs_link(struct afs_call *call) { - struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; + struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; const __be32 *bp; int ret; @@ -843,7 +802,7 @@ static int afs_deliver_fs_link(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -854,22 +813,20 @@ static int afs_deliver_fs_link(struct afs_call *call) */ static const struct afs_call_type afs_RXFSLink = { .name = "FS.Link", + .op = afs_FS_Link, .deliver = afs_deliver_fs_link, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * make a hard link */ -int afs_fs_link(struct afs_server *server, - struct key *key, - struct afs_vnode *dvnode, - struct afs_vnode *vnode, - const char *name, - bool async) +int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name) { + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; @@ -879,15 +836,13 @@ int afs_fs_link(struct afs_server *server, padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (3 * 4); - call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = dvnode; - call->reply2 = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; /* marshall the parameters */ bp = call->request; @@ -906,7 +861,9 @@ int afs_fs_link(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -914,7 +871,7 @@ int afs_fs_link(struct afs_server *server, */ static int afs_deliver_fs_symlink(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -926,10 +883,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->reply2); - xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); + xdr_decode_AFSFid(&bp, call->reply[1]); + xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL); xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + 
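/*
 * An illustrative sketch (the helper name is invented, not part of the
 * patch): the name-padding rule shared by afs_fs_create(), afs_fs_remove()
 * and afs_fs_link() above.  A directory-entry name goes on the wire as a
 * 32-bit length word followed by the name bytes, zero-padded to a 32-bit
 * boundary, so for "foo": namesz = 3, padsz = (4 - (3 & 3)) & 3 = 1,
 * giving 4 + 3 + 1 bytes in the request.
 */
static inline size_t afs_example_padded_name_size(size_t namesz)
{
	size_t padsz = (4 - (namesz & 3)) & 3;	/* 0..3 bytes of zero padding */

	return 4 + namesz + padsz;		/* length word + name + padding */
}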
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -940,24 +897,23 @@ static int afs_deliver_fs_symlink(struct afs_call *call) */ static const struct afs_call_type afs_RXFSSymlink = { .name = "FS.Symlink", + .op = afs_FS_Symlink, .deliver = afs_deliver_fs_symlink, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * create a symbolic link */ -int afs_fs_symlink(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, +int afs_fs_symlink(struct afs_fs_cursor *fc, const char *name, const char *contents, struct afs_fid *newfid, - struct afs_file_status *newstatus, - bool async) + struct afs_file_status *newstatus) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; @@ -971,17 +927,15 @@ int afs_fs_symlink(struct afs_server *server, reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); - call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz, + call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz, (3 + 21 + 21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->reply2 = newfid; - call->reply3 = newstatus; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; /* marshall the parameters */ bp = call->request; @@ -1010,7 +964,9 @@ int afs_fs_symlink(struct afs_server *server, *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1018,7 +974,7 @@ int afs_fs_symlink(struct afs_server *server, */ static int afs_deliver_fs_rename(struct afs_call *call) { - struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; + struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1]; const __be32 *bp; int ret; @@ -1034,7 +990,7 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (new_dvnode != orig_dvnode) xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, NULL); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1045,23 +1001,22 @@ static int afs_deliver_fs_rename(struct afs_call *call) */ static const struct afs_call_type afs_RXFSRename = { .name = "FS.Rename", + .op = afs_FS_Rename, .deliver = afs_deliver_fs_rename, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * create a symbolic link */ -int afs_fs_rename(struct afs_server *server, - struct key *key, - struct afs_vnode *orig_dvnode, +int afs_fs_rename(struct afs_fs_cursor *fc, const char *orig_name, struct afs_vnode *new_dvnode, - const char *new_name, - bool async) + const char *new_name) { + struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(orig_dvnode); size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; __be32 *bp; @@ -1078,15 +1033,13 @@ int afs_fs_rename(struct afs_server *server, (3 * 4) + 4 + n_namesz + n_padsz; - call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = orig_dvnode; - 
call->reply2 = new_dvnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = orig_dvnode; + call->reply[1] = new_dvnode; /* marshall the parameters */ bp = call->request; @@ -1113,7 +1066,9 @@ int afs_fs_rename(struct afs_server *server, bp = (void *) bp + n_padsz; } - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &orig_dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1121,7 +1076,7 @@ int afs_fs_rename(struct afs_server *server, */ static int afs_deliver_fs_store_data(struct afs_call *call) { - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1135,7 +1090,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, &call->store_version); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1148,47 +1103,44 @@ static int afs_deliver_fs_store_data(struct afs_call *call) */ static const struct afs_call_type afs_RXFSStoreData = { .name = "FS.StoreData", + .op = afs_FS_StoreData, .deliver = afs_deliver_fs_store_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64 = { .name = "FS.StoreData64", + .op = afs_FS_StoreData64, .deliver = afs_deliver_fs_store_data, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* * store a set of pages to a very large file */ -static int afs_fs_store_data64(struct afs_server *server, - struct afs_writeback *wb, +static int afs_fs_store_data64(struct afs_fs_cursor *fc, + struct address_space *mapping, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, - loff_t size, loff_t pos, loff_t i_size, - bool async) + loff_t size, loff_t pos, loff_t i_size) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSStoreData64, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); - call->mapping = vnode->vfs_inode.i_mapping; + call->key = fc->key; + call->mapping = mapping; + call->reply[0] = vnode; call->first = first; call->last = last; call->first_offset = offset; @@ -1217,24 +1169,25 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * store a set of pages */ -int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, +int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - bool async) + unsigned offset, unsigned to) { - struct afs_vnode *vnode = wb->vnode; + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = 
afs_v2net(vnode); loff_t size, pos, i_size; __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = (loff_t)to - (loff_t)offset; if (first != last) @@ -1251,21 +1204,18 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, (unsigned long long) i_size); if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(server, wb, first, last, offset, to, - size, pos, i_size, async); + return afs_fs_store_data64(fc, mapping, first, last, offset, to, + size, pos, i_size); - call = afs_alloc_flat_call(&afs_RXFSStoreData, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) return -ENOMEM; - call->wb = wb; - call->key = wb->key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); - call->mapping = vnode->vfs_inode.i_mapping; + call->key = fc->key; + call->mapping = mapping; + call->reply[0] = vnode; call->first = first; call->last = last; call->first_offset = offset; @@ -1291,7 +1241,9 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(size); *bp++ = htonl(i_size); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1300,7 +1252,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; - struct afs_vnode *vnode = call->reply; + struct afs_vnode *vnode = call->reply[0]; const __be32 *bp; int ret; @@ -1317,7 +1269,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call) bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1328,22 +1280,22 @@ static int afs_deliver_fs_store_status(struct afs_call *call) */ static const struct afs_call_type afs_RXFSStoreStatus = { .name = "FS.StoreStatus", + .op = afs_FS_StoreStatus, .deliver = afs_deliver_fs_store_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData_as_Status = { .name = "FS.StoreData", + .op = afs_FS_StoreData, .deliver = afs_deliver_fs_store_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64_as_Status = { .name = "FS.StoreData64", + .op = afs_FS_StoreData64, .deliver = afs_deliver_fs_store_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1351,30 +1303,27 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), 
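/*
 * An illustrative sketch (the helper is invented, not part of the patch):
 * the rule by which the 64-bit RPC variants are chosen.  afs_fs_fetch_data()
 * above falls back to FS.FetchData64 when the file position, length or
 * their sum needs more than 32 bits, and afs_fs_store_data() switches to
 * FS.StoreData64 when pos, size, i_size or pos + size overflows 32 bits.
 */
static inline bool afs_example_needs_64bit_rpc(loff_t pos, loff_t size,
					       loff_t i_size)
{
	return (pos >> 32) || (size >> 32) || (i_size >> 32) ||
		((pos + size) >> 32);
}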
vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); - call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; - call->operation_ID = FSSTOREDATA; /* marshall the parameters */ bp = call->request; @@ -1392,40 +1341,38 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, *bp++ = htonl(attr->ia_size >> 32); /* new file length */ *bp++ = htonl((u32) attr->ia_size); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_server *server, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); if (attr->ia_size >> 32) - return afs_fs_setattr_size64(server, key, vnode, attr, - async); + return afs_fs_setattr_size64(fc, attr); - call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, + call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; - call->operation_ID = FSSTOREDATA; /* marshall the parameters */ bp = call->request; @@ -1440,38 +1387,36 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_server *server, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(server, key, vnode, attr, - async); + return afs_fs_setattr_size(fc, attr); _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - call = afs_alloc_flat_call(&afs_RXFSStoreStatus, + call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, (4 + 6) * 4, (21 + 6) * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); 
- call->operation_ID = FSSTORESTATUS; + call->key = fc->key; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1482,7 +1427,9 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1510,7 +1457,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); + xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]); call->offset = 0; call->unmarshall++; @@ -1531,13 +1478,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("volname '%s'", p); @@ -1578,13 +1525,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("offline '%s'", p); @@ -1625,13 +1572,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 9: _debug("extract motd"); if (call->count > 0) { - ret = afs_extract_data(call, call->reply3, + ret = afs_extract_data(call, call->reply[2], call->count, true); if (ret < 0) return ret; } - p = call->reply3; + p = call->reply[2]; p[call->count] = 0; _debug("motd '%s'", p); @@ -1662,8 +1609,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) */ static void afs_get_volume_status_call_destructor(struct afs_call *call) { - kfree(call->reply3); - call->reply3 = NULL; + kfree(call->reply[2]); + call->reply[2] = NULL; afs_flat_call_destructor(call); } @@ -1672,21 +1619,20 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call) */ static const struct afs_call_type afs_RXFSGetVolumeStatus = { .name = "FS.GetVolumeStatus", + .op = afs_FS_GetVolumeStatus, .deliver = afs_deliver_fs_get_volume_status, - .abort_to_error = afs_abort_to_error, .destructor = afs_get_volume_status_call_destructor, }; /* * fetch the status of a volume */ -int afs_fs_get_volume_status(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - struct afs_volume_status *vs, - bool async) +int afs_fs_get_volume_status(struct afs_fs_cursor *fc, + struct afs_volume_status *vs) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; void *tmpbuf; @@ -1696,25 +1642,25 @@ int afs_fs_get_volume_status(struct afs_server *server, if (!tmpbuf) return -ENOMEM; - call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4); if (!call) { kfree(tmpbuf); return -ENOMEM; } - call->key = key; - call->reply = vnode; - call->reply2 = vs; - call->reply3 = tmpbuf; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = vs; + call->reply[2] = tmpbuf; /* marshall the parameters */ bp = call->request; bp[0] = 
htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vnode->fid.vid); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1733,7 +1679,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - /* xdr_decode_AFSVolSync(&bp, call->replyX); */ + /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); return 0; @@ -1744,8 +1690,8 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) */ static const struct afs_call_type afs_RXFSSetLock = { .name = "FS.SetLock", + .op = afs_FS_SetLock, .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1754,8 +1700,8 @@ static const struct afs_call_type afs_RXFSSetLock = { */ static const struct afs_call_type afs_RXFSExtendLock = { .name = "FS.ExtendLock", + .op = afs_FS_ExtendLock, .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; @@ -1764,33 +1710,29 @@ static const struct afs_call_type afs_RXFSExtendLock = { */ static const struct afs_call_type afs_RXFSReleaseLock = { .name = "FS.ReleaseLock", + .op = afs_FS_ReleaseLock, .deliver = afs_deliver_fs_xxxx_lock, - .abort_to_error = afs_abort_to_error, .destructor = afs_flat_call_destructor, }; /* - * get a lock on a file + * Set a lock on a file */ -int afs_fs_set_lock(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - afs_lock_type_t type, - bool async) +int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1800,30 +1742,29 @@ int afs_fs_set_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.unique); *bp++ = htonl(type); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_extend_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1832,30 +1773,29 @@ int afs_fs_extend_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return 
afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_server *server, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_release_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); if (!call) return -ENOMEM; - call->key = key; - call->reply = vnode; - call->service_id = FS_SERVICE; - call->port = htons(AFS_FS_PORT); + call->key = fc->key; + call->reply[0] = vnode; /* marshall the parameters */ bp = call->request; @@ -1864,5 +1804,145 @@ int afs_fs_release_lock(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - return afs_make_call(&server->addr, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.GiveUpAllCallBacks operation. + */ +static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call) +{ + return afs_transfer_reply(call); +} + +/* + * FS.GiveUpAllCallBacks operation type + */ +static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { + .name = "FS.GiveUpAllCallBacks", + .op = afs_FS_GiveUpAllCallBacks, + .deliver = afs_deliver_fs_give_up_all_callbacks, + .destructor = afs_flat_call_destructor, +}; + +/* + * Flush all the callbacks we have on a server. + */ +int afs_fs_give_up_all_callbacks(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGIVEUPALLCALLBACKS); + + /* Can't take a ref on server */ + return afs_make_call(ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.GetCapabilities operation. + */ +static int afs_deliver_fs_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, &call->tmp, + 1 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + call->offset = 0; + call->unmarshall++; + + /* Extract capabilities words */ + case 2: + count = min(call->count, 16U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 16); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXFSGetCapabilities = { + .name = "FS.GetCapabilities", + .op = afs_FS_GetCapabilities, + .deliver = afs_deliver_fs_get_capabilities, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a fileserver for the capabilities that it supports. This can + * return up to 196 words. 
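/*
 * An illustrative sketch (the struct name is invented, not part of the
 * patch): the shape of the FS.GetCapabilities reply that
 * afs_deliver_fs_get_capabilities() above unmarshals.  The request is a
 * single operation-code word; the reply is a count word followed by that
 * many capability words.  Since the flat call only allocates a 16 * 4 byte
 * reply buffer, the delivery routine drains the capability words in chunks
 * of at most 16, keeping want_more set until call->count reaches zero.
 */
struct afs_example_caps_reply {
	__be32 count;		/* number of capability words that follow */
	__be32 caps[];		/* capability words (currently discarded) */
};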
+ */ +int afs_fs_get_capabilities(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGETCAPABILITIES); + + /* Can't take a ref on server */ + trace_afs_make_fs_call(call, NULL); + return afs_make_call(ac, call, GFP_NOFS, false); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 342316a9e3e0..3415eb7484f6 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -23,11 +23,6 @@ #include <linux/namei.h> #include "internal.h" -struct afs_iget_data { - struct afs_fid fid; - struct afs_volume *volume; /* volume on which resides */ -}; - static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, .listxattr = afs_listxattr, @@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = { static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) { struct inode *inode = AFS_VNODE_TO_I(vnode); + bool changed; _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", vnode->status.type, @@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) vnode->status.data_version, vnode->status.mode); + read_seqlock_excl(&vnode->cb_lock); + switch (vnode->status.type) { case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | vnode->status.mode; @@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) if ((vnode->status.mode & 0777) == 0644) { inode->i_flags |= S_AUTOMOUNT; - spin_lock(&vnode->lock); set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - spin_unlock(&vnode->lock); inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; @@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) break; default: printk("kAFS: AFS vnode with undefined type\n"); + read_sequnlock_excl(&vnode->cb_lock); return -EBADMSG; } -#ifdef CONFIG_AFS_FSCACHE - if (vnode->status.size != inode->i_size) - fscache_attr_changed(vnode->cache); -#endif + changed = (vnode->status.size != inode->i_size); set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; @@ -97,13 +91,49 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; + + read_sequnlock_excl(&vnode->cb_lock); + +#ifdef CONFIG_AFS_FSCACHE + if (changed) + fscache_attr_changed(vnode->cache); +#endif return 0; } /* + * Fetch file status from the volume. 
+ */ +int afs_fetch_status(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s,{%x:%u.%u,S=%lx}", + vnode->volume->name, + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + vnode->flags); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_file_status(&fc, NULL); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* * iget5() comparator */ -static int afs_iget5_test(struct inode *inode, void *opaque) +int afs_iget5_test(struct inode *inode, void *opaque) { struct afs_iget_data *data = opaque; @@ -204,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, */ struct inode *afs_iget(struct super_block *sb, struct key *key, struct afs_fid *fid, struct afs_file_status *status, - struct afs_callback *cb) + struct afs_callback *cb, struct afs_cb_interest *cbi) { struct afs_iget_data data = { .fid = *fid }; struct afs_super_info *as; @@ -237,8 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - ret = afs_vnode_fetch_status(vnode, NULL, key); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto bad_inode; } else { @@ -249,16 +278,17 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* it's a symlink we just created (the fileserver * didn't give us a callback) */ vnode->cb_version = 0; - vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = ktime_get_real_seconds(); + vnode->cb_expires_at = 0; } else { vnode->cb_version = cb->version; - vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + - ktime_get_real_seconds(); + vnode->cb_expires_at = cb->expiry; + vnode->cb_interest = afs_get_cb_interest(cbi); + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } + + vnode->cb_expires_at += ktime_get_real_seconds(); } /* set up caching before mapping the status, as map-status reads the @@ -320,25 +350,34 @@ void afs_zap_data(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { + time64_t now = ktime_get_real_seconds(); + bool valid = false; int ret; _enter("{v={%x:%u} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - if (vnode->cb_promised && - !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && - !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < ktime_get_real_seconds() + 10) { - _debug("callback expired"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - } else { - goto valid; + /* Quickly check the callback state. Ideally, we'd use read_seqbegin + * here, but we have no way to pass the net namespace to the RCU + * cleanup for the server record. 
+ */ + read_seqlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { + vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; + } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) && + !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && + vnode->cb_expires_at - 10 > now) { + valid = true; } + } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + valid = true; } - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + read_sequnlock_excl(&vnode->cb_lock); + if (valid) goto valid; mutex_lock(&vnode->validate_lock); @@ -347,12 +386,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * a new promise - note that if the (parent) directory's metadata was * changed then the security may be different and we may no longer have * access */ - if (!vnode->cb_promised || - test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) { + if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, NULL, key); - if (ret < 0) + ret = afs_fetch_status(vnode, key); + if (ret < 0) { + if (ret == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + ret = -ESTALE; + } goto error_unlock; + } _debug("new promise [fl=%lx]", vnode->flags); } @@ -367,7 +410,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); + clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags); mutex_unlock(&vnode->validate_lock); valid: _leave(" = 0"); @@ -386,10 +429,17 @@ int afs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct afs_vnode *vnode = AFS_FS_I(inode); + int seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); - generic_fillattr(inode, stat); + do { + read_seqbegin_or_lock(&vnode->cb_lock, &seq); + generic_fillattr(inode, stat); + } while (need_seqretry(&vnode->cb_lock, seq)); + + done_seqretry(&vnode->cb_lock, seq); return 0; } @@ -411,18 +461,14 @@ int afs_drop_inode(struct inode *inode) */ void afs_evict_inode(struct inode *inode) { - struct afs_permits *permits; struct afs_vnode *vnode; vnode = AFS_FS_I(inode); - _enter("{%x:%u.%d} v=%u x=%u t=%u }", + _enter("{%x:%u.%d}", vnode->fid.vid, vnode->fid.vnode, - vnode->fid.unique, - vnode->cb_version, - vnode->cb_expiry, - vnode->cb_type); + vnode->fid.unique); _debug("CLEAR INODE %p", inode); @@ -431,31 +477,24 @@ void afs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - afs_give_up_callback(vnode); - - if (vnode->server) { - spin_lock(&vnode->server->fs_lock); - rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes); - spin_unlock(&vnode->server->fs_lock); - afs_put_server(vnode->server); - vnode->server = NULL; + if (vnode->cb_interest) { + afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest); + vnode->cb_interest = NULL; } - ASSERT(list_empty(&vnode->writebacks)); - ASSERT(!vnode->cb_promised); + while (!list_empty(&vnode->wb_keys)) { + struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, + struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(vnode->cache, 0); vnode->cache = NULL; #endif - mutex_lock(&vnode->permits_lock); - permits = vnode->permits; - RCU_INIT_POINTER(vnode->permits, NULL); - 
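/*
 * A minimal sketch of the fileserver-rotation pattern that afs_fetch_status()
 * and afs_setattr() above both follow; afs_fs_frobnicate() is an invented
 * operation name used purely for illustration and is not part of the patch.
 */
static int afs_example_operation(struct afs_vnode *vnode, struct key *key)
{
	struct afs_fs_cursor fc;
	int ret = -ERESTARTSYS;

	if (afs_begin_vnode_operation(&fc, vnode, key)) {
		while (afs_select_fileserver(&fc)) {
			/* Snapshot the callback-break counters so that a
			 * callback break arriving during the call can be
			 * detected when the status is committed. */
			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
			afs_fs_frobnicate(&fc);		/* invented FS op */
		}

		afs_check_for_remote_deletion(&fc, fc.vnode);
		afs_vnode_commit_status(&fc, vnode, fc.cb_break);
		ret = afs_end_vnode_operation(&fc);
	}

	return ret;
}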
mutex_unlock(&vnode->permits_lock); - if (permits) - call_rcu(&permits->rcu, afs_zap_permits); - + afs_put_permits(vnode->permit_cache); _leave(""); } @@ -464,6 +503,7 @@ void afs_evict_inode(struct inode *inode) */ int afs_setattr(struct dentry *dentry, struct iattr *attr) { + struct afs_fs_cursor fc; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; @@ -479,13 +519,11 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } /* flush any dirty data outstanding on a regular file */ - if (S_ISREG(vnode->vfs_inode.i_mode)) { + if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); - afs_writeback_all(vnode); - } if (attr->ia_valid & ATTR_FILE) { - key = attr->ia_file->private_data; + key = afs_file_key(attr->ia_file); } else { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { @@ -494,7 +532,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } } - ret = afs_vnode_setattr(vnode, key, attr); + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_setattr(&fc, attr); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + if (!(attr->ia_valid & ATTR_FILE)) key_put(key); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3f03f7888302..bd8dcee7e066 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -21,6 +21,7 @@ #include <linux/fscache.h> #include <linux/backing-dev.h> #include <linux/uuid.h> +#include <net/net_namespace.h> #include <net/af_rxrpc.h> #include "afs.h" @@ -31,16 +32,6 @@ struct pagevec; struct afs_call; -typedef enum { - AFS_VL_NEW, /* new, uninitialised record */ - AFS_VL_CREATING, /* creating record */ - AFS_VL_VALID, /* record is pending */ - AFS_VL_NO_VOLUME, /* no such volume available */ - AFS_VL_UPDATING, /* update in progress */ - AFS_VL_VOLUME_DELETED, /* volume was deleted */ - AFS_VL_UNCERTAIN, /* uncertain state (update failed) */ -} __attribute__((packed)) afs_vlocation_state_t; - struct afs_mount_params { bool rwpath; /* T if the parent should be considered R/W */ bool force; /* T to force cell type */ @@ -48,20 +39,43 @@ struct afs_mount_params { afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ + struct afs_net *net; /* Network namespace in effect */ struct afs_cell *cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ struct key *key; /* key to use for secure mounting */ }; +struct afs_iget_data { + struct afs_fid fid; + struct afs_volume *volume; /* volume on which resides */ +}; + enum afs_call_state { - AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ - AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ - AFS_CALL_AWAIT_OP_ID, /* awaiting op ID on incoming call */ - AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ - AFS_CALL_REPLYING, /* replying to incoming call */ - AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* Completed or failed */ + AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */ + AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */ + AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */ + AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */ + AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting 
request data */ + AFS_CALL_SV_REPLYING, /* Server: Replying */ + AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */ + AFS_CALL_COMPLETE, /* Completed or failed */ }; + +/* + * List of server addresses. + */ +struct afs_addr_list { + struct rcu_head rcu; /* Must be first */ + refcount_t usage; + u32 version; /* Version */ + unsigned short nr_addrs; + unsigned short index; /* Address currently in use */ + unsigned short nr_ipv4; /* Number of IPv4 addresses */ + unsigned long probed; /* Mask of servers that have been probed */ + unsigned long yfs; /* Mask of servers that are YFS */ + struct sockaddr_rxrpc addrs[]; +}; + /* * a record of an in-progress RxRPC call */ @@ -72,25 +86,25 @@ struct afs_call { struct work_struct work; /* actual work processor */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ - struct afs_server *server; /* server affected by incoming CM call */ + struct afs_net *net; /* The network namespace */ + struct afs_server *cm_server; /* Server affected by incoming CM call */ + struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ - struct address_space *mapping; /* page set */ - struct afs_writeback *wb; /* writeback being performed */ + struct address_space *mapping; /* Pages being written from */ void *buffer; /* reply receive buffer */ - void *reply; /* reply buffer (first part) */ - void *reply2; /* reply buffer (second part) */ - void *reply3; /* reply buffer (third part) */ - void *reply4; /* reply buffer (fourth part) */ + void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ size_t offset; /* offset into received data store */ atomic_t usage; enum afs_call_state state; + spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ + unsigned int cb_break; /* cb_break + cb_s_break before the call */ union { unsigned last_to; /* amount of mapping[last] */ unsigned count2; /* count used in unmarshalling */ @@ -100,9 +114,9 @@ struct afs_call { bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ + bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ - u16 service_id; /* RxRPC service ID to call */ - __be16 port; /* target UDP port */ + u16 service_id; /* Actual service ID (after upgrade) */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ @@ -111,15 +125,13 @@ struct afs_call { struct afs_call_type { const char *name; + unsigned int op; /* Really enum afs_fs_operation */ /* deliver request or reply data to an call * - returning an error will cause the call to be aborted */ int (*deliver)(struct afs_call *call); - /* map an abort code to an error number */ - int (*abort_to_error)(u32 abort_code); - /* clean up a call */ void (*destructor)(struct afs_call *call); @@ -128,6 +140,30 @@ struct afs_call_type { }; /* + * Key available for writeback on a file. + */ +struct afs_wb_key { + refcount_t usage; + struct key *key; + struct list_head vnode_link; /* Link in vnode->wb_keys */ +}; + +/* + * AFS open file information record. 
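/*
 * Illustration only (distilled from the fs.c conversions above; the function
 * name is invented): how the old fixed reply pointers of struct afs_call map
 * onto the new reply[] array, using FS.FetchData as the example.
 */
static void afs_example_fetch_data_replies(struct afs_call *call,
					   struct afs_vnode *vnode,
					   struct afs_read *req)
{
	call->reply[0] = vnode;		/* was call->reply */
	call->reply[1] = NULL;		/* volsync buffer; was call->reply2 */
	call->reply[2] = req;		/* was call->reply3 */
	/* call->reply[3] (was call->reply4) carries, for example, the new
	 * callback returned by FS.CreateFile. */
}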
Pointed to by file->private_data. + */ +struct afs_file { + struct key *key; /* The key this file was opened with */ + struct afs_wb_key *wb; /* Writeback key record for this file */ +}; + +static inline struct key *afs_file_key(struct file *file) +{ + struct afs_file *af = file->private_data; + + return af->key; +} + +/* * Record of an outstanding read operation on a vnode. */ struct afs_read { @@ -143,38 +179,13 @@ struct afs_read { }; /* - * record of an outstanding writeback on a vnode - */ -struct afs_writeback { - struct list_head link; /* link in vnode->writebacks */ - struct work_struct writer; /* work item to perform the writeback */ - struct afs_vnode *vnode; /* vnode to which this write applies */ - struct key *key; /* owner of this write */ - wait_queue_head_t waitq; /* completion and ready wait queue */ - pgoff_t first; /* first page in batch */ - pgoff_t point; /* last page in current store op */ - pgoff_t last; /* last page in batch (inclusive) */ - unsigned offset_first; /* offset into first page of start of write */ - unsigned to_last; /* offset into last page of end of write */ - int num_conflicts; /* count of conflicting writes in list */ - int usage; - bool conflicts; /* T if has dependent conflicts */ - enum { - AFS_WBACK_SYNCING, /* synchronisation being performed */ - AFS_WBACK_PENDING, /* write pending */ - AFS_WBACK_CONFLICTING, /* conflicting writes posted */ - AFS_WBACK_WRITING, /* writing back */ - AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ - } state __attribute__((packed)); -}; - -/* * AFS superblock private data * - there's one superblock per volume */ struct afs_super_info { + struct afs_net *net; /* Network namespace */ + struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ - char rwparent; /* T if parent is R/W AFS volume */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -185,149 +196,238 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) extern struct file_system_type afs_fs_type; /* - * entry in the cached cell catalogue + * AFS network namespace record. */ -struct afs_cache_cell { - char name[AFS_MAXCELLNAME]; /* cell name (padded with NULs) */ - struct in_addr vl_servers[15]; /* cached cell VL servers */ +struct afs_net { + struct afs_uuid uuid; + bool live; /* F if this namespace is being removed */ + + /* AF_RXRPC I/O stuff */ + struct socket *socket; + struct afs_call *spare_incoming_call; + struct work_struct charge_preallocation_work; + struct mutex socket_mutex; + atomic_t nr_outstanding_calls; + atomic_t nr_superblocks; + + /* Cell database */ + struct rb_root cells; + struct afs_cell *ws_cell; + struct work_struct cells_manager; + struct timer_list cells_timer; + atomic_t cells_outstanding; + seqlock_t cells_lock; + + spinlock_t proc_cells_lock; + struct list_head proc_cells; + + /* Known servers. Theoretically each fileserver can only be in one + * cell, but in practice, people create aliases and subsets and there's + * no easy way to distinguish them. 
+ */ + seqlock_t fs_lock; /* For fs_servers */ + struct rb_root fs_servers; /* afs_server (by server UUID or address) */ + struct list_head fs_updates; /* afs_server (by update_at) */ + struct hlist_head fs_proc; /* procfs servers list */ + + struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ + struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + seqlock_t fs_addr_lock; /* For fs_addresses[46] */ + + struct work_struct fs_manager; + struct timer_list fs_timer; + atomic_t servers_outstanding; + + /* File locking renewal management */ + struct mutex lock_manager_mutex; + + /* Misc */ + struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ +}; + +extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns + +enum afs_cell_state { + AFS_CELL_UNSET, + AFS_CELL_ACTIVATING, + AFS_CELL_ACTIVE, + AFS_CELL_DEACTIVATING, + AFS_CELL_INACTIVE, + AFS_CELL_FAILED, }; /* - * AFS cell record + * AFS cell record. + * + * This is a tricky concept to get right as it is possible to create aliases + * simply by pointing AFSDB/SRV records for two names at the same set of VL + * servers; it is also possible to do things like setting up two sets of VL + * servers, one of which provides a superset of the volumes provided by the + * other (for internal/external division, for example). + * + * Cells only exist in the sense that (a) a cell's name maps to a set of VL + * servers and (b) a cell's name is used by the client to select the key to use + * for authentication and encryption. The cell name is not typically used in + * the protocol. + * + * There is no easy way to determine if two cells are aliases or one is a + * subset of another. */ struct afs_cell { - atomic_t usage; - struct list_head link; /* main cell list link */ + union { + struct rcu_head rcu; + struct rb_node net_node; /* Node in net->cells */ + }; + struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ + struct work_struct manager; /* Manager for init/deinit/dns */ struct list_head proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - - /* server record management */ - rwlock_t servers_lock; /* active server list lock */ - struct list_head servers; /* active server list */ - - /* volume location record management */ - struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ - struct list_head vl_list; /* cell's active VL record list */ - spinlock_t vl_lock; /* vl_list lock */ - unsigned short vl_naddrs; /* number of VL servers in addr list */ - unsigned short vl_curr_svix; /* current server index */ - struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */ - - char name[0]; /* cell name - must go last */ + time64_t dns_expiry; /* Time AFSDB/SRV record expires */ + time64_t last_inactive; /* Time of last drop of usage count */ + atomic_t usage; + unsigned long flags; +#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */ +#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */ +#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */ +#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */ +#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */ + enum afs_cell_state state; + short error; + + /* Active fileserver interaction state. */ + struct list_head proc_volumes; /* procfs volume list */ + rwlock_t proc_lock; + + /* VL server list. 
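Illustrative sketch, not part of the patch: a cell record like the one above is obtained through the lookup API declared under cell.c further down. The argument meanings are assumptions inferred from the afs_lookup_cell() prototype and from its use in proc.c's "add" command; the returned cell is refcounted and would be dropped with afs_put_cell() when finished with.

static struct afs_cell *example_get_cell(struct afs_net *net, const char *name)
{
	/* NULL lets the DNS resolver supply the VL server addresses; the last
	 * argument appears to choose between reusing an existing record
	 * (false) and insisting that one does not already exist (true). */
	return afs_lookup_cell(net, name, strlen(name), NULL, false);
}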
*/ + rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ + struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ + u8 name_len; /* Length of name */ + char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; /* - * entry in the cached volume location catalogue + * Cached VLDB entry. + * + * This is pointed to by cell->vldb_entries, indexed by name. */ -struct afs_cache_vlocation { - /* volume name (lowercase, padded with NULs) */ - uint8_t name[AFS_MAXVOLNAME + 1]; +struct afs_vldb_entry { + afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */ - uint8_t nservers; /* number of entries used in servers[] */ - uint8_t vidmask; /* voltype mask for vid[] */ - uint8_t srvtmask[8]; /* voltype masks for servers[] */ + unsigned long flags; +#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */ +#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */ +#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */ +#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */ +#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ + + uuid_t fs_server[AFS_NMAXNSERVERS]; + u8 fs_mask[AFS_NMAXNSERVERS]; #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ #define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ - - afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */ - struct in_addr servers[8]; /* fileserver addresses */ - time_t rtime; /* last retrieval time */ + short error; + u8 nr_servers; /* Number of server records */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* - * volume -> vnode hash table entry + * Record of fileserver with which we're actively communicating. 
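Illustrative sketch, not part of the patch: a consumer of the cached VLDB entry above can test the AFS_VLDB_HAS_* bits before indexing vid[], which the comment says is ordered R/W, R/O, Bak. Using the AFSVL_*VOL constants as indices is an assumption; they only appear indirectly here, in proc.c's afs_vol_types table.

static afs_volid_t example_pick_volume_id(const struct afs_vldb_entry *vldb)
{
	/* Prefer the read-only instance when the VL server says one exists. */
	if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags))
		return vldb->vid[AFSVL_ROVOL];
	return vldb->vid[AFSVL_RWVOL];
}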
*/ -struct afs_cache_vhash { - afs_voltype_t vtype; /* which volume variation */ - uint8_t hash_bucket; /* which hash bucket this represents */ -} __attribute__((packed)); +struct afs_server { + struct rcu_head rcu; + union { + uuid_t uuid; /* Server ID */ + struct afs_uuid _uuid; + }; -/* - * AFS volume location record - */ -struct afs_vlocation { + struct afs_addr_list __rcu *addresses; + struct rb_node uuid_rb; /* Link in net->servers */ + struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ + struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node proc_link; /* Link in net->fs_proc */ + struct afs_server *gc_next; /* Next server in manager's list */ + time64_t put_time; /* Time at which last put */ + time64_t update_at; /* Time at which to next update the record */ + unsigned long flags; +#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ +#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ +#define AFS_SERVER_FL_UPDATING 4 +#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ +#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct list_head link; /* link in cell volume location list */ - struct list_head grave; /* link in master graveyard list */ - struct list_head update; /* link in master update list */ - struct afs_cell *cell; /* cell to which volume belongs */ -#ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ -#endif - struct afs_cache_vlocation vldb; /* volume information DB record */ - struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ - wait_queue_head_t waitq; /* status change waitqueue */ - time64_t update_at; /* time at which record should be updated */ - spinlock_t lock; /* access lock */ - afs_vlocation_state_t state; /* volume location state */ - unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ - unsigned short upd_busy_cnt; /* EBUSY count during update */ - bool valid; /* T if valid */ + u32 addr_version; /* Address list version */ + + /* file service access */ + rwlock_t fs_lock; /* access lock */ + + /* callback promise management */ + struct list_head cb_interests; /* List of superblocks using this server */ + unsigned cb_s_break; /* Break-everything counter. */ + rwlock_t cb_break_lock; /* Volume finding lock */ }; /* - * AFS fileserver record + * Interest by a superblock on a server. 
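Illustrative sketch, not part of the patch: server->addresses is RCU-managed and the address list itself is refcounted, so a caller that needs the addresses beyond the RCU read section pins the list first. afs_get_addrlist() and afs_put_addrlist() are declared under addr_list.c below; the helper name is invented.

static struct afs_addr_list *example_pin_addresses(struct afs_server *server)
{
	struct afs_addr_list *alist;

	rcu_read_lock();
	alist = afs_get_addrlist(rcu_dereference(server->addresses));
	rcu_read_unlock();

	return alist;	/* release with afs_put_addrlist() when finished */
}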
*/ -struct afs_server { - atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct in_addr addr; /* server address */ - struct afs_cell *cell; /* cell in which server resides */ - struct list_head link; /* link in cell's server list */ - struct list_head grave; /* link in master graveyard list */ - struct rb_node master_rb; /* link in master by-addr tree */ - struct rw_semaphore sem; /* access lock */ +struct afs_cb_interest { + struct list_head cb_link; /* Link in server->cb_interests */ + struct afs_server *server; /* Server on which this interest resides */ + struct super_block *sb; /* Superblock on which inodes reside */ + afs_volid_t vid; /* Volume ID to match */ + refcount_t usage; +}; - /* file service access */ - struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */ - unsigned long fs_act_jif; /* time at which last activity occurred */ - unsigned long fs_dead_jif; /* time at which no longer to be considered dead */ - spinlock_t fs_lock; /* access lock */ - int fs_state; /* 0 or reason FS currently marked dead (-errno) */ +/* + * Replaceable server list. + */ +struct afs_server_entry { + struct afs_server *server; + struct afs_cb_interest *cb_interest; +}; - /* callback promise management */ - struct rb_root cb_promises; /* vnode expiration list (ordered earliest first) */ - struct delayed_work cb_updater; /* callback updater */ - struct delayed_work cb_break_work; /* collected break dispatcher */ - wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */ - spinlock_t cb_lock; /* access lock */ - struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */ - atomic_t cb_break_n; /* number of pending breaks */ - u8 cb_break_head; /* head of callback breaking ring */ - u8 cb_break_tail; /* tail of callback breaking ring */ +struct afs_server_list { + refcount_t usage; + unsigned short nr_servers; + unsigned short index; /* Server currently in use */ + unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ + unsigned int seq; /* Set to ->servers_seq when installed */ + struct afs_server_entry servers[]; }; /* - * AFS volume access record + * Live AFS volume management. 
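Illustrative sketch, not part of the patch: a replaceable server list is scanned by index, typically to find the entry whose callback interest matches the one a vnode already holds; rotate.c below starts its fileserver search with the same scan.

static int example_find_entry(const struct afs_server_list *slist,
			      const struct afs_cb_interest *cbi)
{
	int i;

	for (i = 0; i < slist->nr_servers; i++)
		if (slist->servers[i].cb_interest == cbi)
			return i;

	return -1;	/* the vnode's preferred server is no longer listed */
}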
*/ struct afs_volume { + afs_volid_t vid; /* volume ID */ atomic_t usage; - struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ - struct afs_vlocation *vlocation; /* volume location */ + time64_t update_at; /* Time at which to next update */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct list_head proc_link; /* Link in cell->vl_proc */ + unsigned long flags; +#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */ +#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ +#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */ +#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ +#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ +#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - afs_volid_t vid; /* volume ID */ + struct afs_server_list *servers; /* List of servers on which volume resides */ + rwlock_t servers_lock; /* Lock for ->servers */ + unsigned int servers_seq; /* Incremented each time ->servers changes */ + afs_voltype_t type; /* type of volume */ + short error; char type_force; /* force volume type (suppress R/O -> R/W) */ - unsigned short nservers; /* number of server slots filled */ - unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ - struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ - struct rw_semaphore server_sem; /* lock for accessing current server */ -}; - -/* - * vnode catalogue entry - */ -struct afs_cache_vnode { - afs_vnodeid_t vnode_id; /* vnode ID */ - unsigned vnode_unique; /* vnode ID uniquifier */ - afs_dataversion_t data_version; /* data version */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* @@ -337,24 +437,20 @@ struct afs_vnode { struct inode vfs_inode; /* the VFS's inode record */ struct afs_volume *volume; /* volume on which vnode resides */ - struct afs_server *server; /* server currently supplying this file */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - struct afs_permits *permits; /* cache of permits so far obtained */ - struct mutex permits_lock; /* lock for altering permits list */ + struct afs_permits *permit_cache; /* cache of permits so far obtained */ + struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ - wait_queue_head_t update_waitq; /* status fetch waitqueue */ - int update_cnt; /* number of outstanding ops that will update the - * status */ - spinlock_t writeback_lock; /* lock for writebacks */ + spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; -#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ +#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */ #define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */ -#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */ +#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */ #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */ #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */ #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */ @@ -365,24 +461,21 @@ struct afs_vnode { 
#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ - long acl_order; /* ACL check count (callback break count) */ - - struct list_head writebacks; /* alterations in pagecache that need writing */ + struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ struct delayed_work lock_work; /* work to be done in locking */ struct key *unlock_key; /* key to be used in unlocking */ /* outstanding callback notification on this file */ - struct rb_node server_rb; /* link in server->fs_vnodes */ - struct rb_node cb_promise; /* link in server->cb_promises */ - struct work_struct cb_broken_work; /* work to be done on callback break */ - time64_t cb_expires; /* time at which callback expires */ - time64_t cb_expires_at; /* time used to order cb_promise */ + struct afs_cb_interest *cb_interest; /* Server on which this resides */ + unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_break; /* Break counter on vnode */ + seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ + + time64_t cb_expires_at; /* time at which callback expires */ unsigned cb_version; /* callback version */ - unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ - bool cb_promised; /* true if promise still holds */ }; /* @@ -390,16 +483,21 @@ struct afs_vnode { */ struct afs_permit { struct key *key; /* RxRPC ticket holding a security context */ - afs_access_t access_mask; /* access mask for this key */ + afs_access_t access; /* CallerAccess value for this key */ }; /* - * cache of security records from attempts to access a vnode + * Immutable cache of CallerAccess records from attempts to access vnodes. + * These may be shared between multiple vnodes. */ struct afs_permits { - struct rcu_head rcu; /* disposal procedure */ - int count; /* number of records */ - struct afs_permit permits[0]; /* the permits so far examined */ + struct rcu_head rcu; + struct hlist_node hash_node; /* Link in hash */ + unsigned long h; /* Hash value for this permit list */ + refcount_t usage; + unsigned short nr_permits; /* Number of records */ + bool invalidated; /* Invalidated due to key change */ + struct afs_permit permits[]; /* List of permits sorted by key pointer */ }; /* @@ -411,28 +509,78 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; -struct afs_uuid { - __be32 time_low; /* low part of timestamp */ - __be16 time_mid; /* mid part of timestamp */ - __be16 time_hi_and_version; /* high part of timestamp and version */ - __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ - __u8 clock_seq_low; /* clock seq low */ - __u8 node[6]; /* spatially unique node ID (MAC addr) */ +/* + * Cursor for iterating over a server's address list. + */ +struct afs_addr_cursor { + struct afs_addr_list *alist; /* Current address list (pins ref) */ + struct sockaddr_rxrpc *addr; + u32 abort_code; + unsigned short start; /* Starting point in alist->addrs[] */ + unsigned short index; /* Wrapping offset from start to current addr */ + short error; + bool begun; /* T if we've begun iteration */ + bool responded; /* T if the current address responded */ +}; + +/* + * Cursor for iterating over a set of fileservers. 
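Illustrative sketch, not part of the patch: the address cursor above is driven with afs_iterate_addresses() and afs_end_cursor(), declared under addr_list.c below. The skeleton assumes afs_end_cursor() releases the list pinned in ac->alist and returns the recorded error; the actual RPC step is only hinted at in a comment.

static int example_walk_addresses(struct afs_addr_cursor *ac)
{
	while (afs_iterate_addresses(ac)) {
		/* Issue an RPC to ac->addr here (e.g. via afs_make_call());
		 * on a definitive answer, note it and stop rotating. */
		ac->responded = true;
		break;
	}

	return afs_end_cursor(ac);
}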
+ */ +struct afs_fs_cursor { + struct afs_addr_cursor ac; + struct afs_vnode *vnode; + struct afs_server_list *server_list; /* Current server list (pins ref) */ + struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ + struct key *key; /* Key for the server */ + unsigned int cb_break; /* cb_break + cb_s_break before the call */ + unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ + unsigned char start; /* Initial index in server list */ + unsigned char index; /* Number of servers tried beyond start */ + unsigned short flags; +#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ +#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ }; +#include <trace/events/afs.h> + /*****************************************************************************/ /* + * addr_list.c + */ +static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist) +{ + if (alist) + refcount_inc(&alist->usage); + return alist; +} +extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, + unsigned short, + unsigned short); +extern void afs_put_addrlist(struct afs_addr_list *); +extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); +extern bool afs_iterate_addresses(struct afs_addr_cursor *); +extern int afs_end_cursor(struct afs_addr_cursor *); +extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); + +extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); +extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); + +/* * cache.c */ #ifdef CONFIG_AFS_FSCACHE extern struct fscache_netfs afs_cache_netfs; extern struct fscache_cookie_def afs_cell_cache_index_def; -extern struct fscache_cookie_def afs_vlocation_cache_index_def; extern struct fscache_cookie_def afs_volume_cache_index_def; extern struct fscache_cookie_def afs_vnode_cache_index_def; #else #define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) -#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL) #define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) #define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) #endif @@ -441,29 +589,31 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; * callback.c */ extern void afs_init_callback_state(struct afs_server *); -extern void afs_broken_callback_work(struct work_struct *); -extern void afs_break_callbacks(struct afs_server *, size_t, - struct afs_callback[]); -extern void afs_discard_callback_on_delete(struct afs_vnode *); -extern void afs_give_up_callback(struct afs_vnode *); -extern void afs_dispatch_give_up_callbacks(struct work_struct *); -extern void afs_flush_callback_breaks(struct afs_server *); -extern int __init afs_callback_update_init(void); -extern void afs_callback_update_kill(void); +extern void afs_break_callback(struct afs_vnode *); +extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); + +extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); +extern void afs_put_cb_interest(struct afs_net *, struct 
afs_cb_interest *); +extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); + +static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) +{ + refcount_inc(&cbi->usage); + return cbi; +} /* * cell.c */ -extern struct rw_semaphore afs_proc_cells_sem; -extern struct list_head afs_proc_cells; - -#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) -extern int afs_cell_init(char *); -extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); -extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); -extern struct afs_cell *afs_grab_cell(struct afs_cell *); -extern void afs_put_cell(struct afs_cell *); -extern void afs_cell_purge(void); +extern int afs_cell_init(struct afs_net *, const char *); +extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); +extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, + const char *, bool); +extern struct afs_cell *afs_get_cell(struct afs_cell *); +extern void afs_put_cell(struct afs_net *, struct afs_cell *); +extern void afs_manage_cells(struct work_struct *); +extern void afs_cells_timer(struct timer_list *); +extern void __net_exit afs_cell_purge(struct afs_net *); /* * cmservice.c @@ -473,6 +623,7 @@ extern bool afs_cm_incoming_call(struct afs_call *); /* * dir.c */ +extern bool afs_dir_check_page(struct inode *, struct page *); extern const struct inode_operations afs_dir_inode_operations; extern const struct dentry_operations afs_fs_dentry_operations; extern const struct file_operations afs_dir_file_operations; @@ -484,15 +635,19 @@ extern const struct address_space_operations afs_fs_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; +extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); +extern void afs_put_wb_key(struct afs_wb_key *); extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); +extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); extern int afs_page_filler(void *, struct page *); extern void afs_put_read(struct afs_read *); /* * flock.c */ -extern void __exit afs_kill_lock_manager(void); +extern struct workqueue_struct *afs_lock_manager; + extern void afs_lock_work(struct work_struct *); extern void afs_lock_may_be_available(struct afs_vnode *); extern int afs_lock(struct file *, int, struct file_lock *); @@ -501,48 +656,40 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_server *, struct key *, - struct afs_vnode *, struct afs_volsync *, - bool); -extern int afs_fs_give_up_callbacks(struct afs_server *, bool); -extern int afs_fs_fetch_data(struct afs_server *, struct key *, - struct afs_vnode *, struct afs_read *, bool); -extern int afs_fs_create(struct afs_server *, struct key *, - struct afs_vnode *, const char *, umode_t, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, bool); -extern int afs_fs_remove(struct afs_server *, struct key *, - struct afs_vnode *, const char *, bool, bool); -extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *, - struct afs_vnode *, const char *, bool); -extern int afs_fs_symlink(struct afs_server *, struct key *, - struct afs_vnode *, const char *, const char *, - struct afs_fid *, struct afs_file_status *, bool); -extern int afs_fs_rename(struct 
afs_server *, struct key *, - struct afs_vnode *, const char *, - struct afs_vnode *, const char *, bool); -extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, - pgoff_t, pgoff_t, unsigned, unsigned, bool); -extern int afs_fs_setattr(struct afs_server *, struct key *, - struct afs_vnode *, struct iattr *, bool); -extern int afs_fs_get_volume_status(struct afs_server *, struct key *, - struct afs_vnode *, - struct afs_volume_status *, bool); -extern int afs_fs_set_lock(struct afs_server *, struct key *, - struct afs_vnode *, afs_lock_type_t, bool); -extern int afs_fs_extend_lock(struct afs_server *, struct key *, - struct afs_vnode *, bool); -extern int afs_fs_release_lock(struct afs_server *, struct key *, - struct afs_vnode *, bool); +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *); +extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); +extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, + struct afs_fid *, struct afs_file_status *); +extern int afs_fs_rename(struct afs_fs_cursor *, const char *, + struct afs_vnode *, const char *); +extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, + pgoff_t, pgoff_t, unsigned, unsigned); +extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); +extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); +extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); +extern int afs_fs_extend_lock(struct afs_fs_cursor *); +extern int afs_fs_release_lock(struct afs_fs_cursor *); +extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); /* * inode.c */ +extern int afs_fetch_status(struct afs_vnode *, struct key *); +extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_autocell(struct inode *, const char *, int, struct key *); extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, - struct afs_callback *); + struct afs_callback *, + struct afs_cb_interest *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -554,7 +701,35 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; -extern struct afs_uuid afs_uuid; + +static inline struct afs_net *afs_d2net(struct dentry *dentry) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_i2net(struct inode *inode) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_sock2net(struct sock *sk) +{ + return &__afs_net; +} + +static inline struct afs_net *afs_get_net(struct afs_net *net) +{ + return net; +} + +static inline void afs_put_net(struct afs_net *net) +{ +} /* * misc.c @@ -579,23 +754,33 @@ extern int 
afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); /* * proc.c */ -extern int afs_proc_init(void); -extern void afs_proc_cleanup(void); -extern int afs_proc_cell_setup(struct afs_cell *); -extern void afs_proc_cell_remove(struct afs_cell *); +extern int __net_init afs_proc_init(struct afs_net *); +extern void __net_exit afs_proc_cleanup(struct afs_net *); +extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); +extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); + +/* + * rotate.c + */ +extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, + struct key *); +extern bool afs_select_fileserver(struct afs_fs_cursor *); +extern bool afs_select_current_fileserver(struct afs_fs_cursor *); +extern int afs_end_vnode_operation(struct afs_fs_cursor *); /* * rxrpc.c */ -extern struct socket *afs_socket; -extern atomic_t afs_outstanding_calls; +extern struct workqueue_struct *afs_async_calls; -extern int afs_open_socket(void); -extern void afs_close_socket(void); +extern int __net_init afs_open_socket(struct afs_net *); +extern void __net_exit afs_close_socket(struct afs_net *); +extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); extern int afs_queue_call_work(struct afs_call *); -extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool); -extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, +extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool); +extern struct afs_call *afs_alloc_flat_call(struct afs_net *, + const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); @@ -607,117 +792,135 @@ static inline int afs_transfer_reply(struct afs_call *call) return afs_extract_data(call, call->buffer, call->reply_max, false); } +static inline bool afs_check_call_state(struct afs_call *call, + enum afs_call_state state) +{ + return READ_ONCE(call->state) == state; +} + +static inline bool afs_set_call_state(struct afs_call *call, + enum afs_call_state from, + enum afs_call_state to) +{ + bool ok = false; + + spin_lock_bh(&call->state_lock); + if (call->state == from) { + call->state = to; + trace_afs_call_state(call, from, to, 0, 0); + ok = true; + } + spin_unlock_bh(&call->state_lock); + return ok; +} + +static inline void afs_set_call_complete(struct afs_call *call, + int error, u32 remote_abort) +{ + enum afs_call_state state; + bool ok = false; + + spin_lock_bh(&call->state_lock); + state = call->state; + if (state != AFS_CALL_COMPLETE) { + call->abort_code = remote_abort; + call->error = error; + call->state = AFS_CALL_COMPLETE; + trace_afs_call_state(call, state, AFS_CALL_COMPLETE, + error, remote_abort); + ok = true; + } + spin_unlock_bh(&call->state_lock); + if (ok) + trace_afs_call_done(call); +} + /* * security.c */ +extern void afs_put_permits(struct afs_permits *); extern void afs_clear_permits(struct afs_vnode *); -extern void afs_cache_permit(struct afs_vnode *, struct key *, long); +extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern int afs_permission(struct inode *, int); +extern void __exit afs_clean_up_permit_cache(void); /* * server.c */ extern spinlock_t afs_server_peer_lock; -#define afs_get_server(S) \ -do { \ - _debug("GET SERVER %d", atomic_read(&(S)->usage)); \ - 
atomic_inc(&(S)->usage); \ -} while(0) +static inline struct afs_server *afs_get_server(struct afs_server *server) +{ + atomic_inc(&server->usage); + return server; +} -extern struct afs_server *afs_lookup_server(struct afs_cell *, - const struct in_addr *); -extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); -extern void afs_put_server(struct afs_server *); -extern void __exit afs_purge_servers(void); +extern struct afs_server *afs_find_server(struct afs_net *, + const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); +extern void afs_put_server(struct afs_net *, struct afs_server *); +extern void afs_manage_servers(struct work_struct *); +extern void afs_servers_timer(struct timer_list *); +extern void __net_exit afs_purge_servers(struct afs_net *); +extern bool afs_probe_fileserver(struct afs_fs_cursor *); +extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); /* - * super.c + * server_list.c */ -extern int afs_fs_init(void); -extern void afs_fs_exit(void); +static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist) +{ + refcount_inc(&slist->usage); + return slist; +} -/* - * vlclient.c - */ -extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, - const char *, struct afs_cache_vlocation *, - bool); -extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *, - afs_volid_t, afs_voltype_t, - struct afs_cache_vlocation *, bool); +extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *); +extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *, + struct afs_vldb_entry *, + u8); +extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *); /* - * vlocation.c + * super.c */ -#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0) - -extern int __init afs_vlocation_update_init(void); -extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *, - struct key *, - const char *, size_t); -extern void afs_put_vlocation(struct afs_vlocation *); -extern void afs_vlocation_purge(void); +extern int __init afs_fs_init(void); +extern void __exit afs_fs_exit(void); /* - * vnode.c + * vlclient.c */ -static inline struct afs_vnode *AFS_FS_I(struct inode *inode) -{ - return container_of(inode, struct afs_vnode, vfs_inode); -} - -static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) -{ - return &vnode->vfs_inode; -} - -extern void afs_vnode_finalise_status_update(struct afs_vnode *, - struct afs_server *); -extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *, - struct key *); -extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *, - struct afs_read *); -extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *, - umode_t, struct afs_fid *, struct afs_file_status *, - struct afs_callback *, struct afs_server **); -extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *, - bool); -extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *, - const char *); -extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, - const char *, struct afs_fid *, - struct afs_file_status *, struct afs_server **); -extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, - struct key *, const char *, const char *); -extern int 
afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, - unsigned, unsigned); -extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); -extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, - struct afs_volume_status *); -extern int afs_vnode_set_lock(struct afs_vnode *, struct key *, - afs_lock_type_t); -extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *); -extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, + struct afs_addr_cursor *, + struct key *, const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, + struct key *, const uuid_t *); +extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, + struct key *, const uuid_t *); /* * volume.c */ -#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) +static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) +{ + if (volume) + atomic_inc(&volume->usage); + return volume; +} -extern void afs_put_volume(struct afs_volume *); -extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); -extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); -extern int afs_volume_release_fileserver(struct afs_vnode *, - struct afs_server *, int); +extern struct afs_volume *afs_create_volume(struct afs_mount_params *); +extern void afs_activate_volume(struct afs_volume *); +extern void afs_deactivate_volume(struct afs_volume *); +extern void afs_put_volume(struct afs_cell *, struct afs_volume *); +extern int afs_check_volume_status(struct afs_volume *, struct key *); /* * write.c */ extern int afs_set_page_dirty(struct page *); -extern void afs_put_writeback(struct afs_writeback *); extern int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); @@ -728,9 +931,11 @@ extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); -extern int afs_writeback_all(struct afs_vnode *); extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); +extern int afs_page_mkwrite(struct vm_fault *); +extern void afs_prune_wb_keys(struct afs_vnode *); +extern int afs_launder_page(struct page *); /* * xattr.c @@ -738,12 +943,42 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int); extern const struct xattr_handler *afs_xattr_handlers[]; extern ssize_t afs_listxattr(struct dentry *, char *, size_t); + +/* + * Miscellaneous inline functions. 
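Illustrative sketch, not part of the patch: the reworked vlclient.c API is cursor-based, so a VLDB lookup points an address cursor at the cell's VL servers and rotates through them. The shape below is an assumption built purely from the prototypes; in particular it assumes the call records its outcome in ac.error, and error handling plus freeing of any partial entry are omitted.

static struct afs_vldb_entry *example_vldb_lookup(struct afs_net *net,
						  struct afs_cell *cell,
						  struct key *key,
						  const char *volname)
{
	struct afs_vldb_entry *vldb;
	struct afs_addr_cursor ac;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		vldb = afs_vl_get_entry_by_name_u(net, &ac, key,
						  volname, strlen(volname));
		if (ac.error == 0) {
			afs_end_cursor(&ac);
			return vldb;
		}
	}

	return ERR_PTR(afs_end_cursor(&ac));
}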
+ */ +static inline struct afs_vnode *AFS_FS_I(struct inode *inode) +{ + return container_of(inode, struct afs_vnode, vfs_inode); +} + +static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) +{ + return &vnode->vfs_inode; +} + +static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + unsigned int cb_break) +{ + if (fc->ac.error == 0) + afs_cache_permit(vnode, fc->key, cb_break); +} + +static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + if (fc->ac.error == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode); + } +} + + /*****************************************************************************/ /* * debug tracing */ -#include <trace/events/afs.h> - extern unsigned afs_debug; #define dbgprintk(FMT,...) \ diff --git a/fs/afs/main.c b/fs/afs/main.c index 9944770849da..15a02a05ff40 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -31,57 +31,112 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -struct afs_uuid afs_uuid; struct workqueue_struct *afs_wq; +struct afs_net __afs_net; /* - * initialise the AFS client FS module + * Initialise an AFS network namespace record. */ -static int __init afs_init(void) +static int __net_init afs_net_init(struct afs_net *net) { int ret; - printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); + net->live = true; + generate_random_uuid((unsigned char *)&net->uuid); - generate_random_uuid((unsigned char *)&afs_uuid); + INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation); + mutex_init(&net->socket_mutex); - /* create workqueue */ - ret = -ENOMEM; - afs_wq = alloc_workqueue("afs", 0, 0); - if (!afs_wq) - return ret; + net->cells = RB_ROOT; + seqlock_init(&net->cells_lock); + INIT_WORK(&net->cells_manager, afs_manage_cells); + timer_setup(&net->cells_timer, afs_cells_timer, 0); - /* register the /proc stuff */ - ret = afs_proc_init(); - if (ret < 0) - goto error_proc; + spin_lock_init(&net->proc_cells_lock); + INIT_LIST_HEAD(&net->proc_cells); -#ifdef CONFIG_AFS_FSCACHE - /* we want to be able to cache */ - ret = fscache_register_netfs(&afs_cache_netfs); + seqlock_init(&net->fs_lock); + net->fs_servers = RB_ROOT; + INIT_LIST_HEAD(&net->fs_updates); + INIT_HLIST_HEAD(&net->fs_proc); + + INIT_HLIST_HEAD(&net->fs_addresses4); + INIT_HLIST_HEAD(&net->fs_addresses6); + seqlock_init(&net->fs_addr_lock); + + INIT_WORK(&net->fs_manager, afs_manage_servers); + timer_setup(&net->fs_timer, afs_servers_timer, 0); + + /* Register the /proc stuff */ + ret = afs_proc_init(net); if (ret < 0) - goto error_cache; -#endif + goto error_proc; - /* initialise the cell DB */ - ret = afs_cell_init(rootcell); + /* Initialise the cell DB */ + ret = afs_cell_init(net, rootcell); if (ret < 0) goto error_cell_init; - /* initialise the VL update process */ - ret = afs_vlocation_update_init(); + /* Create the RxRPC transport */ + ret = afs_open_socket(net); if (ret < 0) - goto error_vl_update_init; + goto error_open_socket; - /* initialise the callback update process */ - ret = afs_callback_update_init(); + return 0; + +error_open_socket: + net->live = false; + afs_cell_purge(net); + afs_purge_servers(net); +error_cell_init: + net->live = false; + afs_proc_cleanup(net); +error_proc: + net->live = false; + return ret; +} + +/* + * Clean up and destroy an AFS network namespace record. 
+ */ +static void __net_exit afs_net_exit(struct afs_net *net) +{ + net->live = false; + afs_cell_purge(net); + afs_purge_servers(net); + afs_close_socket(net); + afs_proc_cleanup(net); +} + +/* + * initialise the AFS client FS module + */ +static int __init afs_init(void) +{ + int ret = -ENOMEM; + + printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); + + afs_wq = alloc_workqueue("afs", 0, 0); + if (!afs_wq) + goto error_afs_wq; + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); + if (!afs_async_calls) + goto error_async; + afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); + if (!afs_lock_manager) + goto error_lockmgr; + +#ifdef CONFIG_AFS_FSCACHE + /* we want to be able to cache */ + ret = fscache_register_netfs(&afs_cache_netfs); if (ret < 0) - goto error_callback_update_init; + goto error_cache; +#endif - /* create the RxRPC transport */ - ret = afs_open_socket(); + ret = afs_net_init(&__afs_net); if (ret < 0) - goto error_open_socket; + goto error_net; /* register the filesystems */ ret = afs_fs_init(); @@ -91,21 +146,18 @@ static int __init afs_init(void) return ret; error_fs: - afs_close_socket(); -error_open_socket: - afs_callback_update_kill(); -error_callback_update_init: - afs_vlocation_purge(); -error_vl_update_init: - afs_cell_purge(); -error_cell_init: + afs_net_exit(&__afs_net); +error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); error_cache: #endif - afs_proc_cleanup(); -error_proc: + destroy_workqueue(afs_lock_manager); +error_lockmgr: + destroy_workqueue(afs_async_calls); +error_async: destroy_workqueue(afs_wq); +error_afs_wq: rcu_barrier(); printk(KERN_ERR "kAFS: failed to register: %d\n", ret); return ret; @@ -124,17 +176,14 @@ static void __exit afs_exit(void) printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); afs_fs_exit(); - afs_kill_lock_manager(); - afs_close_socket(); - afs_purge_servers(); - afs_callback_update_kill(); - afs_vlocation_purge(); - destroy_workqueue(afs_wq); - afs_cell_purge(); + afs_net_exit(&__afs_net); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif - afs_proc_cleanup(); + destroy_workqueue(afs_lock_manager); + destroy_workqueue(afs_async_calls); + destroy_workqueue(afs_wq); + afs_clean_up_permit_cache(); rcu_barrier(); } diff --git a/fs/afs/misc.c b/fs/afs/misc.c index c05f1f1c0d41..700a5fa7f4ec 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -21,12 +21,12 @@ int afs_abort_to_error(u32 abort_code) { switch (abort_code) { - /* low errno codes inserted into abort namespace */ + /* Low errno codes inserted into abort namespace */ case 13: return -EACCES; case 27: return -EFBIG; case 30: return -EROFS; - /* VICE "special error" codes; 101 - 111 */ + /* VICE "special error" codes; 101 - 111 */ case VSALVAGE: return -EIO; case VNOVNODE: return -ENOENT; case VNOVOL: return -ENOMEDIUM; @@ -39,7 +39,37 @@ int afs_abort_to_error(u32 abort_code) case VBUSY: return -EBUSY; case VMOVED: return -ENXIO; - /* Unified AFS error table; ET "uae" == 0x2f6df00 */ + /* Volume Location server errors */ + case AFSVL_IDEXIST: return -EEXIST; + case AFSVL_IO: return -EREMOTEIO; + case AFSVL_NAMEEXIST: return -EEXIST; + case AFSVL_CREATEFAIL: return -EREMOTEIO; + case AFSVL_NOENT: return -ENOMEDIUM; + case AFSVL_EMPTY: return -ENOMEDIUM; + case AFSVL_ENTDELETED: return -ENOMEDIUM; + case AFSVL_BADNAME: return -EINVAL; + case AFSVL_BADINDEX: return -EINVAL; + case AFSVL_BADVOLTYPE: return -EINVAL; + case AFSVL_BADSERVER: return -EINVAL; + case 
AFSVL_BADPARTITION: return -EINVAL; + case AFSVL_REPSFULL: return -EFBIG; + case AFSVL_NOREPSERVER: return -ENOENT; + case AFSVL_DUPREPSERVER: return -EEXIST; + case AFSVL_RWNOTFOUND: return -ENOENT; + case AFSVL_BADREFCOUNT: return -EINVAL; + case AFSVL_SIZEEXCEEDED: return -EINVAL; + case AFSVL_BADENTRY: return -EINVAL; + case AFSVL_BADVOLIDBUMP: return -EINVAL; + case AFSVL_IDALREADYHASHED: return -EINVAL; + case AFSVL_ENTRYLOCKED: return -EBUSY; + case AFSVL_BADVOLOPER: return -EBADRQC; + case AFSVL_BADRELLOCKTYPE: return -EINVAL; + case AFSVL_RERELEASE: return -EREMOTEIO; + case AFSVL_BADSERVERFLAG: return -EINVAL; + case AFSVL_PERM: return -EACCES; + case AFSVL_NOMEM: return -EREMOTEIO; + + /* Unified AFS error table; ET "uae" == 0x2f6df00 */ case 0x2f6df00: return -EPERM; case 0x2f6df01: return -ENOENT; case 0x2f6df04: return -EIO; @@ -68,7 +98,7 @@ int afs_abort_to_error(u32 abort_code) case 0x2f6df6c: return -ETIMEDOUT; case 0x2f6df78: return -EDQUOT; - /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ + /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ case RXKADINCONSISTENCY: return -EPROTO; case RXKADPACKETSHORT: return -EPROTO; case RXKADLEVELFAIL: return -EKEYREJECTED; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 35efb9a31dd7..4508dd54f789 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -17,8 +17,15 @@ #include <linux/uaccess.h> #include "internal.h" -static struct proc_dir_entry *proc_afs; +static inline struct afs_net *afs_proc2net(struct file *f) +{ + return &__afs_net; +} +static inline struct afs_net *afs_seq2net(struct seq_file *m) +{ + return &__afs_net; // TODO: use seq_file_net(m) +} static int afs_proc_cells_open(struct inode *inode, struct file *file); static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos); @@ -98,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .release = seq_release, }; -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); -static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, +static int afs_proc_servers_open(struct inode *inode, struct file *file); +static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); +static void *afs_proc_servers_next(struct seq_file *p, void *v, loff_t *pos); -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); -static int afs_proc_cell_servers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_servers_ops = { - .start = afs_proc_cell_servers_start, - .next = afs_proc_cell_servers_next, - .stop = afs_proc_cell_servers_stop, - .show = afs_proc_cell_servers_show, +static void afs_proc_servers_stop(struct seq_file *p, void *v); +static int afs_proc_servers_show(struct seq_file *m, void *v); + +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, }; -static const struct file_operations afs_proc_cell_servers_fops = { - .open = afs_proc_cell_servers_open, +static const struct file_operations afs_proc_servers_fops = { + .open = afs_proc_servers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -122,23 +129,24 @@ static const struct file_operations afs_proc_cell_servers_fops = { /* * initialise the /proc/fs/afs/ directory */ -int afs_proc_init(void) +int afs_proc_init(struct afs_net 
*net) { _enter(""); - proc_afs = proc_mkdir("fs/afs", NULL); - if (!proc_afs) + net->proc_afs = proc_mkdir("fs/afs", NULL); + if (!net->proc_afs) goto error_dir; - if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops)) + if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || + !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || + !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops)) goto error_tree; _leave(" = 0"); return 0; error_tree: - remove_proc_subtree("fs/afs", NULL); + proc_remove(net->proc_afs); error_dir: _leave(" = -ENOMEM"); return -ENOMEM; @@ -147,9 +155,10 @@ error_dir: /* * clean up the /proc/fs/afs/ directory */ -void afs_proc_cleanup(void) +void afs_proc_cleanup(struct afs_net *net) { - remove_proc_subtree("fs/afs", NULL); + proc_remove(net->proc_afs); + net->proc_afs = NULL; } /* @@ -166,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) m = file->private_data; m->private = PDE_DATA(inode); - return 0; } @@ -176,25 +184,28 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) { - /* lock the list against modification */ - down_read(&afs_proc_cells_sem); - return seq_list_start_head(&afs_proc_cells, *_pos); + struct afs_net *net = afs_seq2net(m); + + rcu_read_lock(); + return seq_list_start_head(&net->proc_cells, *_pos); } /* * move to next cell in cells list */ -static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos) +static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &afs_proc_cells, pos); + struct afs_net *net = afs_seq2net(m); + + return seq_list_next(v, &net->proc_cells, pos); } /* * clean up after reading from the cells list */ -static void afs_proc_cells_stop(struct seq_file *p, void *v) +static void afs_proc_cells_stop(struct seq_file *m, void *v) { - up_read(&afs_proc_cells_sem); + rcu_read_unlock(); } /* @@ -203,16 +214,16 @@ static void afs_proc_cells_stop(struct seq_file *p, void *v) static int afs_proc_cells_show(struct seq_file *m, void *v) { struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_net *net = afs_seq2net(m); - if (v == &afs_proc_cells) { + if (v == &net->proc_cells) { /* display header on line 1 */ seq_puts(m, "USE NAME\n"); return 0; } /* display one cell per line on subsequent lines */ - seq_printf(m, "%3d %s\n", - atomic_read(&cell->usage), cell->name); + seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); return 0; } @@ -223,6 +234,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos) { + struct afs_net *net = afs_proc2net(file); char *kbuf, *name, *args; int ret; @@ -264,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, if (strcmp(kbuf, "add") == 0) { struct afs_cell *cell; - cell = afs_cell_create(name, strlen(name), args, false); + cell = afs_lookup_cell(net, name, strlen(name), args, true); if (IS_ERR(cell)) { ret = PTR_ERR(cell); goto done; } - afs_put_cell(cell); + set_bit(AFS_CELL_FL_NO_GC, &cell->flags); printk("kAFS: Added new cell '%s'\n", name); } else { goto inval; @@ -303,6 +315,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, const char __user *buf, size_t size, loff_t *_pos) { + struct afs_net 
*net = afs_proc2net(file); char *kbuf, *s; int ret; @@ -322,7 +335,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file, /* determine command to perform */ _debug("rootcell=%s", kbuf); - ret = afs_cell_init(kbuf); + ret = afs_cell_init(net, kbuf); if (ret >= 0) ret = size; /* consume everything, always */ @@ -334,29 +347,27 @@ static ssize_t afs_proc_rootcell_write(struct file *file, /* * initialise /proc/fs/afs/<cell>/ */ -int afs_proc_cell_setup(struct afs_cell *cell) +int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) { struct proc_dir_entry *dir; - _enter("%p{%s}", cell, cell->name); + _enter("%p{%s},%p", cell, cell->name, net->proc_afs); - dir = proc_mkdir(cell->name, proc_afs); + dir = proc_mkdir(cell->name, net->proc_afs); if (!dir) goto error_dir; - if (!proc_create_data("servers", 0, dir, - &afs_proc_cell_servers_fops, cell) || - !proc_create_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_fops, cell) || + if (!proc_create_data("vlservers", 0, dir, + &afs_proc_cell_vlservers_fops, cell) || !proc_create_data("volumes", 0, dir, - &afs_proc_cell_volumes_fops, cell)) + &afs_proc_cell_volumes_fops, cell)) goto error_tree; _leave(" = 0"); return 0; error_tree: - remove_proc_subtree(cell->name, proc_afs); + remove_proc_subtree(cell->name, net->proc_afs); error_dir: _leave(" = -ENOMEM"); return -ENOMEM; @@ -365,11 +376,11 @@ error_dir: /* * remove /proc/fs/afs/<cell>/ */ -void afs_proc_cell_remove(struct afs_cell *cell) +void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) { _enter(""); - remove_proc_subtree(cell->name, proc_afs); + remove_proc_subtree(cell->name, net->proc_afs); _leave(""); } @@ -407,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) _enter("cell=%p pos=%Ld", cell, *_pos); - /* lock the list against modification */ - down_read(&cell->vl_sem); - return seq_list_start_head(&cell->vl_list, *_pos); + read_lock(&cell->proc_lock); + return seq_list_start_head(&cell->proc_volumes, *_pos); } /* @@ -421,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, struct afs_cell *cell = p->private; _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->vl_list, _pos); + return seq_list_next(v, &cell->proc_volumes, _pos); } /* @@ -431,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) { struct afs_cell *cell = p->private; - up_read(&cell->vl_sem); + read_unlock(&cell->proc_lock); } -static const char afs_vlocation_states[][4] = { - [AFS_VL_NEW] = "New", - [AFS_VL_CREATING] = "Crt", - [AFS_VL_VALID] = "Val", - [AFS_VL_NO_VOLUME] = "NoV", - [AFS_VL_UPDATING] = "Upd", - [AFS_VL_VOLUME_DELETED] = "Del", - [AFS_VL_UNCERTAIN] = "Unc", +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", }; /* @@ -450,23 +456,17 @@ static const char afs_vlocation_states[][4] = { static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { struct afs_cell *cell = m->private; - struct afs_vlocation *vlocation = - list_entry(v, struct afs_vlocation, link); + struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); - /* display header on line 1 */ - if (v == &cell->vl_list) { - seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); + /* Display header on line 1 */ + if (v == &cell->proc_volumes) { + seq_puts(m, "USE VID TY\n"); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%3d %s %08x %08x %08x %s\n", - atomic_read(&vlocation->usage), - 
afs_vlocation_states[vlocation->state], - vlocation->vldb.vid[0], - vlocation->vldb.vid[1], - vlocation->vldb.vid[2], - vlocation->vldb.name); + seq_printf(m, "%3d %08x %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type]); return 0; } @@ -501,23 +501,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) { + struct afs_addr_list *alist; struct afs_cell *cell = m->private; loff_t pos = *_pos; - _enter("cell=%p pos=%Ld", cell, *_pos); + rcu_read_lock(); - /* lock the list against modification */ - down_read(&cell->vl_sem); + alist = rcu_dereference(cell->vl_addrs); /* allow for the header line */ if (!pos) return (void *) 1; pos--; - if (pos >= cell->vl_naddrs) + if (!alist || pos >= alist->nr_addrs) return NULL; - return &cell->vl_addrs[pos]; + return alist->addrs + pos; } /* @@ -526,17 +526,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, loff_t *_pos) { + struct afs_addr_list *alist; struct afs_cell *cell = p->private; loff_t pos; - _enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos); + alist = rcu_dereference(cell->vl_addrs); pos = *_pos; (*_pos)++; - if (pos >= cell->vl_naddrs) + if (!alist || pos >= alist->nr_addrs) return NULL; - return &cell->vl_addrs[pos]; + return alist->addrs + pos; } /* @@ -544,9 +545,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, */ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) { - struct afs_cell *cell = p->private; - - up_read(&cell->vl_sem); + rcu_read_unlock(); } /* @@ -554,100 +553,76 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) */ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct in_addr *addr = v; + struct sockaddr_rxrpc *addr = v; /* display header on line 1 */ - if (v == (struct in_addr *) 1) { + if (v == (void *)1) { seq_puts(m, "ADDRESS\n"); return 0; } /* display one cell per line on subsequent lines */ - seq_printf(m, "%pI4\n", &addr->s_addr); + seq_printf(m, "%pISp\n", &addr->transport); return 0; } /* - * open "/proc/fs/afs/<cell>/servers" which provides a summary of active + * open "/proc/fs/afs/servers" which provides a summary of active * servers */ -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) +static int afs_proc_servers_open(struct inode *inode, struct file *file) { - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_servers_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = cell; - return 0; + return seq_open(file, &afs_proc_servers_ops); } /* - * set up the iterator to start reading from the cells list and return the - * first item + * Set up the iterator to start reading from the server list and return the + * first item. 
*/ -static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) - __acquires(m->private->servers_lock) +static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) { - struct afs_cell *cell = m->private; - - _enter("cell=%p pos=%Ld", cell, *_pos); + struct afs_net *net = afs_seq2net(m); - /* lock the list against modification */ - read_lock(&cell->servers_lock); - return seq_list_start_head(&cell->servers, *_pos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->fs_proc, *_pos); } /* * move to next cell in cells list */ -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, - loff_t *_pos) +static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = p->private; + struct afs_net *net = afs_seq2net(m); - _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->servers, _pos); + return seq_hlist_next_rcu(v, &net->fs_proc, _pos); } /* * clean up after reading from the cells list */ -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) - __releases(p->private->servers_lock) +static void afs_proc_servers_stop(struct seq_file *p, void *v) { - struct afs_cell *cell = p->private; - - read_unlock(&cell->servers_lock); + rcu_read_unlock(); } /* * display a header line followed by a load of volume lines */ -static int afs_proc_cell_servers_show(struct seq_file *m, void *v) +static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct afs_cell *cell = m->private; - struct afs_server *server = list_entry(v, struct afs_server, link); - char ipaddr[20]; + struct afs_server *server; + struct afs_addr_list *alist; - /* display header on line 1 */ - if (v == &cell->servers) { - seq_puts(m, "USE ADDR STATE\n"); + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID USE ADDR\n"); return 0; } - /* display one cell per line on subsequent lines */ - sprintf(ipaddr, "%pI4", &server->addr); - seq_printf(m, "%3d %-15.15s %5d\n", - atomic_read(&server->usage), ipaddr, server->fs_state); - + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %pISp\n", + &server->uuid, + atomic_read(&server->usage), + &alist->addrs[alist->index].transport); return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c new file mode 100644 index 000000000000..e728ca1776c9 --- /dev/null +++ b/fs/afs/rotate.c @@ -0,0 +1,715 @@ +/* Handle fileserver selection and rotation. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/sched/signal.h> +#include "internal.h" +#include "afs_fs.h" + +/* + * Initialise a filesystem server cursor for iterating over FS servers. + */ +void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + memset(fc, 0, sizeof(*fc)); +} + +/* + * Begin an operation on the fileserver. + * + * Fileserver operations are serialised on the server by vnode, so we serialise + * them here also using the io_lock. 
+ */ +bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + struct key *key) +{ + afs_init_fs_cursor(fc, vnode); + fc->vnode = vnode; + fc->key = key; + fc->ac.error = SHRT_MAX; + + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + fc->ac.error = -EINTR; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) || + test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) + fc->flags |= AFS_FS_CURSOR_CUR_ONLY; + return true; +} + +/* + * Begin iteration through a server list, starting with the vnode's last used + * server if possible, or the last recorded good server if not. + */ +static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + struct afs_cb_interest *cbi; + int i; + + read_lock(&vnode->volume->servers_lock); + fc->server_list = afs_get_serverlist(vnode->volume->servers); + read_unlock(&vnode->volume->servers_lock); + + cbi = vnode->cb_interest; + if (cbi) { + /* See if the vnode's preferred record is still available */ + for (i = 0; i < fc->server_list->nr_servers; i++) { + if (fc->server_list->servers[i].cb_interest == cbi) { + fc->start = i; + goto found_interest; + } + } + + /* If we have a lock outstanding on a server that's no longer + * serving this vnode, then we can't switch to another server + * and have to return an error. + */ + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + return false; + } + + /* Note that the callback promise is effectively broken */ + write_seqlock(&vnode->cb_lock); + ASSERTCMP(cbi, ==, vnode->cb_interest); + vnode->cb_interest = NULL; + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + vnode->cb_break++; + write_sequnlock(&vnode->cb_lock); + + afs_put_cb_interest(afs_v2net(vnode), cbi); + cbi = NULL; + } else { + fc->start = READ_ONCE(fc->server_list->index); + } + +found_interest: + fc->index = fc->start; + return true; +} + +/* + * Post volume busy note. + */ +static void afs_busy(struct afs_volume *volume, u32 abort_code) +{ + const char *m; + + switch (abort_code) { + case VOFFLINE: m = "offline"; break; + case VRESTARTING: m = "restarting"; break; + case VSALVAGING: m = "being salvaged"; break; + default: m = "busy"; break; + } + + pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); +} + +/* + * Sleep and retry the operation to the same fileserver. + */ +static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) +{ + msleep_interruptible(1000); + if (signal_pending(current)) { + fc->ac.error = -ERESTARTSYS; + return false; + } + + return true; +} + +/* + * Select the fileserver to use. May be called multiple times to rotate + * through the fileservers. + */ +bool afs_select_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_addr_list *alist; + struct afs_server *server; + struct afs_vnode *vnode = fc->vnode; + + _enter("%u/%u,%u/%u,%d,%d", + fc->index, fc->start, + fc->ac.index, fc->ac.start, + fc->ac.error, fc->ac.abort_code); + + if (fc->flags & AFS_FS_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + /* Evaluate the result of the previous operation, if there was one. */ + switch (fc->ac.error) { + case SHRT_MAX: + goto start; + + case 0: + default: + /* Success or local failure. Stop. */ + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [okay/local %d]", fc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. 
+ */ + switch (fc->ac.abort_code) { + case VNOVOL: + /* This fileserver doesn't know about the volume. + * - May indicate that the VL is wrong - retry once and compare + * the results. + * - May indicate that the fileserver couldn't attach to the vol. + */ + if (fc->flags & AFS_FS_CURSOR_VNOVOL) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + write_lock(&vnode->volume->servers_lock); + fc->server_list->vnovol_mask |= 1 << fc->index; + write_unlock(&vnode->volume->servers_lock); + + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + /* If the server list didn't change, then assume that + * it's the fileserver having trouble. + */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + /* Try again */ + fc->flags |= AFS_FS_CURSOR_VNOVOL; + _leave(" = t [vnovol]"); + return true; + + case VSALVAGE: /* TODO: Should this return an error or iterate? */ + case VVOLEXISTS: + case VNOSERVICE: + case VONLINE: + case VDISKFULL: + case VOVERQUOTA: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto next_server; + + case VOFFLINE: + if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + } + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EADV; + goto failed; + } + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + goto failed; + } + goto busy; + + case VSALVAGING: + case VRESTARTING: + case VBUSY: + /* Retry after going round all the servers unless we + * have a file lock we need to maintain. + */ + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EBUSY; + goto failed; + } + if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + } + busy: + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + if (!afs_sleep_and_retry(fc)) + goto failed; + + /* Retry with same server & address */ + _leave(" = t [vbusy]"); + return true; + } + + fc->flags |= AFS_FS_CURSOR_VBUSY; + goto next_server; + + case VMOVED: + /* The volume migrated to another server. We consider + * all locks and callbacks broken and request + * an update from the VLDB. + * + * We also limit the number of VMOVED hops we will + * honour, just in case someone sets up a loop. + */ + if (fc->flags & AFS_FS_CURSOR_VMOVED) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + fc->flags |= AFS_FS_CURSOR_VMOVED; + + set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + /* If the server list didn't change, then the VLDB is + * out of sync with the fileservers. This is hopefully + * a temporary condition, however, so we don't want to + * permanently block access to the file. + * + * TODO: Try other fileservers if we can. + * + * TODO: Retry a few times with sleeps.
+ */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + goto restart_from_beginning; + + default: + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn"); + goto iterate_address; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&fc->ac); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + afs_put_serverlist(afs_v2net(vnode), fc->server_list); + fc->server_list = NULL; +start: + _debug("start"); + /* See if we need to do an update of the volume record. Note that the + * volume may have moved or even have been deleted. + */ + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (!afs_start_fs_iteration(fc, vnode)) + goto failed; + goto use_server; + +next_server: + _debug("next"); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + fc->index++; + if (fc->index >= fc->server_list->nr_servers) + fc->index = 0; + if (fc->index != fc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (fc->flags & AFS_FS_CURSOR_VBUSY) + goto restart_from_beginning; + + fc->ac.error = -EDESTADDRREQ; + goto failed; + +use_server: + _debug("use"); + /* We're starting on a different fileserver from the list. We need to + * check it, create a callback intercept, find its address list and + * probe its capabilities before we use it. + */ + ASSERTCMP(fc->ac.alist, ==, NULL); + server = fc->server_list->servers[fc->index].server; + + if (!afs_check_server_record(fc, server)) + goto failed; + + _debug("USING SERVER: %pU", &server->uuid); + + /* Make sure we've got a callback interest record for this server. We + * have to link it in before we send the request as we can be sent a + * break request before we've finished decoding the reply and + * installing the vnode. + */ + fc->ac.error = afs_register_server_cb_interest( + vnode, &fc->server_list->servers[fc->index]); + if (fc->ac.error < 0) + goto failed; + + fc->cbi = afs_get_cb_interest(vnode->cb_interest); + + read_lock(&server->fs_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(alist); + read_unlock(&server->fs_lock); + + + /* Probe the current fileserver if we haven't done so yet. */ + if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { + fc->ac.alist = afs_get_addrlist(alist); + + if (!afs_probe_fileserver(fc)) + goto failed; + } + + if (!fc->ac.alist) + fc->ac.alist = alist; + else + afs_put_addrlist(alist); + + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + goto iterate_address; + +iterate_address: + ASSERT(fc->ac.alist); + _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. 
+ */ + if (afs_iterate_addresses(&fc->ac)) { + _leave(" = t"); + return true; + } + + afs_end_cursor(&fc->ac); + goto next_server; + +failed: + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [failed %d]", fc->ac.error); + return false; +} + +/* + * Select the same fileserver we used for a vnode before and only that + * fileserver. We use this when we have a lock on that file, which is backed + * only by the fileserver we obtained it from. + */ +bool afs_select_current_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_cb_interest *cbi = vnode->cb_interest; + struct afs_addr_list *alist; + + _enter(""); + + if (!cbi) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + read_lock(&cbi->server->fs_lock); + alist = afs_get_addrlist(cbi->server->addresses); + read_unlock(&cbi->server->fs_lock); + if (!alist) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + fc->ac.alist = alist; + fc->ac.error = 0; + return true; +} + +/* + * Tidy up a filesystem cursor and unlock the vnode. + */ +int afs_end_vnode_operation(struct afs_fs_cursor *fc) +{ + struct afs_net *net = afs_v2net(fc->vnode); + int ret; + + mutex_unlock(&fc->vnode->io_lock); + + afs_end_cursor(&fc->ac); + afs_put_cb_interest(net, fc->cbi); + afs_put_serverlist(net, fc->server_list); + + ret = fc->ac.error; + if (ret == -ECONNABORTED) + afs_abort_to_error(fc->ac.abort_code); + + return fc->ac.error; +} + +#if 0 +/* + * Set a filesystem server cursor for using a specific FS server. + */ +int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + afs_init_fs_cursor(fc, vnode); + + read_seqlock_excl(&vnode->cb_lock); + if (vnode->cb_interest) { + if (vnode->cb_interest->server->fs_state == 0) + fc->server = afs_get_server(vnode->cb_interest->server); + else + fc->ac.error = vnode->cb_interest->server->fs_state; + } else { + fc->ac.error = -ESTALE; + } + read_sequnlock_excl(&vnode->cb_lock); + + return fc->ac.error; +} + +/* + * pick a server to use to try accessing this volume + * - returns with an elevated usage count on the server chosen + */ +bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) +{ + struct afs_volume *volume = vnode->volume; + struct afs_server *server; + int ret, state, loop; + + _enter("%s", volume->vlocation->vldb.name); + + /* stick with the server we're already using if we can */ + if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { + fc->server = afs_get_server(vnode->cb_interest->server); + goto set_server; + } + + down_read(&volume->server_sem); + + /* handle the no-server case */ + if (volume->nservers == 0) { + fc->ac.error = volume->rjservers ? 
-ENOMEDIUM : -ESTALE; + up_read(&volume->server_sem); + _leave(" = f [no servers %d]", fc->ac.error); + return false; + } + + /* basically, just search the list for the first live server and use + * that */ + ret = 0; + for (loop = 0; loop < volume->nservers; loop++) { + server = volume->servers[loop]; + state = server->fs_state; + + _debug("consider %d [%d]", loop, state); + + switch (state) { + case 0: + goto picked_server; + + case -ENETUNREACH: + if (ret == 0) + ret = state; + break; + + case -EHOSTUNREACH: + if (ret == 0 || + ret == -ENETUNREACH) + ret = state; + break; + + case -ECONNREFUSED: + if (ret == 0 || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH) + ret = state; + break; + + default: + case -EREMOTEIO: + if (ret == 0 || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH || + ret == -ECONNREFUSED) + ret = state; + break; + } + } + +error: + fc->ac.error = ret; + + /* no available servers + * - TODO: handle the no active servers case better + */ + up_read(&volume->server_sem); + _leave(" = f [%d]", fc->ac.error); + return false; + +picked_server: + /* Found an apparently healthy server. We need to register an interest + * in receiving callbacks before we talk to it. + */ + ret = afs_register_server_cb_interest(vnode, + &volume->cb_interests[loop], server); + if (ret < 0) + goto error; + + fc->server = afs_get_server(server); + up_read(&volume->server_sem); +set_server: + fc->ac.alist = afs_get_addrlist(fc->server->addrs); + fc->ac.addr = &fc->ac.alist->addrs[0]; + _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); + _leave(" = t (picked %pIS)", &fc->ac.addr->transport); + return true; +} + +/* + * release a server after use + * - releases the ref on the server struct that was acquired by picking + * - records result of using a particular server to access a volume + * - return true to try again, false if okay or to issue error + * - the caller must release the server struct if result was false + */ +bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + struct afs_volume *volume = vnode->volume; + struct afs_server *server = fc->server; + unsigned loop; + + _enter("%s,%pIS,%d", + volume->vlocation->vldb.name, &fc->ac.addr->transport, + fc->ac.error); + + switch (fc->ac.error) { + /* success */ + case 0: + server->fs_state = 0; + _leave(" = f"); + return false; + + /* the fileserver denied all knowledge of the volume */ + case -ENOMEDIUM: + down_write(&volume->server_sem); + + /* firstly, find where the server is in the active list (if it + * is) */ + for (loop = 0; loop < volume->nservers; loop++) + if (volume->servers[loop] == server) + goto present; + + /* no longer there - may have been discarded by another op */ + goto try_next_server_upw; + + present: + volume->nservers--; + memmove(&volume->servers[loop], + &volume->servers[loop + 1], + sizeof(volume->servers[loop]) * + (volume->nservers - loop)); + volume->servers[volume->nservers] = NULL; + afs_put_server(afs_v2net(vnode), server); + volume->rjservers++; + + if (volume->nservers > 0) + /* another server might acknowledge its existence */ + goto try_next_server_upw; + + /* handle the case where all the fileservers have rejected the + * volume + * - TODO: try asking the fileservers for volume information + * - TODO: contact the VL server again to see if the volume is + * no longer registered + */ + up_write(&volume->server_sem); + afs_put_server(afs_v2net(vnode), server); + fc->server = NULL; + _leave(" = f [completely rejected]"); + return false; + + /* problem reaching the server */ + 
case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIME: + case -ETIMEDOUT: + case -EREMOTEIO: + /* mark the server as dead + * TODO: vary dead timeout depending on error + */ + spin_lock(&server->fs_lock); + if (!server->fs_state) { + server->fs_state = fc->ac.error; + printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); + } + spin_unlock(&server->fs_lock); + goto try_next_server; + + /* miscellaneous error */ + default: + case -ENOMEM: + case -ENONET: + /* tell the caller to accept the result */ + afs_put_server(afs_v2net(vnode), server); + fc->server = NULL; + _leave(" = f [local failure]"); + return false; + } + + /* tell the caller to loop around and try the next server */ +try_next_server_upw: + up_write(&volume->server_sem); +try_next_server: + afs_put_server(afs_v2net(vnode), server); + _leave(" = t [try next server]"); + return true; +} + +/* + * Clean up a fileserver cursor. + */ +int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) +{ + afs_end_cursor(&fc->ac); + afs_put_server(net, fc->server); + return fc->ac.error; +} + +#endif diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bb1e2caa1720..ea1460b9b71a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -17,13 +17,10 @@ #include "internal.h" #include "afs_cm.h" -struct socket *afs_socket; /* my RxRPC socket */ -static struct workqueue_struct *afs_async_calls; -static struct afs_call *afs_spare_incoming_call; -atomic_t afs_outstanding_calls; +struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); -static int afs_wait_for_call_to_complete(struct afs_call *); +static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); @@ -34,24 +31,13 @@ static int afs_deliver_cm_op_id(struct afs_call *); static const struct afs_call_type afs_RXCMxxxx = { .name = "CB.xxxx", .deliver = afs_deliver_cm_op_id, - .abort_to_error = afs_abort_to_error, }; -static void afs_charge_preallocation(struct work_struct *); - -static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); - -static int afs_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} - /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT */ -int afs_open_socket(void) +int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; @@ -59,28 +45,26 @@ int afs_open_socket(void) _enter(""); - ret = -ENOMEM; - afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); - if (!afs_async_calls) - goto error_0; - - ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); if (ret < 0) goto error_1; socket->sk->sk_allocation = GFP_NOFS; /* bind the callback manager's address to make this a server socket */ + memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = CM_SERVICE; srx.transport_type = SOCK_DGRAM; - srx.transport_len = sizeof(srx.transport.sin); - srx.transport.sin.sin_family = AF_INET; - srx.transport.sin.sin_port = htons(AFS_CM_PORT); - memset(&srx.transport.sin.sin_addr, 0, - sizeof(srx.transport.sin.sin_addr)); + srx.transport_len = 
sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret == -EADDRINUSE) { + srx.transport.sin6.sin6_port = 0; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + } if (ret < 0) goto error_2; @@ -91,16 +75,14 @@ int afs_open_socket(void) if (ret < 0) goto error_2; - afs_socket = socket; - afs_charge_preallocation(NULL); + net->socket = socket; + afs_charge_preallocation(&net->charge_preallocation_work); _leave(" = 0"); return 0; error_2: sock_release(socket); error_1: - destroy_workqueue(afs_async_calls); -error_0: _leave(" = %d", ret); return ret; } @@ -108,36 +90,36 @@ error_0: /* * close the RxRPC socket AFS was using */ -void afs_close_socket(void) +void afs_close_socket(struct afs_net *net) { _enter(""); - kernel_listen(afs_socket, 0); + kernel_listen(net->socket, 0); flush_workqueue(afs_async_calls); - if (afs_spare_incoming_call) { - afs_put_call(afs_spare_incoming_call); - afs_spare_incoming_call = NULL; + if (net->spare_incoming_call) { + afs_put_call(net->spare_incoming_call); + net->spare_incoming_call = NULL; } - _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); - wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, + _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls)); + wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); - kernel_sock_shutdown(afs_socket, SHUT_RDWR); + kernel_sock_shutdown(net->socket, SHUT_RDWR); flush_workqueue(afs_async_calls); - sock_release(afs_socket); + sock_release(net->socket); _debug("dework"); - destroy_workqueue(afs_async_calls); _leave(""); } /* * Allocate a call. 
*/ -static struct afs_call *afs_alloc_call(const struct afs_call_type *type, +static struct afs_call *afs_alloc_call(struct afs_net *net, + const struct afs_call_type *type, gfp_t gfp) { struct afs_call *call; @@ -148,11 +130,13 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type, return NULL; call->type = type; + call->net = net; atomic_set(&call->usage, 1); INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); + spin_lock_init(&call->state_lock); - o = atomic_inc_return(&afs_outstanding_calls); + o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_alloc, 1, o, __builtin_return_address(0)); return call; @@ -163,8 +147,9 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type, */ void afs_put_call(struct afs_call *call) { + struct afs_net *net = call->net; int n = atomic_dec_return(&call->usage); - int o = atomic_read(&afs_outstanding_calls); + int o = atomic_read(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_put, n + 1, o, __builtin_return_address(0)); @@ -175,20 +160,22 @@ void afs_put_call(struct afs_call *call) ASSERT(call->type->name != NULL); if (call->rxcall) { - rxrpc_kernel_end_call(afs_socket, call->rxcall); + rxrpc_kernel_end_call(net->socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) call->type->destructor(call); + afs_put_server(call->net, call->cm_server); + afs_put_cb_interest(call->net, call->cbi); kfree(call->request); kfree(call); - o = atomic_dec_return(&afs_outstanding_calls); + o = atomic_dec_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_free, 0, o, __builtin_return_address(0)); if (o == 0) - wake_up_atomic_t(&afs_outstanding_calls); + wake_up_atomic_t(&net->nr_outstanding_calls); } } @@ -200,7 +187,7 @@ int afs_queue_call_work(struct afs_call *call) int u = atomic_inc_return(&call->usage); trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&afs_outstanding_calls), + atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); INIT_WORK(&call->work, call->type->work); @@ -213,12 +200,13 @@ int afs_queue_call_work(struct afs_call *call) /* * allocate a call with flat request and reply buffers */ -struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, +struct afs_call *afs_alloc_flat_call(struct afs_net *net, + const struct afs_call_type *type, size_t request_size, size_t reply_max) { struct afs_call *call; - call = afs_alloc_call(type, GFP_NOFS); + call = afs_alloc_call(net, type, GFP_NOFS); if (!call) goto nomem_call; @@ -236,6 +224,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, goto nomem_free; } + call->operation_ID = type->op; init_waitqueue_head(&call->waitq); return call; @@ -300,8 +289,7 @@ static void afs_notify_end_request_tx(struct sock *sock, { struct afs_call *call = (struct afs_call *)call_user_ID; - if (call->state == AFS_CALL_REQUESTING) - call->state = AFS_CALL_AWAIT_REPLY; + afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY); } /* @@ -319,11 +307,13 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) do { afs_load_bvec(call, msg, bv, first, last, offset); + trace_afs_send_pages(call, msg, first, last, offset); + offset = 0; bytes = msg->msg_iter.count; nr = msg->msg_iter.nr_segs; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, + ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg, bytes, afs_notify_end_request_tx); for (loop = 0; loop < 
nr; loop++) put_page(bv[loop].bv_page); @@ -333,63 +323,62 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) first += nr; } while (first <= last); + trace_afs_sent_pages(call, call->first, last, first, ret); return ret; } /* * initiate a call */ -int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, - bool async) +long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, + gfp_t gfp, bool async) { - struct sockaddr_rxrpc srx; + struct sockaddr_rxrpc *srx = ac->addr; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; size_t offset; s64 tx_total_len; - u32 abort_code; int ret; - _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); + _enter(",{%pISp},", &srx->transport); ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); _debug("____MAKE %p{%s,%x} [%d]____", call, call->type->name, key_serial(call->key), - atomic_read(&afs_outstanding_calls)); + atomic_read(&call->net->nr_outstanding_calls)); call->async = async; - memset(&srx, 0, sizeof(srx)); - srx.srx_family = AF_RXRPC; - srx.srx_service = call->service_id; - srx.transport_type = SOCK_DGRAM; - srx.transport_len = sizeof(srx.transport.sin); - srx.transport.sin.sin_family = AF_INET; - srx.transport.sin.sin_port = call->port; - memcpy(&srx.transport.sin.sin_addr, addr, 4); - /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data * after the initial fixed part. */ tx_total_len = call->request_size; if (call->send_pages) { - tx_total_len += call->last_to - call->first_offset; - tx_total_len += (call->last - call->first) * PAGE_SIZE; + if (call->last == call->first) { + tx_total_len += call->last_to - call->first_offset; + } else { + /* It looks mathematically like you should be able to + * combine the following lines with the ones above, but + * unsigned arithmetic is fun when it wraps... + */ + tx_total_len += PAGE_SIZE - call->first_offset; + tx_total_len += call->last_to; + tx_total_len += (call->last - call->first - 1) * PAGE_SIZE; + } } /* create a call */ - rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, + rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, (unsigned long)call, tx_total_len, gfp, (async ? afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade); - call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); goto error_kill_call; @@ -409,14 +398,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); - /* We have to change the state *before* sending the last packet as - * rxrpc might give us the reply before it returns from sending the - * request. Further, if the send fails, we may already have been given - * a notification and may have collected it. 
- */ - if (!call->send_pages) - call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, rxcall, + ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, afs_notify_end_request_tx); if (ret < 0) @@ -433,22 +415,26 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, if (call->async) return -EINPROGRESS; - return afs_wait_for_call_to_complete(call); + return afs_wait_for_call_to_complete(call, ac); error_do_abort: call->state = AFS_CALL_COMPLETE; if (ret != -ECONNABORTED) { - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, - ret, "KSD"); + rxrpc_kernel_abort_call(call->net->socket, rxcall, + RX_USER_ABORT, ret, "KSD"); } else { - abort_code = 0; offset = 0; - rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, - false, &abort_code, &call->service_id); - ret = call->type->abort_to_error(abort_code); + rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, + 0, &offset, false, &call->abort_code, + &call->service_id); + ac->abort_code = call->abort_code; + ac->responded = true; } + call->error = ret; + trace_afs_call_done(call); error_kill_call: afs_put_call(call); + ac->error = ret; _leave(" = %d", ret); return ret; } @@ -458,88 +444,98 @@ error_kill_call: */ static void afs_deliver_to_call(struct afs_call *call) { - u32 abort_code; + enum afs_call_state state; + u32 abort_code, remote_abort = 0; int ret; _enter("%s", call->type->name); - while (call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK + while (state = READ_ONCE(call->state), + state == AFS_CALL_CL_AWAIT_REPLY || + state == AFS_CALL_SV_AWAIT_OP_ID || + state == AFS_CALL_SV_AWAIT_REQUEST || + state == AFS_CALL_SV_AWAIT_ACK ) { - if (call->state == AFS_CALL_AWAIT_ACK) { + if (state == AFS_CALL_SV_AWAIT_ACK) { size_t offset = 0; - ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + ret = rxrpc_kernel_recv_data(call->net->socket, + call->rxcall, NULL, 0, &offset, false, - &call->abort_code, + &remote_abort, &call->service_id); trace_afs_recv_data(call, 0, offset, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; - if (ret == 1 || ret < 0) { - call->state = AFS_CALL_COMPLETE; - goto done; + if (ret < 0 || ret == 1) { + if (ret == 1) + ret = 0; + goto call_complete; } return; } ret = call->type->deliver(call); + state = READ_ONCE(call->state); switch (ret) { case 0: - if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; + if (state == AFS_CALL_CL_PROC_REPLY) + goto call_complete; + ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: case -EAGAIN: goto out; + case -EIO: case -ECONNABORTED: - goto call_complete; + ASSERTCMP(state, ==, AFS_CALL_COMPLETE); + goto done; case -ENOTCONN: abort_code = RX_CALL_DEAD; - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KNC"); - goto save_error; + goto local_abort; case -ENOTSUPP: abort_code = RXGEN_OPCODE; - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KIV"); - goto save_error; + goto local_abort; case -ENODATA: case -EBADMSG: case -EMSGSIZE: default: abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) + if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + 
rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, -EBADMSG, "KUM"); - goto save_error; + goto local_abort; } } done: - if (call->state == AFS_CALL_COMPLETE && call->incoming) + if (state == AFS_CALL_COMPLETE && call->incoming) afs_put_call(call); out: _leave(""); return; -save_error: - call->error = ret; +local_abort: + abort_code = 0; call_complete: - call->state = AFS_CALL_COMPLETE; + afs_set_call_complete(call, ret, remote_abort); + state = AFS_CALL_COMPLETE; goto done; } /* * wait synchronously for a call to complete */ -static int afs_wait_for_call_to_complete(struct afs_call *call) +static long afs_wait_for_call_to_complete(struct afs_call *call, + struct afs_addr_cursor *ac) { signed long rtt2, timeout; - int ret; + long ret; u64 rtt; u32 life, last_life; @@ -547,30 +543,31 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _enter(""); - rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall); + rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); rtt2 = nsecs_to_jiffies64(rtt) * 2; if (rtt2 < 2) rtt2 = 2; timeout = rtt2; - last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + if (!afs_check_call_state(call, AFS_CALL_COMPLETE) && + call->need_attention) { call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } - if (call->state == AFS_CALL_COMPLETE) + if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - life = rxrpc_kernel_check_life(afs_socket, call->rxcall); + life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); if (timeout == 0 && life == last_life && signal_pending(current)) break; @@ -587,16 +584,34 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) __set_current_state(TASK_RUNNING); /* Kill off the call if it's still live. 
*/ - if (call->state < AFS_CALL_COMPLETE) { + if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { _debug("call interrupted"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); + } + + spin_lock_bh(&call->state_lock); + ac->abort_code = call->abort_code; + ac->error = call->error; + spin_unlock_bh(&call->state_lock); + + ret = ac->error; + switch (ret) { + case 0: + if (call->ret_reply0) { + ret = (long)call->reply[0]; + call->reply[0] = NULL; + } + /* Fall through */ + case -ECONNABORTED: + ac->responded = true; + break; } - ret = call->error; _debug("call complete"); afs_put_call(call); - _leave(" = %d", ret); + _leave(" = %p", (void *)ret); return ret; } @@ -627,7 +642,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = __atomic_add_unless(&call->usage, 1, 0); if (u != 0) { trace_afs_call(call, afs_call_trace_wake, u, - atomic_read(&afs_outstanding_calls), + atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); if (!queue_work(afs_async_calls, &call->async_work)) @@ -666,7 +681,7 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply = NULL; + call->reply[0] = NULL; /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the @@ -691,22 +706,24 @@ static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) /* * Charge the incoming call preallocation. */ -static void afs_charge_preallocation(struct work_struct *work) +void afs_charge_preallocation(struct work_struct *work) { - struct afs_call *call = afs_spare_incoming_call; + struct afs_net *net = + container_of(work, struct afs_net, charge_preallocation_work); + struct afs_call *call = net->spare_incoming_call; for (;;) { if (!call) { - call = afs_alloc_call(&afs_RXCMxxxx, GFP_KERNEL); + call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL); if (!call) break; call->async = true; - call->state = AFS_CALL_AWAIT_OP_ID; + call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); } - if (rxrpc_kernel_charge_accept(afs_socket, + if (rxrpc_kernel_charge_accept(net->socket, afs_wake_up_async_call, afs_rx_attach, (unsigned long)call, @@ -714,7 +731,7 @@ static void afs_charge_preallocation(struct work_struct *work) break; call = NULL; } - afs_spare_incoming_call = call; + net->spare_incoming_call = call; } /* @@ -735,7 +752,9 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { - queue_work(afs_wq, &afs_charge_preallocation_work); + struct afs_net *net = afs_sock2net(sk); + + queue_work(afs_wq, &net->charge_preallocation_work); } /* @@ -756,7 +775,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call) return ret; call->operation_ID = ntohl(call->tmp); - call->state = AFS_CALL_AWAIT_REQUEST; + afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); call->offset = 0; /* ask the cache manager to route the call (it'll change the call type @@ -781,8 +800,7 @@ static void afs_notify_end_reply_tx(struct sock *sock, { struct afs_call *call = (struct afs_call *)call_user_ID; - if (call->state == AFS_CALL_REPLYING) - call->state = AFS_CALL_AWAIT_ACK; + afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK); } 
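The notifier hooks above now funnel their state changes through afs_set_call_state(), which only advances the call if it is still in the expected state; the helper itself is added elsewhere in this series and is not shown in this hunk. A minimal sketch of the idea, assuming only the call->state_lock spinlock introduced earlier in this patch (the name example_set_call_state is illustrative, not the real helper):

	/* Sketch only: advance call->state from 'from' to 'to' under
	 * state_lock, and do nothing if another context has already
	 * moved the call on (e.g. to AFS_CALL_COMPLETE).
	 */
	static inline void example_set_call_state(struct afs_call *call,
						  enum afs_call_state from,
						  enum afs_call_state to)
	{
		spin_lock_bh(&call->state_lock);
		if (call->state == from)
			call->state = to;
		spin_unlock_bh(&call->state_lock);
	}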
/* @@ -790,11 +808,12 @@ static void afs_notify_end_reply_tx(struct sock *sock, */ void afs_send_empty_reply(struct afs_call *call) { + struct afs_net *net = call->net; struct msghdr msg; _enter(""); - rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0); + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0); msg.msg_name = NULL; msg.msg_namelen = 0; @@ -803,8 +822,7 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_controllen = 0; msg.msg_flags = 0; - call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0, + switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0, afs_notify_end_reply_tx)) { case 0: _leave(" [replied]"); @@ -812,7 +830,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(net->socket, call->rxcall, RX_USER_ABORT, -ENOMEM, "KOO"); default: _leave(" [error]"); @@ -825,13 +843,14 @@ void afs_send_empty_reply(struct afs_call *call) */ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { + struct afs_net *net = call->net; struct msghdr msg; struct kvec iov[1]; int n; _enter(""); - rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len); + rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len); iov[0].iov_base = (void *) buf; iov[0].iov_len = len; @@ -842,8 +861,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_controllen = 0; msg.msg_flags = 0; - call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len, + n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len, afs_notify_end_reply_tx); if (n >= 0) { /* Success */ @@ -853,7 +871,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, + rxrpc_kernel_abort_call(net->socket, call->rxcall, RX_USER_ABORT, -ENOMEM, "KOO"); } _leave(" [error]"); @@ -865,6 +883,9 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) int afs_extract_data(struct afs_call *call, void *buf, size_t count, bool want_more) { + struct afs_net *net = call->net; + enum afs_call_state state; + u32 remote_abort; int ret; _enter("{%s,%zu},,%zu,%d", @@ -872,32 +893,32 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, ASSERTCMP(call->offset, <=, count); - ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, buf, count, &call->offset, - want_more, &call->abort_code, + want_more, &remote_abort, &call->service_id); trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; + state = READ_ONCE(call->state); if (ret == 1) { - switch (call->state) { - case AFS_CALL_AWAIT_REPLY: - call->state = AFS_CALL_COMPLETE; + switch (state) { + case AFS_CALL_CL_AWAIT_REPLY: + afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY); break; - case AFS_CALL_AWAIT_REQUEST: - call->state = AFS_CALL_REPLYING; + case AFS_CALL_SV_AWAIT_REQUEST: + afs_set_call_state(call, state, AFS_CALL_SV_REPLYING); break; + case AFS_CALL_COMPLETE: + kdebug("prem complete %d", call->error); + return -EIO; default: break; } return 0; } - if (ret == -ECONNABORTED) - call->error = call->type->abort_to_error(call->abort_code); - else - call->error = ret; - call->state = AFS_CALL_COMPLETE; + afs_set_call_complete(call, ret, remote_abort); 
return ret; } diff --git a/fs/afs/security.c b/fs/afs/security.c index faca66227ecf..46a881a4d08f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -1,6 +1,6 @@ /* AFS security handling * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -14,9 +14,13 @@ #include <linux/fs.h> #include <linux/ctype.h> #include <linux/sched.h> +#include <linux/hashtable.h> #include <keys/rxrpc-type.h> #include "internal.h" +static DEFINE_HASHTABLE(afs_permits_cache, 10); +static DEFINE_SPINLOCK(afs_permits_lock); + /* * get a key */ @@ -46,167 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell) } /* - * dispose of a permits list + * Dispose of a list of permits. */ -void afs_zap_permits(struct rcu_head *rcu) +static void afs_permits_rcu(struct rcu_head *rcu) { struct afs_permits *permits = container_of(rcu, struct afs_permits, rcu); - int loop; - - _enter("{%d}", permits->count); + int i; - for (loop = permits->count - 1; loop >= 0; loop--) - key_put(permits->permits[loop].key); + for (i = 0; i < permits->nr_permits; i++) + key_put(permits->permits[i].key); kfree(permits); } /* - * dispose of a permits list in which all the key pointers have been copied + * Discard a permission cache. */ -static void afs_dispose_of_permits(struct rcu_head *rcu) +void afs_put_permits(struct afs_permits *permits) { - struct afs_permits *permits = - container_of(rcu, struct afs_permits, rcu); - - _enter("{%d}", permits->count); - - kfree(permits); + if (permits && refcount_dec_and_test(&permits->usage)) { + spin_lock(&afs_permits_lock); + hash_del_rcu(&permits->hash_node); + spin_unlock(&afs_permits_lock); + call_rcu(&permits->rcu, afs_permits_rcu); + } } /* - * get the authorising vnode - this is the specified inode itself if it's a - * directory or it's the parent directory if the specified inode is a file or - * symlink - * - the caller must release the ref on the inode + * Clear a permit cache on callback break. */ -static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode, - struct key *key) +void afs_clear_permits(struct afs_vnode *vnode) { - struct afs_vnode *auth_vnode; - struct inode *auth_inode; + struct afs_permits *permits; - _enter(""); + spin_lock(&vnode->lock); + permits = rcu_dereference_protected(vnode->permit_cache, + lockdep_is_held(&vnode->lock)); + RCU_INIT_POINTER(vnode->permit_cache, NULL); + vnode->cb_break++; + spin_unlock(&vnode->lock); - if (S_ISDIR(vnode->vfs_inode.i_mode)) { - auth_inode = igrab(&vnode->vfs_inode); - ASSERT(auth_inode != NULL); - } else { - auth_inode = afs_iget(vnode->vfs_inode.i_sb, key, - &vnode->status.parent, NULL, NULL); - if (IS_ERR(auth_inode)) - return ERR_CAST(auth_inode); - } - - auth_vnode = AFS_FS_I(auth_inode); - _leave(" = {%x}", auth_vnode->fid.vnode); - return auth_vnode; + if (permits) + afs_put_permits(permits); } /* - * clear the permit cache on a directory vnode + * Hash a list of permits. Use simple addition to make it easy to add an extra + * one at an as-yet indeterminate position in the list. 
*/ -void afs_clear_permits(struct afs_vnode *vnode) +static void afs_hash_permits(struct afs_permits *permits) { - struct afs_permits *permits; - - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + unsigned long h = permits->nr_permits; + int i; - mutex_lock(&vnode->permits_lock); - permits = vnode->permits; - RCU_INIT_POINTER(vnode->permits, NULL); - mutex_unlock(&vnode->permits_lock); + for (i = 0; i < permits->nr_permits; i++) { + h += (unsigned long)permits->permits[i].key / sizeof(void *); + h += permits->permits[i].access; + } - if (permits) - call_rcu(&permits->rcu, afs_zap_permits); - _leave(""); + permits->h = h; } /* - * add the result obtained for a vnode to its or its parent directory's cache - * for the key used to access it + * Cache the CallerAccess result obtained from doing a fileserver operation + * that returned a vnode status for a particular key. If a callback break + * occurs whilst the operation was in progress then we have to ditch the cache + * as the ACL *may* have changed. */ -void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) +void afs_cache_permit(struct afs_vnode *vnode, struct key *key, + unsigned int cb_break) { - struct afs_permits *permits, *xpermits; - struct afs_permit *permit; - struct afs_vnode *auth_vnode; - int count, loop; + struct afs_permits *permits, *xpermits, *replacement, *new = NULL; + afs_access_t caller_access = READ_ONCE(vnode->status.caller_access); + size_t size = 0; + bool changed = false; + int i, j; + + _enter("{%x:%u},%x,%x", + vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); + + rcu_read_lock(); + + /* Check for the common case first: We got back the same access as last + * time we tried and already have it recorded. + */ + permits = rcu_dereference(vnode->permit_cache); + if (permits) { + if (!permits->invalidated) { + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) + break; + if (permits->permits[i].access != caller_access) { + changed = true; + break; + } - _enter("{%x:%u},%x,%lx", - vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); + if (cb_break != (vnode->cb_break + + vnode->cb_interest->server->cb_s_break)) { + changed = true; + break; + } - auth_vnode = afs_get_auth_inode(vnode, key); - if (IS_ERR(auth_vnode)) { - _leave(" [get error %ld]", PTR_ERR(auth_vnode)); - return; - } + /* The cache is still good. */ + rcu_read_unlock(); + return; + } + } + + changed |= permits->invalidated; + size = permits->nr_permits; - mutex_lock(&auth_vnode->permits_lock); + /* If this set of permits is now wrong, clear the permits + * pointer so that no one tries to use the stale information. 
+ */ + if (changed) { + spin_lock(&vnode->lock); + if (permits != rcu_access_pointer(vnode->permit_cache)) + goto someone_else_changed_it_unlock; + RCU_INIT_POINTER(vnode->permit_cache, NULL); + spin_unlock(&vnode->lock); + + afs_put_permits(permits); + permits = NULL; + size = 0; + } + } - /* guard against a rename being detected whilst we waited for the - * lock */ - if (memcmp(&auth_vnode->fid, &vnode->status.parent, - sizeof(struct afs_fid)) != 0) { - _debug("renamed"); - goto out_unlock; + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) { + rcu_read_unlock(); + goto someone_else_changed_it; } - /* have to be careful as the directory's callback may be broken between - * us receiving the status we're trying to cache and us getting the - * lock to update the cache for the status */ - if (auth_vnode->acl_order - acl_order > 0) { - _debug("ACL changed?"); - goto out_unlock; + /* We need a ref on any permits list we want to copy as we'll have to + * drop the lock to do memory allocation. + */ + if (permits && !refcount_inc_not_zero(&permits->usage)) { + rcu_read_unlock(); + goto someone_else_changed_it; } - /* always update the anonymous mask */ - _debug("anon access %x", vnode->status.anon_access); - auth_vnode->status.anon_access = vnode->status.anon_access; - if (key == vnode->volume->cell->anonymous_key) - goto out_unlock; - - xpermits = auth_vnode->permits; - count = 0; - if (xpermits) { - /* see if the permit is already in the list - * - if it is then we just amend the list - */ - count = xpermits->count; - permit = xpermits->permits; - for (loop = count; loop > 0; loop--) { - if (permit->key == key) { - permit->access_mask = - vnode->status.caller_access; - goto out_unlock; + rcu_read_unlock(); + + /* Speculatively create a new list with the revised permission set. We + * discard this if we find an extant match already in the hash, but + * it's easier to compare with memcmp this way. + * + * We fill in the key pointers at this time, but we don't get the refs + * yet. 
+ */ + size++; + new = kzalloc(sizeof(struct afs_permits) + + sizeof(struct afs_permit) * size, GFP_NOFS); + if (!new) + return; + + refcount_set(&new->usage, 1); + new->nr_permits = size; + i = j = 0; + if (permits) { + for (i = 0; i < permits->nr_permits; i++) { + if (j == i && permits->permits[i].key > key) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + j++; } - permit++; + new->permits[j].key = permits->permits[i].key; + new->permits[j].access = permits->permits[i].access; + j++; + } + } + + if (j == i) { + new->permits[j].key = key; + new->permits[j].access = caller_access; + } + + afs_hash_permits(new); + + afs_put_permits(permits); + + /* Now see if the permit list we want is actually already available */ + spin_lock(&afs_permits_lock); + + hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) { + if (xpermits->h != new->h || + xpermits->invalidated || + xpermits->nr_permits != new->nr_permits || + memcmp(xpermits->permits, new->permits, + new->nr_permits * sizeof(struct afs_permit)) != 0) + continue; + + if (refcount_inc_not_zero(&xpermits->usage)) { + replacement = xpermits; + goto found; } + + break; } - permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1), - GFP_NOFS); - if (!permits) - goto out_unlock; - - if (xpermits) - memcpy(permits->permits, xpermits->permits, - count * sizeof(struct afs_permit)); - - _debug("key %x access %x", - key_serial(key), vnode->status.caller_access); - permits->permits[count].access_mask = vnode->status.caller_access; - permits->permits[count].key = key_get(key); - permits->count = count + 1; - - rcu_assign_pointer(auth_vnode->permits, permits); - if (xpermits) - call_rcu(&xpermits->rcu, afs_dispose_of_permits); - -out_unlock: - mutex_unlock(&auth_vnode->permits_lock); - iput(&auth_vnode->vfs_inode); - _leave(""); + for (i = 0; i < new->nr_permits; i++) + key_get(new->permits[i].key); + hash_add_rcu(afs_permits_cache, &new->hash_node, new->h); + replacement = new; + new = NULL; + +found: + spin_unlock(&afs_permits_lock); + + kfree(new); + + spin_lock(&vnode->lock); + if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) || + permits != rcu_access_pointer(vnode->permit_cache)) + goto someone_else_changed_it_unlock; + rcu_assign_pointer(vnode->permit_cache, replacement); + spin_unlock(&vnode->lock); + afs_put_permits(permits); + return; + +someone_else_changed_it_unlock: + spin_unlock(&vnode->lock); +someone_else_changed_it: + /* Someone else changed the cache under us - don't recheck at this + * time. 
+ */ + return; } /* @@ -218,56 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, afs_access_t *_access) { struct afs_permits *permits; - struct afs_permit *permit; - struct afs_vnode *auth_vnode; - bool valid; - int loop, ret; + bool valid = false; + int i, ret; _enter("{%x:%u},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); - auth_vnode = afs_get_auth_inode(vnode, key); - if (IS_ERR(auth_vnode)) { - *_access = 0; - _leave(" = %ld", PTR_ERR(auth_vnode)); - return PTR_ERR(auth_vnode); - } - - ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode)); + permits = vnode->permit_cache; /* check the permits to see if we've got one yet */ - if (key == auth_vnode->volume->cell->anonymous_key) { + if (key == vnode->volume->cell->anonymous_key) { _debug("anon"); - *_access = auth_vnode->status.anon_access; + *_access = vnode->status.anon_access; valid = true; } else { - valid = false; rcu_read_lock(); - permits = rcu_dereference(auth_vnode->permits); + permits = rcu_dereference(vnode->permit_cache); if (permits) { - permit = permits->permits; - for (loop = permits->count; loop > 0; loop--) { - if (permit->key == key) { - _debug("found in cache"); - *_access = permit->access_mask; - valid = true; + for (i = 0; i < permits->nr_permits; i++) { + if (permits->permits[i].key < key) + continue; + if (permits->permits[i].key > key) break; - } - permit++; + + *_access = permits->permits[i].access; + valid = !permits->invalidated; + break; } } rcu_read_unlock(); } if (!valid) { - /* check the status on the file we're actually interested in - * (the post-processing will cache the result on auth_vnode) */ + /* Check the status on the file we're actually interested in + * (the post-processing will cache the result). + */ _debug("no valid permit"); - set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - ret = afs_vnode_fetch_status(vnode, auth_vnode, key); + ret = afs_fetch_status(vnode, key); if (ret < 0) { - iput(&auth_vnode->vfs_inode); *_access = 0; _leave(" = %d", ret); return ret; @@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, *_access = vnode->status.caller_access; } - iput(&auth_vnode->vfs_inode); _leave(" = 0 [access %x]", *_access); return 0; } @@ -304,14 +362,9 @@ int afs_permission(struct inode *inode, int mask) return PTR_ERR(key); } - /* if the promise has expired, we need to check the server again */ - if (!vnode->cb_promised) { - _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, NULL, key); - if (ret < 0) - goto error; - _debug("new promise [fl=%lx]", vnode->flags); - } + ret = afs_validate(vnode, key); + if (ret < 0) + goto error; /* check the permits to see if we've got one yet */ ret = afs_check_permit(vnode, key, &access); @@ -365,3 +418,12 @@ error: _leave(" = %d", ret); return ret; } + +void __exit afs_clean_up_permit_cache(void) +{ + int i; + + for (i = 0; i < HASH_SIZE(afs_permits_cache); i++) + WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i])); + +} diff --git a/fs/afs/server.c b/fs/afs/server.c index c001b1f2455f..1880f1b6a9f1 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -11,317 +11,689 @@ #include <linux/sched.h> #include <linux/slab.h> +#include "afs_fs.h" #include "internal.h" -static unsigned afs_server_timeout = 10; /* server timeout in seconds */ +static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ +static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ -static void afs_reap_server(struct work_struct *); +static void 
afs_inc_servers_outstanding(struct afs_net *net) +{ + atomic_inc(&net->servers_outstanding); +} + +static void afs_dec_servers_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->servers_outstanding)) + wake_up_atomic_t(&net->servers_outstanding); +} + +/* + * Find a server by one of its addresses. + */ +struct afs_server *afs_find_server(struct afs_net *net, + const struct sockaddr_rxrpc *srx) +{ + const struct sockaddr_in6 *a = &srx->transport.sin6, *b; + const struct afs_addr_list *alist; + struct afs_server *server = NULL; + unsigned int i; + bool ipv6 = true; + int seq = 0, diff; + + if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 || + srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 || + srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) + ipv6 = false; + + rcu_read_lock(); + + do { + if (server) + afs_put_server(net, server); + server = NULL; + read_seqbegin_or_lock(&net->fs_addr_lock, &seq); + + if (ipv6) { + hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + alist = rcu_dereference(server->addresses); + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = memcmp(&a->sin6_addr, + &b->sin6_addr, + sizeof(struct in6_addr)); + if (diff == 0) + goto found; + if (diff < 0) { + // TODO: Sort the list + //if (i == alist->nr_ipv4) + // goto not_found; + break; + } + } + } + } else { + hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { + alist = rcu_dereference(server->addresses); + for (i = 0; i < alist->nr_ipv4; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = ((u32)a->sin6_addr.s6_addr32[3] - + (u32)b->sin6_addr.s6_addr32[3]); + if (diff == 0) + goto found; + if (diff < 0) { + // TODO: Sort the list + //if (i == 0) + // goto not_found; + break; + } + } + } + } + + //not_found: + server = NULL; + found: + if (server && !atomic_inc_not_zero(&server->usage)) + server = NULL; + + } while (need_seqretry(&net->fs_addr_lock, seq)); -/* tree of all the servers, indexed by IP address */ -static struct rb_root afs_servers = RB_ROOT; -static DEFINE_RWLOCK(afs_servers_lock); + done_seqretry(&net->fs_addr_lock, seq); -/* LRU list of all the servers not currently in use */ -static LIST_HEAD(afs_server_graveyard); -static DEFINE_SPINLOCK(afs_server_graveyard_lock); -static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server); + rcu_read_unlock(); + return server; +} /* - * install a server record in the master tree + * Look up a server by its UUID */ -static int afs_install_server(struct afs_server *server) +struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) { - struct afs_server *xserver; + struct afs_server *server = NULL; + struct rb_node *p; + int diff, seq = 0; + + _enter("%pU", uuid); + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. 
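
afs_find_server() above, and afs_find_server_by_uuid() that follows, both read optimistically and retry if the data structure changed underneath them. A much-simplified userspace analogue of that sequence-count retry loop, using C11 atomics (the kernel's read_seqbegin_or_lock() can additionally fall back to taking the lock outright, which is not shown here):

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic unsigned int seq;	/* even: stable, odd: writer active */
	static int shared_value;

	static void writer_update(int v)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel); /* now odd */
		shared_value = v;
		atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel); /* even again */
	}

	static int reader_read(void)
	{
		unsigned int s;
		int v;

		for (;;) {
			s = atomic_load_explicit(&seq, memory_order_acquire);
			if (s & 1)
				continue;		/* writer in progress, try again */
			v = shared_value;
			if (atomic_load_explicit(&seq, memory_order_acquire) == s)
				return v;		/* nothing changed while we looked */
		}
	}

	int main(void)
	{
		writer_update(42);
		printf("%d\n", reader_read());
		return 0;
	}
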
+ */ + if (server) + afs_put_server(net, server); + server = NULL; + + read_seqbegin_or_lock(&net->fs_lock, &seq); + + p = net->fs_servers.rb_node; + while (p) { + server = rb_entry(p, struct afs_server, uuid_rb); + + diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); + if (diff < 0) { + p = p->rb_left; + } else if (diff > 0) { + p = p->rb_right; + } else { + afs_get_server(server); + break; + } + + server = NULL; + } + } while (need_seqretry(&net->fs_lock, seq)); + + done_seqretry(&net->fs_lock, seq); + + _leave(" = %p", server); + return server; +} + +/* + * Install a server record in the namespace tree + */ +static struct afs_server *afs_install_server(struct afs_net *net, + struct afs_server *candidate) +{ + const struct afs_addr_list *alist; + struct afs_server *server; struct rb_node **pp, *p; - int ret; + int ret = -EEXIST, diff; - _enter("%p", server); + _enter("%p", candidate); - write_lock(&afs_servers_lock); + write_seqlock(&net->fs_lock); - ret = -EEXIST; - pp = &afs_servers.rb_node; + /* Firstly install the server in the UUID lookup tree */ + pp = &net->fs_servers.rb_node; p = NULL; while (*pp) { p = *pp; _debug("- consider %p", p); - xserver = rb_entry(p, struct afs_server, master_rb); - if (server->addr.s_addr < xserver->addr.s_addr) + server = rb_entry(p, struct afs_server, uuid_rb); + diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); + if (diff < 0) pp = &(*pp)->rb_left; - else if (server->addr.s_addr > xserver->addr.s_addr) + else if (diff > 0) pp = &(*pp)->rb_right; else - goto error; + goto exists; } - rb_link_node(&server->master_rb, p, pp); - rb_insert_color(&server->master_rb, &afs_servers); + server = candidate; + rb_link_node(&server->uuid_rb, p, pp); + rb_insert_color(&server->uuid_rb, &net->fs_servers); + hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + + write_seqlock(&net->fs_addr_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&net->fs_addr_lock.lock)); + + /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install + * it in the IPv4 and/or IPv6 reverse-map lists. + * + * TODO: For speed we want to use something other than a flat list + * here; even sorting the list in terms of lowest address would help a + * bit, but anything we might want to do gets messy and memory + * intensive. 
+ */ + if (alist->nr_ipv4 > 0) + hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); + if (alist->nr_addrs > alist->nr_ipv4) + hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + + write_sequnlock(&net->fs_addr_lock); ret = 0; -error: - write_unlock(&afs_servers_lock); - return ret; +exists: + afs_get_server(server); + write_sequnlock(&net->fs_lock); + return server; } /* * allocate a new server record */ -static struct afs_server *afs_alloc_server(struct afs_cell *cell, - const struct in_addr *addr) +static struct afs_server *afs_alloc_server(struct afs_net *net, + const uuid_t *uuid, + struct afs_addr_list *alist) { struct afs_server *server; _enter(""); server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); - if (server) { - atomic_set(&server->usage, 1); - server->cell = cell; - - INIT_LIST_HEAD(&server->link); - INIT_LIST_HEAD(&server->grave); - init_rwsem(&server->sem); - spin_lock_init(&server->fs_lock); - server->fs_vnodes = RB_ROOT; - server->cb_promises = RB_ROOT; - spin_lock_init(&server->cb_lock); - init_waitqueue_head(&server->cb_break_waitq); - INIT_DELAYED_WORK(&server->cb_break_work, - afs_dispatch_give_up_callbacks); - - memcpy(&server->addr, addr, sizeof(struct in_addr)); - server->addr.s_addr = addr->s_addr; - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - } else { - _leave(" = NULL [nomem]"); - } + if (!server) + goto enomem; + + atomic_set(&server->usage, 1); + RCU_INIT_POINTER(server->addresses, alist); + server->addr_version = alist->version; + server->uuid = *uuid; + server->flags = (1UL << AFS_SERVER_FL_NEW); + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + rwlock_init(&server->fs_lock); + INIT_LIST_HEAD(&server->cb_interests); + rwlock_init(&server->cb_break_lock); + + afs_inc_servers_outstanding(net); + _leave(" = %p", server); return server; + +enomem: + _leave(" = NULL [nomem]"); + return NULL; +} + +/* + * Look up an address record for a server + */ +static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, + struct key *key, const uuid_t *uuid) +{ + struct afs_addr_cursor ac; + struct afs_addr_list *alist; + int ret; + + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); + + while (afs_iterate_addresses(&ac)) { + if (test_bit(ac.index, &ac.alist->yfs)) + alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); + else + alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return alist; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; + } + } + +error: + return ERR_PTR(afs_end_cursor(&ac)); } /* - * get an FS-server record for a cell + * Get or create a fileserver record. 
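
The error handling in afs_vl_lookup_addrs() above splits failures into two classes: some abort the whole lookup, while network-unreachability style errors just rotate to the next VL server address. A hedged sketch of that split as a plain function (names and the demo values are invented for the example):

	#include <errno.h>
	#include <stdio.h>

	enum vl_step { VL_DONE, VL_NEXT_ADDRESS, VL_FAIL };

	/* -ECONNABORTED is also fatal in the loop above, but only after the RX
	 * abort code has been translated to an errno. */
	static enum vl_step classify_vl_error(int error)
	{
		switch (error) {
		case 0:
			return VL_DONE;		/* got an answer, stop iterating */
		case -ENOMEM:
		case -ENONET:
			return VL_FAIL;		/* local or hard failure */
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			return VL_NEXT_ADDRESS;	/* rotate to the next VL address */
		default:
			return VL_FAIL;		/* everything else becomes -EIO */
		}
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       classify_vl_error(0),
		       classify_vl_error(-ECONNREFUSED),
		       classify_vl_error(-EIO));
		return 0;
	}
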
*/ -struct afs_server *afs_lookup_server(struct afs_cell *cell, - const struct in_addr *addr) +struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, + const uuid_t *uuid) { + struct afs_addr_list *alist; struct afs_server *server, *candidate; - _enter("%p,%pI4", cell, &addr->s_addr); + _enter("%p,%pU", cell->net, uuid); - /* quick scan of the list to see if we already have the server */ - read_lock(&cell->servers_lock); + server = afs_find_server_by_uuid(cell->net, uuid); + if (server) + return server; - list_for_each_entry(server, &cell->servers, link) { - if (server->addr.s_addr == addr->s_addr) - goto found_server_quickly; - } - read_unlock(&cell->servers_lock); + alist = afs_vl_lookup_addrs(cell, key, uuid); + if (IS_ERR(alist)) + return ERR_CAST(alist); - candidate = afs_alloc_server(cell, addr); + candidate = afs_alloc_server(cell->net, uuid, alist); if (!candidate) { - _leave(" = -ENOMEM"); + afs_put_addrlist(alist); return ERR_PTR(-ENOMEM); } - write_lock(&cell->servers_lock); - - /* check the cell's server list again */ - list_for_each_entry(server, &cell->servers, link) { - if (server->addr.s_addr == addr->s_addr) - goto found_server; + server = afs_install_server(cell->net, candidate); + if (server != candidate) { + afs_put_addrlist(alist); + kfree(candidate); } - _debug("new"); - server = candidate; - if (afs_install_server(server) < 0) - goto server_in_two_cells; - - afs_get_cell(cell); - list_add_tail(&server->link, &cell->servers); - - write_unlock(&cell->servers_lock); _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; +} - /* found a matching server quickly */ -found_server_quickly: - _debug("found quickly"); - afs_get_server(server); - read_unlock(&cell->servers_lock); -no_longer_unused: - if (!list_empty(&server->grave)) { - spin_lock(&afs_server_graveyard_lock); - list_del_init(&server->grave); - spin_unlock(&afs_server_graveyard_lock); +/* + * Set the server timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ +static void afs_set_server_timer(struct afs_net *net, time64_t delay) +{ + if (net->live) { + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) + afs_dec_servers_outstanding(net); } - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - return server; +} - /* found a matching server on the second pass */ -found_server: - _debug("found"); - afs_get_server(server); - write_unlock(&cell->servers_lock); - kfree(candidate); - goto no_longer_unused; - - /* found a server that seems to be in two cells */ -server_in_two_cells: - write_unlock(&cell->servers_lock); - kfree(candidate); - printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n", - addr); - _leave(" = -EEXIST"); - return ERR_PTR(-EEXIST); +/* + * Server management timer. We have an increment on fs_outstanding that we + * need to pass along to the work item. + */ +void afs_servers_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); } /* - * look up a server by its IP address + * Release a reference on a server record. 
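
The servers_outstanding counter pins the namespace while a timer or work item is pending, and the count taken before queueing is handed straight back if nothing new was actually queued, as queue_work()/timer_reduce() report above. A toy userspace version of the same accounting, with queue_thing() standing in for the queueing call:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static _Atomic int outstanding;

	/* Stand-in for queue_work()/timer_reduce(): returns true only if a new
	 * pending item was actually added. */
	static bool queue_thing(bool already_pending)
	{
		return !already_pending;
	}

	static void maybe_queue(bool already_pending)
	{
		atomic_fetch_add(&outstanding, 1);		/* pin the namespace */
		if (!queue_thing(already_pending))
			atomic_fetch_sub(&outstanding, 1);	/* nothing new queued */
	}

	int main(void)
	{
		maybe_queue(false);	/* newly queued: count stays held by the item */
		maybe_queue(true);	/* already pending: count handed straight back */
		printf("outstanding = %d\n", atomic_load(&outstanding));
		return 0;
	}
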
*/ -struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) +void afs_put_server(struct afs_net *net, struct afs_server *server) { - struct afs_server *server = NULL; - struct rb_node *p; - struct in_addr addr = srx->transport.sin.sin_addr; + unsigned int usage; - _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + if (!server) + return; - if (srx->transport.family != AF_INET) { - WARN(true, "AFS does not yes support non-IPv4 addresses\n"); - return NULL; - } + server->put_time = ktime_get_real_seconds(); - read_lock(&afs_servers_lock); + usage = atomic_dec_return(&server->usage); - p = afs_servers.rb_node; - while (p) { - server = rb_entry(p, struct afs_server, master_rb); + _enter("{%u}", usage); - _debug("- consider %p", p); + if (likely(usage > 0)) + return; - if (addr.s_addr < server->addr.s_addr) { - p = p->rb_left; - } else if (addr.s_addr > server->addr.s_addr) { - p = p->rb_right; - } else { - afs_get_server(server); - goto found; - } - } + afs_set_server_timer(net, afs_server_gc_delay); +} - server = NULL; -found: - read_unlock(&afs_servers_lock); - ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr); - _leave(" = %p", server); - return server; +static void afs_server_rcu(struct rcu_head *rcu) +{ + struct afs_server *server = container_of(rcu, struct afs_server, rcu); + + afs_put_addrlist(server->addresses); + kfree(server); } /* - * destroy a server record - * - removes from the cell list + * destroy a dead server */ -void afs_put_server(struct afs_server *server) +static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { - if (!server) - return; + struct afs_addr_list *alist = server->addresses; + struct afs_addr_cursor ac = { + .alist = alist, + .addr = &alist->addrs[0], + .start = alist->index, + .index = alist->index, + .error = 0, + }; + _enter("%p", server); - _enter("%p{%d}", server, atomic_read(&server->usage)); + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); + afs_dec_servers_outstanding(net); +} - _debug("PUT SERVER %d", atomic_read(&server->usage)); +/* + * Garbage collect any expired servers. + */ +static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) +{ + struct afs_server *server; + bool deleted; + int usage; + + while ((server = gc_list)) { + gc_list = server->gc_next; + + write_seqlock(&net->fs_lock); + usage = 1; + deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); + if (deleted) { + rb_erase(&server->uuid_rb, &net->fs_servers); + hlist_del_rcu(&server->proc_link); + } + write_sequnlock(&net->fs_lock); - ASSERTCMP(atomic_read(&server->usage), >, 0); + if (deleted) + afs_destroy_server(net, server); + } +} - if (likely(!atomic_dec_and_test(&server->usage))) { - _leave(""); - return; +/* + * Manage the records of servers known to be within a network namespace. This + * includes garbage collecting unused servers. + * + * Note also that we were given an increment on net->servers_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_servers(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_manager); + struct afs_server *gc_list = NULL; + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; + + _enter(""); + + /* Trawl the server list looking for servers that have expired from + * lack of use. 
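
afs_gc_servers() above only destroys a record if it can atomically move the usage count from exactly 1 to 0, so a lookup that races in and bumps the count wins and the record survives. A compressed userspace sketch of that claim step, using C11 atomics and toy types:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct server { _Atomic int usage; };

	/* Returns true if we claimed the record for destruction; false if
	 * someone still holds (or just regained) a reference. */
	static bool try_claim_for_gc(struct server *s)
	{
		int expected = 1;

		return atomic_compare_exchange_strong(&s->usage, &expected, 0);
	}

	int main(void)
	{
		struct server a, b;

		atomic_init(&a.usage, 1);	/* only the tree holds it */
		atomic_init(&b.usage, 2);	/* someone else has a ref */

		printf("a: %s\n", try_claim_for_gc(&a) ? "destroy" : "keep");
		printf("b: %s\n", try_claim_for_gc(&b) ? "destroy" : "keep");
		return 0;
	}
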
+ */ + read_seqlock_excl(&net->fs_lock); + + for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { + struct afs_server *server = + rb_entry(cursor, struct afs_server, uuid_rb); + int usage = atomic_read(&server->usage); + + _debug("manage %pU %u", &server->uuid, usage); + + ASSERTCMP(usage, >=, 1); + ASSERTIFCMP(purging, usage, ==, 1); + + if (usage == 1) { + time64_t expire_at = server->put_time; + + if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && + !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) + expire_at += afs_server_gc_delay; + if (purging || expire_at <= now) { + server->gc_next = gc_list; + gc_list = server; + } else if (expire_at < next_manage) { + next_manage = expire_at; + } + } } - afs_flush_callback_breaks(server); + read_sequnlock_excl(&net->fs_lock); + + /* Update the timer on the way out. We have to pass an increment on + * servers_outstanding in the namespace that we are in to the timer or + * the work scheduler. + */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); - spin_lock(&afs_server_graveyard_lock); - if (atomic_read(&server->usage) == 0) { - list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &afs_server_reaper, - afs_server_timeout * HZ); + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->fs_manager)) + afs_inc_servers_outstanding(net); + } else { + afs_set_server_timer(net, next_manage - now); + } } - spin_unlock(&afs_server_graveyard_lock); - _leave(" [dead]"); + + afs_gc_servers(net, gc_list); + + afs_dec_servers_outstanding(net); + _leave(" [%d]", atomic_read(&net->servers_outstanding)); +} + +static void afs_queue_server_manager(struct afs_net *net) +{ + afs_inc_servers_outstanding(net); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); } /* - * destroy a dead server + * Purge list of servers. */ -static void afs_destroy_server(struct afs_server *server) +void afs_purge_servers(struct afs_net *net) { - _enter("%p", server); + _enter(""); - ASSERTIF(server->cb_break_head != server->cb_break_tail, - delayed_work_pending(&server->cb_break_work)); + if (del_timer_sync(&net->fs_timer)) + atomic_dec(&net->servers_outstanding); - ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); - ASSERTCMP(server->cb_promises.rb_node, ==, NULL); - ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); - ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0); + afs_queue_server_manager(net); - afs_put_cell(server->cell); - kfree(server); + _debug("wait"); + wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait, + TASK_UNINTERRUPTIBLE); + _leave(""); } /* - * reap dead server records + * Probe a fileserver to find its capabilities. + * + * TODO: Try service upgrade. 
*/ -static void afs_reap_server(struct work_struct *work) +static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) { - LIST_HEAD(corpses); - struct afs_server *server; - unsigned long delay, expiry; - time64_t now; - - now = ktime_get_real_seconds(); - spin_lock(&afs_server_graveyard_lock); - - while (!list_empty(&afs_server_graveyard)) { - server = list_entry(afs_server_graveyard.next, - struct afs_server, grave); + _enter(""); - /* the queue is ordered most dead first */ - expiry = server->time_of_death + afs_server_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - mod_delayed_work(afs_wq, &afs_server_reaper, delay); + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(fc->ac.alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + + while (afs_iterate_addresses(&fc->ac)) { + afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server, + &fc->ac, fc->key); + switch (fc->ac.error) { + case 0: + afs_end_cursor(&fc->ac); + set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); + return true; + case -ECONNABORTED: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: break; + default: + fc->ac.error = -EIO; + goto error; } + } - write_lock(&server->cell->servers_lock); - write_lock(&afs_servers_lock); - if (atomic_read(&server->usage) > 0) { - list_del_init(&server->grave); - } else { - list_move_tail(&server->grave, &corpses); - list_del_init(&server->link); - rb_erase(&server->master_rb, &afs_servers); - } - write_unlock(&afs_servers_lock); - write_unlock(&server->cell->servers_lock); +error: + afs_end_cursor(&fc->ac); + return false; +} + +/* + * If we haven't already, try probing the fileserver to get its capabilities. + * We try not to instigate parallel probes, but it's possible that the parallel + * probes will fail due to authentication failure when ours would succeed. + * + * TODO: Try sending an anonymous probe if an authenticated probe fails. + */ +bool afs_probe_fileserver(struct afs_fs_cursor *fc) +{ + bool success; + int ret, retries = 0; + + _enter(""); + +retry: + if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { + _leave(" = t"); + return true; } - spin_unlock(&afs_server_graveyard_lock); + if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { + success = afs_do_probe_fileserver(fc); + clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); + wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); + _leave(" = t"); + return success; + } + + _debug("wait"); + ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [%d]", ret); + return false; + } - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - server = list_entry(corpses.next, struct afs_server, grave); - list_del(&server->grave); - afs_destroy_server(server); + retries++; + if (retries == 4) { + fc->ac.error = -ESTALE; + _leave(" = f [stale]"); + return false; } + _debug("retry"); + goto retry; } /* - * discard all the server records for rmmod + * Get an update for a server's address list. 
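
afs_probe_fileserver() above lets exactly one caller own the probe via test_and_set_bit_lock() while everyone else sleeps in wait_on_bit() and then re-checks, giving up after a few rounds. Very roughly, the decision shape in userspace C; the flag and wait primitives are stand-ins and there is no real sleeping here:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_flag probing = ATOMIC_FLAG_INIT;
	static _Atomic bool probed;

	static bool do_probe(void)
	{
		/* ... the capabilities probe would go here ... */
		atomic_store(&probed, true);
		return true;
	}

	static bool probe_once(void)
	{
		int retries = 0;

		for (;;) {
			if (atomic_load(&probed))
				return true;

			if (!atomic_flag_test_and_set(&probing)) {
				bool ok = do_probe();

				atomic_flag_clear(&probing);	/* and wake any waiters */
				return ok;
			}

			/* Another caller is probing: wait for it, then re-check. */
			if (++retries == 4)
				return false;			/* treat as -ESTALE */
		}
	}

	int main(void)
	{
		printf("%s\n", probe_once() ? "probed" : "stale");
		return 0;
	}
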
*/ -void __exit afs_purge_servers(void) +static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) { - afs_server_timeout = 0; - mod_delayed_work(afs_wq, &afs_server_reaper, 0); + struct afs_addr_list *alist, *discard; + + _enter(""); + + alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, + &server->uuid); + if (IS_ERR(alist)) { + fc->ac.error = PTR_ERR(alist); + _leave(" = f [%d]", fc->ac.error); + return false; + } + + discard = alist; + if (server->addr_version != alist->version) { + write_lock(&server->fs_lock); + discard = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + rcu_assign_pointer(server->addresses, alist); + server->addr_version = alist->version; + write_unlock(&server->fs_lock); + } + + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + afs_put_addrlist(discard); + _leave(" = t"); + return true; +} + +/* + * See if a server's address list needs updating. + */ +bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +{ + time64_t now = ktime_get_real_seconds(); + long diff; + bool success; + int ret, retries = 0; + + _enter(""); + + ASSERT(server); + +retry: + diff = READ_ONCE(server->update_at) - now; + if (diff > 0) { + _leave(" = t [not now %ld]", diff); + return true; + } + + if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { + success = afs_update_server_record(fc, server); + clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); + _leave(" = %d", success); + return success; + } + + ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [intr]"); + return false; + } + + retries++; + if (retries == 4) { + _leave(" = f [stale]"); + ret = -ESTALE; + return false; + } + goto retry; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c new file mode 100644 index 000000000000..26bad7032bba --- /dev/null +++ b/fs/afs/server_list.c @@ -0,0 +1,153 @@ +/* AFS fileserver list management. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include "internal.h" + +void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) +{ + int i; + + if (refcount_dec_and_test(&slist->usage)) { + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + afs_put_server(net, slist->servers[i].server); + } + kfree(slist); + } +} + +/* + * Build a server list from a VLDB record. + */ +struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + struct key *key, + struct afs_vldb_entry *vldb, + u8 type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + slist = kzalloc(sizeof(struct afs_server_list) + + sizeof(struct afs_server_entry) * nr_servers, + GFP_KERNEL); + if (!slist) + goto error; + + refcount_set(&slist->usage, 1); + + /* Make sure a records exists for each server in the list. 
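
In afs_alloc_server_list(), the loop that follows keeps its entries sorted by server pointer, which makes the later old/new comparison in afs_annotate_server_list() a simple walk and makes duplicates trivial to drop. A small self-contained sketch of that insertion step (the array bound and types are invented for the example):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define MAX_SERVERS 8

	struct entry { const void *server; };

	struct list {
		int nr;
		struct entry e[MAX_SERVERS];
	};

	/* Insertion-sort by pointer value, skipping duplicates. */
	static void add_server(struct list *sl, const void *server)
	{
		int j;

		if (sl->nr >= MAX_SERVERS)
			return;

		for (j = 0; j < sl->nr; j++)
			if ((uintptr_t)sl->e[j].server >= (uintptr_t)server)
				break;

		if (j < sl->nr) {
			if (sl->e[j].server == server)
				return;			/* already present */
			memmove(&sl->e[j + 1], &sl->e[j],
				(sl->nr - j) * sizeof(struct entry));
		}

		sl->e[j].server = server;
		sl->nr++;
	}

	int main(void)
	{
		static int a, b, c;
		struct list sl = { 0 };

		add_server(&sl, &b);
		add_server(&sl, &a);
		add_server(&sl, &c);
		add_server(&sl, &a);			/* duplicate, ignored */

		printf("%d entries\n", sl.nr);
		return 0;
	}
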
*/ + for (i = 0; i < vldb->nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(cell, key, &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(cell->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return slist; + +error_2: + afs_put_serverlist(cell->net, slist); +error: + return ERR_PTR(ret); +} + +/* + * Copy the annotations from an old server list to its potential replacement. + */ +bool afs_annotate_server_list(struct afs_server_list *new, + struct afs_server_list *old) +{ + struct afs_server *cur; + int i, j; + + if (old->nr_servers != new->nr_servers) + goto changed; + + for (i = 0; i < old->nr_servers; i++) + if (old->servers[i].server != new->servers[i].server) + goto changed; + + return false; + +changed: + /* Maintain the same current server as before if possible. */ + cur = old->servers[old->index].server; + for (j = 0; j < new->nr_servers; j++) { + if (new->servers[j].server == cur) { + new->index = j; + break; + } + } + + /* Keep the old callback interest records where possible so that we + * maintain callback interception. + */ + i = 0; + j = 0; + while (i < old->nr_servers && j < new->nr_servers) { + if (new->servers[j].server == old->servers[i].server) { + struct afs_cb_interest *cbi = old->servers[i].cb_interest; + if (cbi) { + new->servers[j].cb_interest = cbi; + refcount_inc(&cbi->usage); + } + i++; + j++; + continue; + } + + if (new->servers[j].server < old->servers[i].server) { + j++; + continue; + } + + i++; + continue; + } + + return true; +} diff --git a/fs/afs/super.c b/fs/afs/super.c index 689173c0a682..875b5eb02242 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -25,11 +25,10 @@ #include <linux/statfs.h> #include <linux/sched.h> #include <linux/nsproxy.h> +#include <linux/magic.h> #include <net/net_namespace.h> #include "internal.h" -#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ - static void afs_i_init_once(void *foo); static struct dentry *afs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); @@ -143,9 +142,9 @@ void __exit afs_fs_exit(void) */ static int afs_show_devname(struct seq_file *m, struct dentry *root) { - struct afs_super_info *as = root->d_sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; - struct afs_cell *cell = volume->cell; + struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; @@ -163,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) break; } - seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf); + seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } @@ -201,12 +200,14 @@ static int afs_parse_options(struct afs_mount_params *params, token = match_token(p, afs_options_list, args); switch (token) { case afs_opt_cell: - cell = afs_cell_lookup(args[0].from, - args[0].to - args[0].from, - false); + rcu_read_lock(); + cell = afs_lookup_cell_rcu(params->net, + 
args[0].from, + args[0].to - args[0].from); + rcu_read_unlock(); if (IS_ERR(cell)) return PTR_ERR(cell); - afs_put_cell(params->cell); + afs_put_cell(params->net, params->cell); params->cell = cell; break; @@ -308,13 +309,14 @@ static int afs_parse_device_name(struct afs_mount_params *params, /* lookup the cell record */ if (cellname || !params->cell) { - cell = afs_cell_lookup(cellname, cellnamesz, true); + cell = afs_lookup_cell(params->net, cellname, cellnamesz, + NULL, false); if (IS_ERR(cell)) { printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(params->cell); + afs_put_cell(params->net, params->cell); params->cell = cell; } @@ -332,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params, static int afs_test_super(struct super_block *sb, void *data) { struct afs_super_info *as1 = data; - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); - return as->volume == as1->volume; + return as->net == as1->net && as->volume->vid == as1->volume->vid; } static int afs_set_super(struct super_block *sb, void *data) { - sb->s_fs_info = data; + struct afs_super_info *as = data; + + sb->s_fs_info = as; return set_anon_super(sb, NULL); } @@ -349,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data) static int afs_fill_super(struct super_block *sb, struct afs_mount_params *params) { - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); struct afs_fid fid; struct inode *inode = NULL; int ret; @@ -366,13 +370,15 @@ static int afs_fill_super(struct super_block *sb, if (ret) return ret; sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); + sprintf(sb->s_id, "%u", as->volume->vid); + + afs_activate_volume(as->volume); /* allocate the root inode and dentry */ fid.vid = as->volume->vid; fid.vnode = 1; fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL); + inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -394,23 +400,45 @@ error: return ret; } +static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) +{ + struct afs_super_info *as; + + as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); + if (as) { + as->net = afs_get_net(params->net); + as->cell = afs_get_cell(params->cell); + } + return as; +} + +static void afs_destroy_sbi(struct afs_super_info *as) +{ + if (as) { + afs_put_volume(as->cell, as->volume); + afs_put_cell(as->net, as->cell); + afs_put_net(as->net); + kfree(as); + } +} + /* * get an AFS superblock */ static struct dentry *afs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *options) + int flags, const char *dev_name, void *options) { struct afs_mount_params params; struct super_block *sb; - struct afs_volume *vol; + struct afs_volume *candidate; struct key *key; - char *new_opts = kstrdup(options, GFP_KERNEL); struct afs_super_info *as; int ret; _enter(",,%s,%p", dev_name, options); memset(¶ms, 0, sizeof(params)); + params.net = &__afs_net; ret = -EINVAL; if (current->nsproxy->net_ns != &init_net) @@ -436,66 +464,75 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, } params.key = key; - /* parse the device name */ - vol = afs_volume_lookup(¶ms); - if (IS_ERR(vol)) { - ret = PTR_ERR(vol); - goto error; - } - /* allocate a superblock info record */ - as = kzalloc(sizeof(struct 
afs_super_info), GFP_KERNEL); - if (!as) { - ret = -ENOMEM; - afs_put_volume(vol); - goto error; + ret = -ENOMEM; + as = afs_alloc_sbi(¶ms); + if (!as) + goto error_key; + + /* Assume we're going to need a volume record; at the very least we can + * use it to update the volume record if we have one already. This + * checks that the volume exists within the cell. + */ + candidate = afs_create_volume(¶ms); + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_as; } - as->volume = vol; + + as->volume = candidate; /* allocate a deviceless superblock */ sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); - afs_put_volume(vol); - kfree(as); - goto error; + goto error_as; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); ret = afs_fill_super(sb, ¶ms); - if (ret < 0) { - deactivate_locked_super(sb); - goto error; - } + if (ret < 0) + goto error_sb; + as = NULL; sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); - afs_put_volume(vol); - kfree(as); + afs_destroy_sbi(as); + as = NULL; } - afs_put_cell(params.cell); - kfree(new_opts); + afs_put_cell(params.net, params.cell); + key_put(params.key); _leave(" = 0 [%p]", sb); return dget(sb->s_root); -error: - afs_put_cell(params.cell); +error_sb: + deactivate_locked_super(sb); + goto error_key; +error_as: + afs_destroy_sbi(as); +error_key: key_put(params.key); - kfree(new_opts); +error: + afs_put_cell(params.net, params.cell); _leave(" = %d", ret); return ERR_PTR(ret); } static void afs_kill_super(struct super_block *sb) { - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); + + /* Clear the callback interests (which will do ilookup5) before + * deactivating the superblock. 
+ */ + afs_clear_callback_interests(as->net, as->volume->servers); kill_anon_super(sb); - afs_put_volume(as->volume); - kfree(as); + afs_deactivate_volume(as->volume); + afs_destroy_sbi(as); } /* @@ -507,16 +544,15 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); - init_waitqueue_head(&vnode->update_waitq); - mutex_init(&vnode->permits_lock); + mutex_init(&vnode->io_lock); mutex_init(&vnode->validate_lock); - spin_lock_init(&vnode->writeback_lock); + spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); - INIT_LIST_HEAD(&vnode->writebacks); + INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); - INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); + seqlock_init(&vnode->cb_lock); } /* @@ -536,9 +572,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; - vnode->update_cnt = 0; vnode->flags = 1 << AFS_VNODE_UNSET; - vnode->cb_promised = false; _leave(" = %p", &vnode->vfs_inode); return &vnode->vfs_inode; @@ -562,7 +596,7 @@ static void afs_destroy_inode(struct inode *inode) _debug("DESTROY INODE %p", inode); - ASSERTCMP(vnode->server, ==, NULL); + ASSERTCMP(vnode->cb_interest, ==, NULL); call_rcu(&inode->i_rcu, afs_i_callback); atomic_dec(&afs_count_active_inodes); @@ -573,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode) */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct afs_fs_cursor fc; struct afs_volume_status vs; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; @@ -582,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (IS_ERR(key)) return PTR_ERR(key); - ret = afs_vnode_get_volume_status(vnode, key, &vs); - key_put(key); - if (ret < 0) { - _leave(" = %d", ret); - return ret; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_get_volume_status(&fc, &vs); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); } - buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = AFS_BLOCK_SIZE; - buf->f_namelen = AFSNAMEMAX - 1; + key_put(key); - if (vs.max_quota == 0) - buf->f_blocks = vs.part_max_blocks; - else - buf->f_blocks = vs.max_quota; - buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; - return 0; + if (ret == 0) { + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (vs.max_quota == 0) + buf->f_blocks = vs.part_max_blocks; + else + buf->f_blocks = vs.max_quota; + buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; + } + + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index a5e4cc561b6c..e372f89fd36a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -12,58 +12,19 @@ #include <linux/gfp.h> #include <linux/init.h> #include <linux/sched.h> +#include "afs_fs.h" #include "internal.h" /* - * map volume locator abort codes to error codes + * Deliver reply data to a VL.GetEntryByNameU call. 
*/ -static int afs_vl_abort_to_error(u32 abort_code) +static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) { - _enter("%u", abort_code); - - switch (abort_code) { - case AFSVL_IDEXIST: return -EEXIST; - case AFSVL_IO: return -EREMOTEIO; - case AFSVL_NAMEEXIST: return -EEXIST; - case AFSVL_CREATEFAIL: return -EREMOTEIO; - case AFSVL_NOENT: return -ENOMEDIUM; - case AFSVL_EMPTY: return -ENOMEDIUM; - case AFSVL_ENTDELETED: return -ENOMEDIUM; - case AFSVL_BADNAME: return -EINVAL; - case AFSVL_BADINDEX: return -EINVAL; - case AFSVL_BADVOLTYPE: return -EINVAL; - case AFSVL_BADSERVER: return -EINVAL; - case AFSVL_BADPARTITION: return -EINVAL; - case AFSVL_REPSFULL: return -EFBIG; - case AFSVL_NOREPSERVER: return -ENOENT; - case AFSVL_DUPREPSERVER: return -EEXIST; - case AFSVL_RWNOTFOUND: return -ENOENT; - case AFSVL_BADREFCOUNT: return -EINVAL; - case AFSVL_SIZEEXCEEDED: return -EINVAL; - case AFSVL_BADENTRY: return -EINVAL; - case AFSVL_BADVOLIDBUMP: return -EINVAL; - case AFSVL_IDALREADYHASHED: return -EINVAL; - case AFSVL_ENTRYLOCKED: return -EBUSY; - case AFSVL_BADVOLOPER: return -EBADRQC; - case AFSVL_BADRELLOCKTYPE: return -EINVAL; - case AFSVL_RERELEASE: return -EREMOTEIO; - case AFSVL_BADSERVERFLAG: return -EINVAL; - case AFSVL_PERM: return -EACCES; - case AFSVL_NOMEM: return -EREMOTEIO; - default: - return afs_abort_to_error(abort_code); - } -} - -/* - * deliver reply data to a VL.GetEntryByXXX call - */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) -{ - struct afs_cache_vlocation *entry; - __be32 *bp; + struct afs_uvldbentry__xdr *uvldb; + struct afs_vldb_entry *entry; + bool new_only = false; u32 tmp; - int loop, ret; + int i, ret; _enter(""); @@ -72,144 +33,613 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ - entry = call->reply; - bp = call->buffer; - - for (loop = 0; loop < 64; loop++) - entry->name[loop] = ntohl(*bp++); - entry->name[loop] = 0; - bp++; /* final NUL */ + uvldb = call->buffer; + entry = call->reply[0]; - bp++; /* type */ - entry->nservers = ntohl(*bp++); + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) + entry->name[i] = (u8)ntohl(uvldb->name[i]); + entry->name[i] = 0; + entry->name_len = strlen(entry->name); - for (loop = 0; loop < 8; loop++) - entry->servers[loop].s_addr = *bp++; + /* If there is a new replication site that we can use, ignore all the + * sites that aren't marked as new. 
+ */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + tmp = ntohl(uvldb->serverFlags[i]); + if (!(tmp & AFS_VLSF_DONTUSE) && + (tmp & AFS_VLSF_NEWREPSITE)) + new_only = true; + } - bp += 8; /* partition IDs */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + struct afs_uuid__xdr *xdr; + struct afs_uuid *uuid; + int j; - for (loop = 0; loop < 8; loop++) { - tmp = ntohl(*bp++); - entry->srvtmask[loop] = 0; + tmp = ntohl(uvldb->serverFlags[i]); + if (tmp & AFS_VLSF_DONTUSE || + (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) + continue; if (tmp & AFS_VLSF_RWVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RW; + entry->fs_mask[i] |= AFS_VOL_VTM_RW; if (tmp & AFS_VLSF_ROVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RO; + entry->fs_mask[i] |= AFS_VOL_VTM_RO; if (tmp & AFS_VLSF_BACKVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_BAK; - } + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + if (!entry->fs_mask[i]) + continue; - entry->vid[0] = ntohl(*bp++); - entry->vid[1] = ntohl(*bp++); - entry->vid[2] = ntohl(*bp++); + xdr = &uvldb->serverNumber[i]; + uuid = (struct afs_uuid *)&entry->fs_server[i]; + uuid->time_low = xdr->time_low; + uuid->time_mid = htons(ntohl(xdr->time_mid)); + uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); + uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved); + uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low); + for (j = 0; j < 6; j++) + uuid->node[j] = (u8)ntohl(xdr->node[j]); - bp++; /* clone ID */ + entry->nr_servers++; + } + + for (i = 0; i < AFS_MAXTYPES; i++) + entry->vid[i] = ntohl(uvldb->volumeId[i]); - tmp = ntohl(*bp++); /* flags */ - entry->vidmask = 0; + tmp = ntohl(uvldb->flags); if (tmp & AFS_VLF_RWEXISTS) - entry->vidmask |= AFS_VOL_VTM_RW; + __set_bit(AFS_VLDB_HAS_RW, &entry->flags); if (tmp & AFS_VLF_ROEXISTS) - entry->vidmask |= AFS_VOL_VTM_RO; + __set_bit(AFS_VLDB_HAS_RO, &entry->flags); if (tmp & AFS_VLF_BACKEXISTS) - entry->vidmask |= AFS_VOL_VTM_BAK; - if (!entry->vidmask) - return -EBADMSG; + __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); + if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + entry->error = -ENOMEDIUM; + __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); + } + + __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags); _leave(" = 0 [done]"); return 0; } -/* - * VL.GetEntryByName operation type - */ -static const struct afs_call_type afs_RXVLGetEntryByName = { - .name = "VL.GetEntryByName", - .deliver = afs_deliver_vl_get_entry_by_xxx, - .abort_to_error = afs_vl_abort_to_error, - .destructor = afs_flat_call_destructor, -}; +static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) +{ + kfree(call->reply[0]); + afs_flat_call_destructor(call); +} /* - * VL.GetEntryById operation type + * VL.GetEntryByNameU operation type. */ -static const struct afs_call_type afs_RXVLGetEntryById = { - .name = "VL.GetEntryById", - .deliver = afs_deliver_vl_get_entry_by_xxx, - .abort_to_error = afs_vl_abort_to_error, - .destructor = afs_flat_call_destructor, +static const struct afs_call_type afs_RXVLGetEntryByNameU = { + .name = "VL.GetEntryByNameU", + .op = afs_VL_GetEntryByNameU, + .deliver = afs_deliver_vl_get_entry_by_name_u, + .destructor = afs_destroy_vl_get_entry_by_name_u, }; /* - * dispatch a get volume entry by name operation + * Dispatch a get volume entry by name or ID operation (uuid variant). If the + * volname is a decimal number then it's a volume ID not a volume name. 
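
The serverFlags handling above is a two-pass filter: the first pass notices whether any usable site is marked as a new replication site, and the second then drops DONTUSE sites and, if a new site exists, everything that is not new. Roughly, as standalone C (the flag values here are invented for the example, not the on-the-wire AFS_VLSF_* constants):

	#include <stdbool.h>
	#include <stdio.h>

	#define VLSF_NEWREPSITE	0x01
	#define VLSF_DONTUSE	0x20

	static int count_usable(const unsigned int *flags, int n)
	{
		bool new_only = false;
		int i, usable = 0;

		for (i = 0; i < n; i++)
			if (!(flags[i] & VLSF_DONTUSE) && (flags[i] & VLSF_NEWREPSITE))
				new_only = true;

		for (i = 0; i < n; i++) {
			if (flags[i] & VLSF_DONTUSE)
				continue;
			if (new_only && !(flags[i] & VLSF_NEWREPSITE))
				continue;
			usable++;
		}
		return usable;
	}

	int main(void)
	{
		unsigned int flags[] = { 0, VLSF_NEWREPSITE, VLSF_DONTUSE };

		printf("%d usable\n", count_usable(flags, 3));
		return 0;
	}
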
*/ -int afs_vl_get_entry_by_name(struct in_addr *addr, - struct key *key, - const char *volname, - struct afs_cache_vlocation *entry, - bool async) +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const char *volname, + int volnamesz) { + struct afs_vldb_entry *entry; struct afs_call *call; - size_t volnamesz, reqsz, padsz; + size_t reqsz, padsz; __be32 *bp; _enter(""); - volnamesz = strlen(volname); padsz = (4 - (volnamesz & 3)) & 3; reqsz = 8 + volnamesz + padsz; - call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384); - if (!call) - return -ENOMEM; + entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz, + sizeof(struct afs_uvldbentry__xdr)); + if (!call) { + kfree(entry); + return ERR_PTR(-ENOMEM); + } call->key = key; - call->reply = entry; - call->service_id = VL_SERVICE; - call->port = htons(AFS_VL_PORT); + call->reply[0] = entry; + call->ret_reply0 = true; - /* marshall the parameters */ + /* Marshall the parameters */ bp = call->request; - *bp++ = htonl(VLGETENTRYBYNAME); + *bp++ = htonl(VLGETENTRYBYNAMEU); *bp++ = htonl(volnamesz); memcpy(bp, volname, volnamesz); if (padsz > 0) - memset((void *) bp + volnamesz, 0, padsz); + memset((void *)bp + volnamesz, 0, padsz); - /* initiate the call */ - return afs_make_call(addr, call, GFP_KERNEL, async); + trace_afs_make_vl_call(call); + return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); } /* - * dispatch a get volume entry by ID operation + * Deliver reply data to a VL.GetAddrsU call. + * + * GetAddrsU(IN ListAddrByAttributes *inaddr, + * OUT afsUUID *uuidp1, + * OUT uint32_t *uniquifier, + * OUT uint32_t *nentries, + * OUT bulkaddrs *blkaddrs); */ -int afs_vl_get_entry_by_id(struct in_addr *addr, - struct key *key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *entry, - bool async) +static int afs_deliver_vl_get_addrs_u(struct afs_call *call) { + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, nentries, count; + int i, ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */ + case 1: + ret = afs_extract_data(call, call->buffer, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(struct afs_uuid__xdr); + uniquifier = ntohl(*bp++); + nentries = ntohl(*bp++); + count = ntohl(*bp); + + nentries = min(nentries, count); + alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->reply[0] = alist; + call->count = count; + call->count2 = nentries; + call->offset = 0; + call->unmarshall++; + + /* Extract entries */ + case 2: + count = min(call->count, 4U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 4); + if (ret < 0) + return ret; + + alist = call->reply[0]; + bp = call->buffer; + for (i = 0; i < count; i++) + if (alist->nr_addrs < call->count2) + afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_vl_get_addrs_u_destructor(struct afs_call 
*call) +{ + afs_put_server(call->net, (struct afs_server *)call->reply[0]); + kfree(call->reply[1]); + return afs_flat_call_destructor(call); +} + +/* + * VL.GetAddrsU operation type. + */ +static const struct afs_call_type afs_RXVLGetAddrsU = { + .name = "VL.GetAddrsU", + .op = afs_VL_GetAddrsU, + .deliver = afs_deliver_vl_get_addrs_u, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. + */ +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const uuid_t *uuid) +{ + struct afs_ListAddrByAttributes__xdr *r; + const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; __be32 *bp; + int i; _enter(""); - call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384); + call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU, + sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr), + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = key; + call->reply[0] = NULL; + call->ret_reply0 = true; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(VLGETADDRSU); + r = (struct afs_ListAddrByAttributes__xdr *)bp; + r->Mask = htonl(AFS_VLADDR_UUID); + r->ipaddr = 0; + r->index = 0; + r->spare = 0; + r->uuid.time_low = u->time_low; + r->uuid.time_mid = htonl(ntohs(u->time_mid)); + r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version)); + r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); + r->uuid.clock_seq_low = htonl(u->clock_seq_low); + for (i = 0; i < 6; i++) + r->uuid.node[i] = ntohl(u->node[i]); + + trace_afs_make_vl_call(call); + return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); +} + +/* + * Deliver reply data to an VL.GetCapabilities operation. + */ +static int afs_deliver_vl_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, &call->tmp, + 1 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + call->offset = 0; + call->unmarshall++; + + /* Extract capabilities words */ + case 2: + count = min(call->count, 16U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 16); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + call->reply[0] = (void *)(unsigned long)call->service_id; + + _leave(" = 0 [done]"); + return 0; +} + +/* + * VL.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXVLGetCapabilities = { + .name = "VL.GetCapabilities", + .op = afs_VL_GetCapabilities, + .deliver = afs_deliver_vl_get_capabilities, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a fileserver for the capabilities that it supports. This can + * return up to 196 words. + * + * We use this to probe for service upgrade to determine what the server at the + * other end supports. 
+ */ +int afs_vl_get_capabilities(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4); if (!call) return -ENOMEM; call->key = key; - call->reply = entry; - call->service_id = VL_SERVICE; - call->port = htons(AFS_VL_PORT); + call->upgrade = true; /* Let's see if this is a YFS server */ + call->reply[0] = (void *)VLGETCAPABILITIES; + call->ret_reply0 = true; /* marshall the parameters */ bp = call->request; - *bp++ = htonl(VLGETENTRYBYID); - *bp++ = htonl(volid); - *bp = htonl(voltype); + *bp++ = htonl(VLGETCAPABILITIES); + + /* Can't take a ref on server */ + trace_afs_make_vl_call(call); + return afs_make_call(ac, call, GFP_KERNEL, false); +} + +/* + * Deliver reply data to a YFSVL.GetEndpoints call. + * + * GetEndpoints(IN yfsServerAttributes *attr, + * OUT opr_uuid *uuid, + * OUT afs_int32 *uniquifier, + * OUT endpoints *fsEndpoints, + * OUT endpoints *volEndpoints) + */ +static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) +{ + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, size; + int ret; + + _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall = 1; + + /* Extract the returned uuid, uniquifier, fsEndpoints count and + * either the first fsEndpoint type or the volEndpoints + * count if there are no fsEndpoints. */ + case 1: + ret = afs_extract_data(call, call->buffer, + sizeof(uuid_t) + + 3 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(uuid_t); + uniquifier = ntohl(*bp++); + call->count = ntohl(*bp++); + call->count2 = ntohl(*bp); /* Type or next count */ + + if (call->count > YFS_MAXENDPOINTS) + return -EBADMSG; + + alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->reply[0] = alist; + call->offset = 0; + + if (call->count == 0) + goto extract_volendpoints; + + call->unmarshall = 2; + + /* Extract fsEndpoints[] entries */ + case 2: + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return -EBADMSG; + } + + size += sizeof(__be32); + ret = afs_extract_data(call, call->buffer, size, true); + if (ret < 0) + return ret; + + alist = call->reply[0]; + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return -EBADMSG; + afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return -EBADMSG; + afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); + bp += 6; + break; + default: + return -EBADMSG; + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->count2 = htonl(*bp++); + + call->offset = 0; + call->count--; + if (call->count > 0) + goto again; + + extract_volendpoints: + /* Extract the list of volEndpoints. */ + call->count = call->count2; + if (!call->count) + goto end; + if (call->count > YFS_MAXENDPOINTS) + return -EBADMSG; + + call->unmarshall = 3; + + /* Extract the type of volEndpoints[0]. Normally we would + * extract the type of the next endpoint when we extract the + * data of the current one, but this is the first... 
+ */ + case 3: + ret = afs_extract_data(call, call->buffer, sizeof(__be32), true); + if (ret < 0) + return ret; + + bp = call->buffer; + call->count2 = htonl(*bp++); + call->offset = 0; + call->unmarshall = 4; + + /* Extract volEndpoints[] entries */ + case 4: + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + size = sizeof(__be32) * (1 + 1 + 1); + break; + case YFS_ENDPOINT_IPV6: + size = sizeof(__be32) * (1 + 4 + 1); + break; + default: + return -EBADMSG; + } + + if (call->count > 1) + size += sizeof(__be32); + ret = afs_extract_data(call, call->buffer, size, true); + if (ret < 0) + return ret; + + bp = call->buffer; + switch (call->count2) { + case YFS_ENDPOINT_IPV4: + if (ntohl(bp[0]) != sizeof(__be32) * 2) + return -EBADMSG; + bp += 3; + break; + case YFS_ENDPOINT_IPV6: + if (ntohl(bp[0]) != sizeof(__be32) * 5) + return -EBADMSG; + bp += 6; + break; + default: + return -EBADMSG; + } + + /* Got either the type of the next entry or the count of + * volEndpoints if no more fsEndpoints. + */ + call->offset = 0; + call->count--; + if (call->count > 0) { + call->count2 = htonl(*bp++); + goto again; + } + + end: + call->unmarshall = 5; + + /* Done */ + case 5: + ret = afs_extract_data(call, call->buffer, 0, false); + if (ret < 0) + return ret; + call->unmarshall = 6; + + case 6: + break; + } + + alist = call->reply[0]; + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->index = alist->nr_ipv4; + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFSVL.GetEndpoints operation type. + */ +static const struct afs_call_type afs_YFSVLGetEndpoints = { + .name = "YFSVL.GetEndpoints", + .op = afs_YFSVL_GetEndpoints, + .deliver = afs_deliver_yfsvl_get_endpoints, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. + */ +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const uuid_t *uuid) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints, + sizeof(__be32) * 2 + sizeof(*uuid), + sizeof(struct in6_addr) + sizeof(__be32) * 3); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = key; + call->reply[0] = NULL; + call->ret_reply0 = true; + + /* Marshall the parameters */ + bp = call->request; + *bp++ = htonl(YVLGETENDPOINTS); + *bp++ = htonl(YFS_SERVER_UUID); + memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ - /* initiate the call */ - return afs_make_call(addr, call, GFP_KERNEL, async); + trace_afs_make_vl_call(call); + return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c deleted file mode 100644 index 37b7c3b342a6..000000000000 --- a/fs/afs/vlocation.c +++ /dev/null @@ -1,720 +0,0 @@ -/* AFS volume location management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/sched.h> -#include "internal.h" - -static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ -static unsigned afs_vlocation_update_timeout = 10 * 60; - -static void afs_vlocation_reaper(struct work_struct *); -static void afs_vlocation_updater(struct work_struct *); - -static LIST_HEAD(afs_vlocation_updates); -static LIST_HEAD(afs_vlocation_graveyard); -static DEFINE_SPINLOCK(afs_vlocation_updates_lock); -static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); -static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); -static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); -static struct workqueue_struct *afs_vlocation_update_worker; - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - struct afs_cell *cell = vl->cell; - struct in_addr addr; - int count, ret; - - _enter("%s,%s", cell->name, vl->vldb.name); - - down_write(&vl->cell->vl_sem); - ret = -ENOMEDIUM; - for (count = cell->vl_naddrs; count > 0; count--) { - addr = cell->vl_addrs[cell->vl_curr_svix]; - - _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); - - /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, - false); - switch (ret) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ret == -ENOMEM || ret == -ENONET) - goto out; - goto rotate; - case -ENOMEDIUM: - case -EKEYREJECTED: - case -EKEYEXPIRED: - goto out; - default: - ret = -EIO; - goto rotate; - } - - /* rotate the server records upon lookup failure */ - rotate: - cell->vl_curr_svix++; - cell->vl_curr_svix %= cell->vl_naddrs; - } - -out: - up_write(&vl->cell->vl_sem); - _leave(" = %d", ret); - return ret; -} - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, - struct key *key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *vldb) -{ - struct afs_cell *cell = vl->cell; - struct in_addr addr; - int count, ret; - - _enter("%s,%x,%d,", cell->name, volid, voltype); - - down_write(&vl->cell->vl_sem); - ret = -ENOMEDIUM; - for (count = cell->vl_naddrs; count > 0; count--) { - addr = cell->vl_addrs[cell->vl_curr_svix]; - - _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); - - /* attempt to access the VL server */ - ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, - false); - switch (ret) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ret == -ENOMEM || ret == -ENONET) - goto out; - goto rotate; - case -EBUSY: - vl->upd_busy_cnt++; - if (vl->upd_busy_cnt <= 3) { - if (vl->upd_busy_cnt > 1) { - /* second+ BUSY - sleep a little bit */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } - continue; - } - break; - case -ENOMEDIUM: - vl->upd_rej_cnt++; - goto rotate; - default: - ret = -EIO; - goto rotate; - } - - /* rotate the server records upon lookup failure */ - rotate: - cell->vl_curr_svix++; - cell->vl_curr_svix %= cell->vl_naddrs; - vl->upd_busy_cnt = 0; - } - -out: - if (ret 
< 0 && vl->upd_rej_cnt > 0) { - printk(KERN_NOTICE "kAFS:" - " Active volume no longer valid '%s'\n", - vl->vldb.name); - vl->valid = 0; - ret = -ENOMEDIUM; - } - - up_write(&vl->cell->vl_sem); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate a volume location record - */ -static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - - vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); - if (vl) { - vl->cell = cell; - vl->state = AFS_VL_NEW; - atomic_set(&vl->usage, 1); - INIT_LIST_HEAD(&vl->link); - INIT_LIST_HEAD(&vl->grave); - INIT_LIST_HEAD(&vl->update); - init_waitqueue_head(&vl->waitq); - spin_lock_init(&vl->lock); - memcpy(vl->vldb.name, name, namesz); - } - - _leave(" = %p", vl); - return vl; -} - -/* - * update record if we found it in the cache - */ -static int afs_vlocation_update_record(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - afs_voltype_t voltype; - afs_volid_t vid; - int ret; - - /* try to look up a cached volume in the cell VL databases by ID */ - _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", - vl->vldb.name, - vl->vldb.vidmask, - ntohl(vl->vldb.servers[0].s_addr), - vl->vldb.srvtmask[0], - ntohl(vl->vldb.servers[1].s_addr), - vl->vldb.srvtmask[1], - ntohl(vl->vldb.servers[2].s_addr), - vl->vldb.srvtmask[2]); - - _debug("Vids: %08x %08x %08x", - vl->vldb.vid[0], - vl->vldb.vid[1], - vl->vldb.vid[2]); - - if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { - vid = vl->vldb.vid[0]; - voltype = AFSVL_RWVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { - vid = vl->vldb.vid[1]; - voltype = AFSVL_ROVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { - vid = vl->vldb.vid[2]; - voltype = AFSVL_BACKVOL; - } else { - BUG(); - vid = 0; - voltype = 0; - } - - /* contact the server to make sure the volume is still available - * - TODO: need to handle disconnected operation here - */ - ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); - switch (ret) { - /* net error */ - default: - printk(KERN_WARNING "kAFS:" - " failed to update volume '%s' (%x) up in '%s': %d\n", - vl->vldb.name, vid, vl->cell->name, ret); - _leave(" = %d", ret); - return ret; - - /* pulled from local cache into memory */ - case 0: - _leave(" = 0"); - return 0; - - /* uh oh... 
looks like the volume got deleted */ - case -ENOMEDIUM: - printk(KERN_ERR "kAFS:" - " volume '%s' (%x) does not exist '%s'\n", - vl->vldb.name, vid, vl->cell->name); - - /* TODO: make existing record unavailable */ - _leave(" = %d", ret); - return ret; - } -} - -/* - * apply the update to a VL record - */ -static void afs_vlocation_apply_update(struct afs_vlocation *vl, - struct afs_cache_vlocation *vldb) -{ - _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", - vldb->name, vldb->vidmask, - ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], - ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], - ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); - - _debug("Vids: %08x %08x %08x", - vldb->vid[0], vldb->vid[1], vldb->vid[2]); - - if (strcmp(vldb->name, vl->vldb.name) != 0) - printk(KERN_NOTICE "kAFS:" - " name of volume '%s' changed to '%s' on server\n", - vl->vldb.name, vldb->name); - - vl->vldb = *vldb; - -#ifdef CONFIG_AFS_FSCACHE - fscache_update_cookie(vl->cache); -#endif -} - -/* - * fill in a volume location record, consulting the cache and the VL server - * both - */ -static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, - struct key *key) -{ - struct afs_cache_vlocation vldb; - int ret; - - _enter(""); - - ASSERTCMP(vl->valid, ==, 0); - - memset(&vldb, 0, sizeof(vldb)); - - /* see if we have an in-cache copy (will set vl->valid if there is) */ -#ifdef CONFIG_AFS_FSCACHE - vl->cache = fscache_acquire_cookie(vl->cell->cache, - &afs_vlocation_cache_index_def, vl, - true); -#endif - - if (vl->valid) { - /* try to update a known volume in the cell VL databases by - * ID as the name may have changed */ - _debug("found in cache"); - ret = afs_vlocation_update_record(vl, key, &vldb); - } else { - /* try to look up an unknown volume in the cell VL databases by - * name */ - ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); - if (ret < 0) { - printk("kAFS: failed to locate '%s' in cell '%s'\n", - vl->vldb.name, vl->cell->name); - return ret; - } - } - - afs_vlocation_apply_update(vl, &vldb); - _leave(" = 0"); - return 0; -} - -/* - * queue a vlocation record for updates - */ -static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) -{ - struct afs_vlocation *xvl; - - /* wait at least 10 minutes before updating... */ - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&afs_vlocation_updates_lock); - - if (!list_empty(&afs_vlocation_updates)) { - /* ... 
but wait at least 1 second more than the newest record - * already queued so that we don't spam the VL server suddenly - * with lots of requests - */ - xvl = list_entry(afs_vlocation_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - } else { - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, - afs_vlocation_update_timeout * HZ); - } - - list_add_tail(&vl->update, &afs_vlocation_updates); - spin_unlock(&afs_vlocation_updates_lock); -} - -/* - * lookup volume location - * - iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - * - lookup in the local cache if not able to find on the VL server - * - insert/update in the local cache if did get a VL response - */ -struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, - struct key *key, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - int ret; - - _enter("{%s},{%x},%*.*s,%zu", - cell->name, key_serial(key), - (int) namesz, (int) namesz, name, namesz); - - if (namesz >= sizeof(vl->vldb.name)) { - _leave(" = -ENAMETOOLONG"); - return ERR_PTR(-ENAMETOOLONG); - } - - /* see if we have an in-memory copy first */ - down_write(&cell->vl_sem); - spin_lock(&cell->vl_lock); - list_for_each_entry(vl, &cell->vl_list, link) { - if (vl->vldb.name[namesz] != '\0') - continue; - if (memcmp(vl->vldb.name, name, namesz) == 0) - goto found_in_memory; - } - spin_unlock(&cell->vl_lock); - - /* not in the cell's in-memory lists - create a new record */ - vl = afs_vlocation_alloc(cell, name, namesz); - if (!vl) { - up_write(&cell->vl_sem); - return ERR_PTR(-ENOMEM); - } - - afs_get_cell(cell); - - list_add_tail(&vl->link, &cell->vl_list); - vl->state = AFS_VL_CREATING; - up_write(&cell->vl_sem); - -fill_in_record: - ret = afs_vlocation_fill_in_record(vl, key); - if (ret < 0) - goto error_abandon; - spin_lock(&vl->lock); - vl->state = AFS_VL_VALID; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* update volume entry in local cache */ -#ifdef CONFIG_AFS_FSCACHE - fscache_update_cookie(vl->cache); -#endif - - /* schedule for regular updates */ - afs_vlocation_queue_for_updates(vl); - goto success; - -found_in_memory: - /* found in memory */ - _debug("found in memory"); - atomic_inc(&vl->usage); - spin_unlock(&cell->vl_lock); - if (!list_empty(&vl->grave)) { - spin_lock(&afs_vlocation_graveyard_lock); - list_del_init(&vl->grave); - spin_unlock(&afs_vlocation_graveyard_lock); - } - up_write(&cell->vl_sem); - - /* see if it was an abandoned record that we might try filling in */ - spin_lock(&vl->lock); - while (vl->state != AFS_VL_VALID) { - afs_vlocation_state_t state = vl->state; - - _debug("invalid [state %d]", state); - - if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { - vl->state = AFS_VL_CREATING; - spin_unlock(&vl->lock); - goto fill_in_record; - } - - /* must now wait for creation or update by someone else to - * complete */ - _debug("wait"); - - spin_unlock(&vl->lock); - ret = wait_event_interruptible(vl->waitq, - vl->state == AFS_VL_NEW || - vl->state == AFS_VL_VALID || - vl->state == AFS_VL_NO_VOLUME); - if (ret < 0) - goto error; - spin_lock(&vl->lock); - } - spin_unlock(&vl->lock); - -success: - _leave(" = %p", vl); - return vl; - -error_abandon: - spin_lock(&vl->lock); - vl->state = AFS_VL_NEW; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); -error: - ASSERT(vl != NULL); - afs_put_vlocation(vl); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * 
finish using a volume location record - */ -void afs_put_vlocation(struct afs_vlocation *vl) -{ - if (!vl) - return; - - _enter("%s", vl->vldb.name); - - ASSERTCMP(atomic_read(&vl->usage), >, 0); - - if (likely(!atomic_dec_and_test(&vl->usage))) { - _leave(""); - return; - } - - spin_lock(&afs_vlocation_graveyard_lock); - if (atomic_read(&vl->usage) == 0) { - _debug("buried"); - list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &afs_vlocation_reap, - afs_vlocation_timeout * HZ); - - /* suspend updates on this record */ - if (!list_empty(&vl->update)) { - spin_lock(&afs_vlocation_updates_lock); - list_del_init(&vl->update); - spin_unlock(&afs_vlocation_updates_lock); - } - } - spin_unlock(&afs_vlocation_graveyard_lock); - _leave(" [killed?]"); -} - -/* - * destroy a dead volume location record - */ -static void afs_vlocation_destroy(struct afs_vlocation *vl) -{ - _enter("%p", vl); - -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(vl->cache, 0); -#endif - afs_put_cell(vl->cell); - kfree(vl); -} - -/* - * reap dead volume location records - */ -static void afs_vlocation_reaper(struct work_struct *work) -{ - LIST_HEAD(corpses); - struct afs_vlocation *vl; - unsigned long delay, expiry; - time64_t now; - - _enter(""); - - now = ktime_get_real_seconds(); - spin_lock(&afs_vlocation_graveyard_lock); - - while (!list_empty(&afs_vlocation_graveyard)) { - vl = list_entry(afs_vlocation_graveyard.next, - struct afs_vlocation, grave); - - _debug("check %p", vl); - - /* the queue is ordered most dead first */ - expiry = vl->time_of_death + afs_vlocation_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - _debug("delay %lu", delay); - mod_delayed_work(afs_wq, &afs_vlocation_reap, delay); - break; - } - - spin_lock(&vl->cell->vl_lock); - if (atomic_read(&vl->usage) > 0) { - _debug("no reap"); - list_del_init(&vl->grave); - } else { - _debug("reap"); - list_move_tail(&vl->grave, &corpses); - list_del_init(&vl->link); - } - spin_unlock(&vl->cell->vl_lock); - } - - spin_unlock(&afs_vlocation_graveyard_lock); - - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - vl = list_entry(corpses.next, struct afs_vlocation, grave); - list_del(&vl->grave); - afs_vlocation_destroy(vl); - } - - _leave(""); -} - -/* - * initialise the VL update process - */ -int __init afs_vlocation_update_init(void) -{ - afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated", - WQ_MEM_RECLAIM, 0); - return afs_vlocation_update_worker ? 
0 : -ENOMEM; -} - -/* - * discard all the volume location records for rmmod - */ -void afs_vlocation_purge(void) -{ - afs_vlocation_timeout = 0; - - spin_lock(&afs_vlocation_updates_lock); - list_del_init(&afs_vlocation_updates); - spin_unlock(&afs_vlocation_updates_lock); - mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0); - destroy_workqueue(afs_vlocation_update_worker); - - mod_delayed_work(afs_wq, &afs_vlocation_reap, 0); -} - -/* - * update a volume location - */ -static void afs_vlocation_updater(struct work_struct *work) -{ - struct afs_cache_vlocation vldb; - struct afs_vlocation *vl, *xvl; - time64_t now; - long timeout; - int ret; - - _enter(""); - - now = ktime_get_real_seconds(); - - /* find a record to update */ - spin_lock(&afs_vlocation_updates_lock); - for (;;) { - if (list_empty(&afs_vlocation_updates)) { - spin_unlock(&afs_vlocation_updates_lock); - _leave(" [nothing]"); - return; - } - - vl = list_entry(afs_vlocation_updates.next, - struct afs_vlocation, update); - if (atomic_read(&vl->usage) > 0) - break; - list_del_init(&vl->update); - } - - timeout = vl->update_at - now; - if (timeout > 0) { - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, timeout * HZ); - spin_unlock(&afs_vlocation_updates_lock); - _leave(" [nothing]"); - return; - } - - list_del_init(&vl->update); - atomic_inc(&vl->usage); - spin_unlock(&afs_vlocation_updates_lock); - - /* we can now perform the update */ - _debug("update %s", vl->vldb.name); - vl->state = AFS_VL_UPDATING; - vl->upd_rej_cnt = 0; - vl->upd_busy_cnt = 0; - - ret = afs_vlocation_update_record(vl, NULL, &vldb); - spin_lock(&vl->lock); - switch (ret) { - case 0: - afs_vlocation_apply_update(vl, &vldb); - vl->state = AFS_VL_VALID; - break; - case -ENOMEDIUM: - vl->state = AFS_VL_VOLUME_DELETED; - break; - default: - vl->state = AFS_VL_UNCERTAIN; - break; - } - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* and then reschedule */ - _debug("reschedule"); - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&afs_vlocation_updates_lock); - - if (!list_empty(&afs_vlocation_updates)) { - /* next update in 10 minutes, but wait at least 1 second more - * than the newest record already queued so that we don't spam - * the VL server suddenly with lots of requests - */ - xvl = list_entry(afs_vlocation_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - xvl = list_entry(afs_vlocation_updates.next, - struct afs_vlocation, update); - timeout = xvl->update_at - now; - if (timeout < 0) - timeout = 0; - } else { - timeout = afs_vlocation_update_timeout; - } - - ASSERT(list_empty(&vl->update)); - - list_add_tail(&vl->update, &afs_vlocation_updates); - - _debug("timeout %ld", timeout); - queue_delayed_work(afs_vlocation_update_worker, - &afs_vlocation_update, timeout * HZ); - spin_unlock(&afs_vlocation_updates_lock); - afs_put_vlocation(vl); -} diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c deleted file mode 100644 index dcb956143c86..000000000000 --- a/fs/afs/vnode.c +++ /dev/null @@ -1,1025 +0,0 @@ -/* AFS vnode management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 
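The removed updater just above spaces its work out rather than firing everything at the ten-minute mark: each record is rescheduled afs_vlocation_update_timeout seconds ahead, but never earlier than one second after the newest record already queued, so a burst of lookups cannot turn into a burst of VL RPCs. A minimal user-space sketch of that scheduling rule (the ten-minute constant comes from the deleted file; the helper and main() are illustrative only):

#include <stdio.h>
#include <time.h>

#define UPDATE_TIMEOUT (10 * 60)	/* seconds between routine VL refreshes */

/*
 * Pick the next update time for a record: "now + timeout", but never earlier
 * than one second after the newest record already queued, so queued work
 * stays spread out instead of bunching up.
 */
static time_t next_update_at(time_t now, time_t newest_queued)
{
	time_t at = now + UPDATE_TIMEOUT;

	if (at <= newest_queued)
		at = newest_queued + 1;
	return at;
}

int main(void)
{
	time_t now = time(NULL);
	time_t tail = now + UPDATE_TIMEOUT + 5;	/* queue already stretches past us */

	printf("next update in %ld seconds\n",
	       (long)(next_update_at(now, tail) - now));
	return 0;
}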
- * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include "internal.h" - -#if 0 -static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent, - int depth, char lr) -{ - struct afs_vnode *vnode; - bool bad = false; - - if (!node) - return false; - - if (node->rb_left) - bad = dump_tree_aux(node->rb_left, node, depth + 2, '/'); - - vnode = rb_entry(node, struct afs_vnode, cb_promise); - _debug("%c %*.*s%c%p {%d}", - rb_is_red(node) ? 'R' : 'B', - depth, depth, "", lr, - vnode, vnode->cb_expires_at); - if (rb_parent(node) != parent) { - printk("BAD: %p != %p\n", rb_parent(node), parent); - bad = true; - } - - if (node->rb_right) - bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\'); - - return bad; -} - -static noinline void dump_tree(const char *name, struct afs_server *server) -{ - _enter("%s", name); - if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-')) - BUG(); -} -#endif - -/* - * insert a vnode into the backing server's vnode tree - */ -static void afs_install_vnode(struct afs_vnode *vnode, - struct afs_server *server) -{ - struct afs_server *old_server = vnode->server; - struct afs_vnode *xvnode; - struct rb_node *parent, **p; - - _enter("%p,%p", vnode, server); - - if (old_server) { - spin_lock(&old_server->fs_lock); - rb_erase(&vnode->server_rb, &old_server->fs_vnodes); - spin_unlock(&old_server->fs_lock); - } - - afs_get_server(server); - vnode->server = server; - afs_put_server(old_server); - - /* insert into the server's vnode tree in FID order */ - spin_lock(&server->fs_lock); - - parent = NULL; - p = &server->fs_vnodes.rb_node; - while (*p) { - parent = *p; - xvnode = rb_entry(parent, struct afs_vnode, server_rb); - if (vnode->fid.vid < xvnode->fid.vid) - p = &(*p)->rb_left; - else if (vnode->fid.vid > xvnode->fid.vid) - p = &(*p)->rb_right; - else if (vnode->fid.vnode < xvnode->fid.vnode) - p = &(*p)->rb_left; - else if (vnode->fid.vnode > xvnode->fid.vnode) - p = &(*p)->rb_right; - else if (vnode->fid.unique < xvnode->fid.unique) - p = &(*p)->rb_left; - else if (vnode->fid.unique > xvnode->fid.unique) - p = &(*p)->rb_right; - else - BUG(); /* can't happen unless afs_iget() malfunctions */ - } - - rb_link_node(&vnode->server_rb, parent, p); - rb_insert_color(&vnode->server_rb, &server->fs_vnodes); - - spin_unlock(&server->fs_lock); - _leave(""); -} - -/* - * insert a vnode into the promising server's update/expiration tree - * - caller must hold vnode->lock - */ -static void afs_vnode_note_promise(struct afs_vnode *vnode, - struct afs_server *server) -{ - struct afs_server *old_server; - struct afs_vnode *xvnode; - struct rb_node *parent, **p; - - _enter("%p,%p", vnode, server); - - ASSERT(server != NULL); - - old_server = vnode->server; - if (vnode->cb_promised) { - if (server == old_server && - vnode->cb_expires == vnode->cb_expires_at) { - _leave(" [no change]"); - return; - } - - spin_lock(&old_server->cb_lock); - if (vnode->cb_promised) { - _debug("delete"); - rb_erase(&vnode->cb_promise, &old_server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&old_server->cb_lock); - } - - if (vnode->server != server) - 
afs_install_vnode(vnode, server); - - vnode->cb_expires_at = vnode->cb_expires; - _debug("PROMISE on %p {%lu}", - vnode, (unsigned long) vnode->cb_expires_at); - - /* abuse an RB-tree to hold the expiration order (we may have multiple - * items with the same expiration time) */ - spin_lock(&server->cb_lock); - - parent = NULL; - p = &server->cb_promises.rb_node; - while (*p) { - parent = *p; - xvnode = rb_entry(parent, struct afs_vnode, cb_promise); - if (vnode->cb_expires_at < xvnode->cb_expires_at) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&vnode->cb_promise, parent, p); - rb_insert_color(&vnode->cb_promise, &server->cb_promises); - vnode->cb_promised = true; - - spin_unlock(&server->cb_lock); - _leave(""); -} - -/* - * handle remote file deletion by discarding the callback promise - */ -static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) -{ - struct afs_server *server; - - _enter("{%p}", vnode->server); - - set_bit(AFS_VNODE_DELETED, &vnode->flags); - - server = vnode->server; - if (server) { - if (vnode->cb_promised) { - spin_lock(&server->cb_lock); - if (vnode->cb_promised) { - rb_erase(&vnode->cb_promise, - &server->cb_promises); - vnode->cb_promised = false; - } - spin_unlock(&server->cb_lock); - } - - spin_lock(&server->fs_lock); - rb_erase(&vnode->server_rb, &server->fs_vnodes); - spin_unlock(&server->fs_lock); - - vnode->server = NULL; - afs_put_server(server); - } else { - ASSERT(!vnode->cb_promised); - } - - _leave(""); -} - -/* - * finish off updating the recorded status of a file after a successful - * operation completion - * - starts callback expiry timer - * - adds to server's callback list - */ -void afs_vnode_finalise_status_update(struct afs_vnode *vnode, - struct afs_server *server) -{ - struct afs_server *oldserver = NULL; - - _enter("%p,%p", vnode, server); - - spin_lock(&vnode->lock); - clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - afs_vnode_note_promise(vnode, server); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - afs_put_server(oldserver); - _leave(""); -} - -/* - * finish off updating the recorded status of a file after an operation failed - */ -static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret) -{ - _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret); - - spin_lock(&vnode->lock); - - clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); - - if (ret == -ENOENT) { - /* the file was deleted on the server */ - _debug("got NOENT from server - marking file deleted"); - afs_vnode_deleted_remotely(vnode); - } - - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - _leave(""); -} - -/* - * fetch file status from the volume - * - don't issue a fetch if: - * - the changed bit is not set and there's a valid callback - * - there are any outstanding ops that will fetch the status - * - TODO implement local caching - */ -int afs_vnode_fetch_status(struct afs_vnode *vnode, - struct afs_vnode *auth_vnode, struct key *key) -{ - struct afs_server *server; - unsigned long acl_order; - int ret; - - DECLARE_WAITQUEUE(myself, current); - - _enter("%s,{%x:%u.%u}", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - vnode->cb_promised) { - _leave(" [unchanged]"); - return 0; - } - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - _leave(" 
[deleted]"); - return -ENOENT; - } - - acl_order = 0; - if (auth_vnode) - acl_order = auth_vnode->acl_order; - - spin_lock(&vnode->lock); - - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && - vnode->cb_promised) { - spin_unlock(&vnode->lock); - _leave(" [unchanged]"); - return 0; - } - - ASSERTCMP(vnode->update_cnt, >=, 0); - - if (vnode->update_cnt > 0) { - /* someone else started a fetch */ - _debug("wait on fetch %d", vnode->update_cnt); - - set_current_state(TASK_UNINTERRUPTIBLE); - ASSERT(myself.func != NULL); - add_wait_queue(&vnode->update_waitq, &myself); - - /* wait for the status to be updated */ - for (;;) { - if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) - break; - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - break; - - /* check to see if it got updated and invalidated all - * before we saw it */ - if (vnode->update_cnt == 0) { - remove_wait_queue(&vnode->update_waitq, - &myself); - set_current_state(TASK_RUNNING); - goto get_anyway; - } - - spin_unlock(&vnode->lock); - - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - - spin_lock(&vnode->lock); - } - - remove_wait_queue(&vnode->update_waitq, &myself); - spin_unlock(&vnode->lock); - set_current_state(TASK_RUNNING); - - return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? - -ENOENT : 0; - } - -get_anyway: - /* okay... we're going to have to initiate the op */ - vnode->update_cnt++; - - spin_unlock(&vnode->lock); - - /* merge AFS status fetches and clear outstanding callback on this - * vnode */ - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %p{%08x}", - server, ntohl(server->addr.s_addr)); - - ret = afs_fs_fetch_file_status(server, key, vnode, NULL, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - _debug("adjust"); - if (auth_vnode) - afs_cache_permit(vnode, key, acl_order); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - } else { - _debug("failed [%d]", ret); - afs_vnode_status_update_failed(vnode, ret); - } - - ASSERTCMP(vnode->update_cnt, >=, 0); - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); -} - -/* - * fetch file data from the volume - * - TODO implement caching - */ -int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, - struct afs_read *desc) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - /* merge in AFS status fetches and clear outstanding callback on this - * vnode */ - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_fetch_data(server, key, vnode, desc, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } - - _leave(" = 
%d", ret); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - return PTR_ERR(server); -} - -/* - * make a file or a directory - */ -int afs_vnode_create(struct afs_vnode *vnode, struct key *key, - const char *name, umode_t mode, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb, struct afs_server **_server) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,%s,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_create(server, key, vnode, name, mode, newfid, - newstatus, newcb, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - *_server = server; - } else { - afs_vnode_status_update_failed(vnode, ret); - *_server = NULL; - } - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); -} - -/* - * remove a file or directory - */ -int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, - bool isdir) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,%s", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_remove(server, key, vnode, name, isdir, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); -} - -/* - * create a hard link - */ -int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, - struct key *key, const char *name) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", - dvnode->volume->vlocation->vldb.name, - dvnode->fid.vid, - dvnode->fid.vnode, - dvnode->fid.unique, - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); 
- spin_lock(&dvnode->lock); - dvnode->update_cnt++; - spin_unlock(&dvnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(dvnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_link(server, key, dvnode, vnode, name, - false); - - } while (!afs_volume_release_fileserver(dvnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_vnode_finalise_status_update(dvnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(vnode, ret); - afs_vnode_status_update_failed(dvnode, ret); - } - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - spin_lock(&dvnode->lock); - dvnode->update_cnt--; - ASSERTCMP(dvnode->update_cnt, >=, 0); - spin_unlock(&dvnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); -} - -/* - * create a symbolic link - */ -int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, - const char *name, const char *content, - struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_server **_server) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,%s,%s,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name, content); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_symlink(server, key, vnode, name, content, - newfid, newstatus, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - *_server = server; - } else { - afs_vnode_status_update_failed(vnode, ret); - *_server = NULL; - } - - _leave(" = %d [cnt %d]", ret, vnode->update_cnt); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt); - return PTR_ERR(server); -} - -/* - * rename a file - */ -int afs_vnode_rename(struct afs_vnode *orig_dvnode, - struct afs_vnode *new_dvnode, - struct key *key, - const char *orig_name, - const char *new_name) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", - orig_dvnode->volume->vlocation->vldb.name, - orig_dvnode->fid.vid, - orig_dvnode->fid.vnode, - orig_dvnode->fid.unique, - new_dvnode->volume->vlocation->vldb.name, - new_dvnode->fid.vid, - new_dvnode->fid.vnode, - new_dvnode->fid.unique, - key_serial(key), - orig_name, - new_name); - - /* this op will fetch the status on both the directories we're dealing - * with */ - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt++; - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt++; - spin_unlock(&new_dvnode->lock); - } - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(orig_dvnode); - if (IS_ERR(server)) - goto 
no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_rename(server, key, orig_dvnode, orig_name, - new_dvnode, new_name, false); - - } while (!afs_volume_release_fileserver(orig_dvnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(orig_dvnode, server); - if (new_dvnode != orig_dvnode) - afs_vnode_finalise_status_update(new_dvnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(orig_dvnode, ret); - if (new_dvnode != orig_dvnode) - afs_vnode_status_update_failed(new_dvnode, ret); - } - - _leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt); - return ret; - -no_server: - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt--; - ASSERTCMP(orig_dvnode->update_cnt, >=, 0); - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt--; - ASSERTCMP(new_dvnode->update_cnt, >=, 0); - spin_unlock(&new_dvnode->lock); - } - _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); - return PTR_ERR(server); -} - -/* - * write to a file - */ -int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) -{ - struct afs_server *server; - struct afs_vnode *vnode = wb->vnode; - int ret; - - _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(wb->key), - first, last, offset, to); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_store_data(server, wb, first, last, offset, to, - false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } - - _leave(" = %d", ret); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - return PTR_ERR(server); -} - -/* - * set the attributes on a file - */ -int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, - struct iattr *attr) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_setattr(server, key, vnode, attr, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) { - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(server); - } else { - afs_vnode_status_update_failed(vnode, ret); - } - - _leave(" = %d", ret); - return ret; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - return PTR_ERR(server); -} - -/* - * get the status of a volume - */ -int 
afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, - struct afs_volume_status *vs) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_get_volume_status(server, key, vnode, vs, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); -} - -/* - * get a lock on a file - */ -int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, - afs_lock_type_t type) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x,%u", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), type); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_set_lock(server, key, vnode, type, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); -} - -/* - * extend a lock on a file - */ -int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_extend_lock(server, key, vnode, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); -} - -/* - * release a lock on a file - */ -int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_server *server; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - do { - /* pick a server to query */ - server = afs_volume_pick_fileserver(vnode); - if (IS_ERR(server)) - goto no_server; - - _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); - - ret = afs_fs_release_lock(server, key, vnode, false); - - } while (!afs_volume_release_fileserver(vnode, server, ret)); - - /* adjust the flags */ - if (ret == 0) - afs_put_server(server); - - _leave(" = %d", ret); - return ret; - -no_server: - return PTR_ERR(server); -} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index db73d6dad02b..684c48293353 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -10,19 +10,167 @@ */ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/slab.h> -#include <linux/fs.h> -#include <linux/pagemap.h> -#include <linux/sched.h> #include "internal.h" -static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; +unsigned __read_mostly 
afs_volume_gc_delay = 10; +unsigned __read_mostly afs_volume_record_life = 60 * 60; + +static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; + +/* + * Allocate a volume record and load it up from a vldb record. + */ +static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + struct afs_volume *volume; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) + goto error_0; + + volume->vid = vldb->vid[params->type]; + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + volume->cell = afs_get_cell(params->cell); + volume->type = params->type; + volume->type_force = params->force; + volume->name_len = vldb->name_len; + + atomic_set(&volume->usage, 1); + INIT_LIST_HEAD(&volume->proc_link); + rwlock_init(&volume->servers_lock); + memcpy(volume->name, vldb->name, vldb->name_len + 1); + + slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); + if (IS_ERR(slist)) { + ret = PTR_ERR(slist); + goto error_1; + } + + refcount_set(&slist->usage, 1); + volume->servers = slist; + + /* Make sure a records exists for each server this volume occupies. */ + for (i = 0; i < nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(params->cell, params->key, + &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(params->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return volume; + +error_2: + afs_put_serverlist(params->net, slist); +error_1: + kfree(volume); +error_0: + return ERR_PTR(ret); +} /* - * lookup a volume by name - * - this can be one of the following: + * Look up a VLDB record for a volume. 
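afs_alloc_volume() above keeps the candidate server list sorted by server pointer and silently drops duplicates while inserting, which keeps later comparisons between old and new lists cheap. A small user-space sketch of that insert-sorted-unique step (opaque addresses stand in for struct afs_server pointers; MAX_SERVERS is an arbitrary bound, not the kernel's):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_SERVERS 8

struct server_list {
	int nr;
	const void *servers[MAX_SERVERS];	/* kept sorted by address */
};

/*
 * Insert a server pointer into the list, keeping it sorted by address and
 * ignoring duplicates.  Returns 0 on success, -1 if the list is full.
 */
static int slist_insert(struct server_list *sl, const void *server)
{
	int i;

	for (i = 0; i < sl->nr; i++)
		if ((uintptr_t)sl->servers[i] >= (uintptr_t)server)
			break;

	if (i < sl->nr && sl->servers[i] == server)
		return 0;			/* already present */

	if (sl->nr == MAX_SERVERS)
		return -1;

	memmove(&sl->servers[i + 1], &sl->servers[i],
		(sl->nr - i) * sizeof(sl->servers[0]));
	sl->servers[i] = server;
	sl->nr++;
	return 0;
}

int main(void)
{
	static int a, b, c;			/* three distinct addresses */
	struct server_list sl = { 0 };
	int i;

	slist_insert(&sl, &b);
	slist_insert(&sl, &a);
	slist_insert(&sl, &c);
	slist_insert(&sl, &a);			/* duplicate: dropped */

	for (i = 0; i < sl.nr; i++)
		printf("%d: %#lx\n", i, (unsigned long)(uintptr_t)sl.servers[i]);
	return 0;
}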
+ */ +static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, + struct key *key, + const char *volname, + size_t volnamesz) +{ + struct afs_addr_cursor ac; + struct afs_vldb_entry *vldb; + int ret; + + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); + + while (afs_iterate_addresses(&ac)) { + if (!test_bit(ac.index, &ac.alist->probed)) { + ret = afs_vl_get_capabilities(cell->net, &ac, key); + switch (ret) { + case VL_SERVICE: + clear_bit(ac.index, &ac.alist->yfs); + set_bit(ac.index, &ac.alist->probed); + ac.addr->srx_service = ret; + break; + case YFS_VL_SERVICE: + set_bit(ac.index, &ac.alist->yfs); + set_bit(ac.index, &ac.alist->probed); + ac.addr->srx_service = ret; + break; + } + } + + vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, + volname, volnamesz); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return vldb; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; + } + } + +error: + return ERR_PTR(afs_end_cursor(&ac)); +} + +/* + * Look up a volume in the VL server and create a candidate volume record for + * it. + * + * The volume name can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), * or R/W (rwparent=1) volume @@ -42,353 +190,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; * - Rule 3: If parent volume is R/W, then only mount R/W volume unless * explicitly told otherwise */ -struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) +struct afs_volume *afs_create_volume(struct afs_mount_params *params) { - struct afs_vlocation *vlocation = NULL; - struct afs_volume *volume = NULL; - struct afs_server *server = NULL; - char srvtmask; - int ret, loop; - - _enter("{%*.*s,%d}", - params->volnamesz, params->volnamesz, params->volname, params->rwpath); - - /* lookup the volume location record */ - vlocation = afs_vlocation_lookup(params->cell, params->key, - params->volname, params->volnamesz); - if (IS_ERR(vlocation)) { - ret = PTR_ERR(vlocation); - vlocation = NULL; - goto error; - } + struct afs_vldb_entry *vldb; + struct afs_volume *volume; + unsigned long type_mask = 1UL << params->type; - /* make the final decision on the type we want */ - ret = -ENOMEDIUM; - if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) - goto error; + vldb = afs_vl_lookup_vldb(params->cell, params->key, + params->volname, params->volnamesz); + if (IS_ERR(vldb)) + return ERR_CAST(vldb); - srvtmask = 0; - for (loop = 0; loop < vlocation->vldb.nservers; loop++) - srvtmask |= vlocation->vldb.srvtmask[loop]; + if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { + volume = ERR_PTR(vldb->error); + goto error; + } + /* Make the final decision on the type we want */ + volume = ERR_PTR(-ENOMEDIUM); if (params->force) { - if (!(srvtmask & (1 << params->type))) + if (!(vldb->flags & type_mask)) goto error; - } else if (srvtmask & AFS_VOL_VTM_RO) { + } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { params->type = AFSVL_ROVOL; - } else if (srvtmask & AFS_VOL_VTM_RW) { + } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { params->type = AFSVL_RWVOL; } else { goto error; } - down_write(¶ms->cell->vl_sem); + type_mask = 1UL << params->type; + volume = afs_alloc_volume(params, vldb, type_mask); - /* is the volume already active? 
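The address cursor loop in afs_vl_lookup_vldb() above sorts per-address failures into two buckets: transient network errors rotate to the next VL address, while memory and protocol errors stop the iteration, with anything unrecognised squashed to -EIO. A compact model of just that triage step (the error sets mirror the switch above; the function name and main() are made up):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Decide whether a failed call to one VL address should be retried against
 * the next address in the list.  Unknown errors are treated as fatal and
 * reported as -EIO rather than leaking odd error codes to the caller.
 */
static bool vl_should_try_next(int err, int *fatal)
{
	switch (err) {
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
		return true;		/* this address looks unreachable */
	case -ENOMEM:
	case -ENONET:
		*fatal = err;		/* give up immediately */
		return false;
	default:
		*fatal = -EIO;		/* unknown failure: don't mask it */
		return false;
	}
}

int main(void)
{
	int errs[] = { -ECONNREFUSED, -EHOSTUNREACH, -EPROTO };
	int fatal = 0;
	size_t i;

	for (i = 0; i < sizeof(errs) / sizeof(errs[0]); i++) {
		if (!vl_should_try_next(errs[i], &fatal)) {
			printf("stopping with %d\n", fatal);
			return 0;
		}
		printf("error %d: trying next address\n", errs[i]);
	}
	return 0;
}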
*/ - if (vlocation->vols[params->type]) { - /* yes - re-use it */ - volume = vlocation->vols[params->type]; - afs_get_volume(volume); - goto success; - } +error: + kfree(vldb); + return volume; +} - /* create a new volume record */ - _debug("creating new volume record"); +/* + * Destroy a volume record + */ +static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) +{ + _enter("%p", volume); - ret = -ENOMEM; - volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); - if (!volume) - goto error_up; +#ifdef CONFIG_AFS_FSCACHE + ASSERTCMP(volume->cache, ==, NULL); +#endif - atomic_set(&volume->usage, 1); - volume->type = params->type; - volume->type_force = params->force; - volume->cell = params->cell; - volume->vid = vlocation->vldb.vid[params->type]; - - init_rwsem(&volume->server_sem); - - /* look up all the applicable server records */ - for (loop = 0; loop < 8; loop++) { - if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { - server = afs_lookup_server( - volume->cell, &vlocation->vldb.servers[loop]); - if (IS_ERR(server)) { - ret = PTR_ERR(server); - goto error_discard; - } + afs_put_serverlist(net, volume->servers); + afs_put_cell(net, volume->cell); + kfree(volume); - volume->servers[volume->nservers] = server; - volume->nservers++; - } + _leave(" [destroyed]"); +} + +/* + * Drop a reference on a volume record. + */ +void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +{ + if (volume) { + _enter("%s", volume->name); + + if (atomic_dec_and_test(&volume->usage)) + afs_destroy_volume(cell->net, volume); } +} - /* attach the cache and volume location */ +/* + * Activate a volume. + */ +void afs_activate_volume(struct afs_volume *volume) +{ #ifdef CONFIG_AFS_FSCACHE - volume->cache = fscache_acquire_cookie(vlocation->cache, + volume->cache = fscache_acquire_cookie(volume->cell->cache, &afs_volume_cache_index_def, volume, true); #endif - afs_get_vlocation(vlocation); - volume->vlocation = vlocation; - - vlocation->vols[volume->type] = volume; - -success: - _debug("kAFS selected %s volume %08x", - afs_voltypes[volume->type], volume->vid); - up_write(¶ms->cell->vl_sem); - afs_put_vlocation(vlocation); - _leave(" = %p", volume); - return volume; - - /* clean up */ -error_up: - up_write(¶ms->cell->vl_sem); -error: - afs_put_vlocation(vlocation); - _leave(" = %d", ret); - return ERR_PTR(ret); - -error_discard: - up_write(¶ms->cell->vl_sem); - - for (loop = volume->nservers - 1; loop >= 0; loop--) - afs_put_server(volume->servers[loop]); - kfree(volume); - goto error; + write_lock(&volume->cell->proc_lock); + list_add_tail(&volume->proc_link, &volume->cell->proc_volumes); + write_unlock(&volume->cell->proc_lock); } /* - * destroy a volume record + * Deactivate a volume. 
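afs_activate_volume() above, and afs_deactivate_volume() in the next hunk, reduce to linking the volume onto the cell's /proc list under the write side of a lock, plus attaching or dropping the cache cookie. A tiny pthread analogue of just the register/unregister idiom, with a hand-rolled doubly linked list and pthread_rwlock_t standing in for the kernel list helpers and the cell's proc_lock:

#include <pthread.h>
#include <stdio.h>

struct volume {
	const char *name;
	struct volume *prev, *next;	/* links on the cell's proc list */
};

static struct volume *proc_volumes;	/* head of the registered-volume list */
static pthread_rwlock_t proc_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Publish the volume on the list under the writer lock. */
static void activate_volume(struct volume *v)
{
	pthread_rwlock_wrlock(&proc_lock);
	v->next = proc_volumes;
	v->prev = NULL;
	if (proc_volumes)
		proc_volumes->prev = v;
	proc_volumes = v;
	pthread_rwlock_unlock(&proc_lock);
}

/* Unlink the volume again; readers never see a half-edited list. */
static void deactivate_volume(struct volume *v)
{
	pthread_rwlock_wrlock(&proc_lock);
	if (v->prev)
		v->prev->next = v->next;
	else
		proc_volumes = v->next;
	if (v->next)
		v->next->prev = v->prev;
	v->prev = v->next = NULL;
	pthread_rwlock_unlock(&proc_lock);
}

int main(void)
{
	struct volume a = { .name = "root.cell" }, b = { .name = "home" };
	struct volume *v;

	activate_volume(&a);
	activate_volume(&b);
	deactivate_volume(&a);

	pthread_rwlock_rdlock(&proc_lock);
	for (v = proc_volumes; v; v = v->next)
		printf("%s\n", v->name);
	pthread_rwlock_unlock(&proc_lock);
	return 0;
}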
*/ -void afs_put_volume(struct afs_volume *volume) +void afs_deactivate_volume(struct afs_volume *volume) { - struct afs_vlocation *vlocation; - int loop; - - if (!volume) - return; - - _enter("%p", volume); + _enter("%s", volume->name); - ASSERTCMP(atomic_read(&volume->usage), >, 0); + write_lock(&volume->cell->proc_lock); + list_del_init(&volume->proc_link); + write_unlock(&volume->cell->proc_lock); - vlocation = volume->vlocation; - - /* to prevent a race, the decrement and the dequeue must be effectively - * atomic */ - down_write(&vlocation->cell->vl_sem); - - if (likely(!atomic_dec_and_test(&volume->usage))) { - up_write(&vlocation->cell->vl_sem); - _leave(""); - return; - } - - vlocation->vols[volume->type] = NULL; - - up_write(&vlocation->cell->vl_sem); - - /* finish cleaning up the volume */ #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(volume->cache, 0); + fscache_relinquish_cookie(volume->cache, + test_bit(AFS_VOLUME_DELETED, &volume->flags)); + volume->cache = NULL; #endif - afs_put_vlocation(vlocation); - - for (loop = volume->nservers - 1; loop >= 0; loop--) - afs_put_server(volume->servers[loop]); - - kfree(volume); - _leave(" [destroyed]"); + _leave(""); } /* - * pick a server to use to try accessing this volume - * - returns with an elevated usage count on the server chosen + * Query the VL service to update the volume status. */ -struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode) +static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { - struct afs_volume *volume = vnode->volume; - struct afs_server *server; - int ret, state, loop; + struct afs_server_list *new, *old, *discard; + struct afs_vldb_entry *vldb; + char idbuf[16]; + int ret, idsz; + + _enter(""); + + /* We look up an ID by passing it as a decimal string in the + * operation's name parameter. + */ + idsz = sprintf(idbuf, "%u", volume->vid); - _enter("%s", volume->vlocation->vldb.name); + vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); + if (IS_ERR(vldb)) { + ret = PTR_ERR(vldb); + goto error; + } - /* stick with the server we're already using if we can */ - if (vnode->server && vnode->server->fs_state == 0) { - afs_get_server(vnode->server); - _leave(" = %p [current]", vnode->server); - return vnode->server; + /* See if the volume got renamed. */ + if (vldb->name_len != volume->name_len || + memcmp(vldb->name, volume->name, vldb->name_len) != 0) { + /* TODO: Use RCU'd string. */ + memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); + volume->name_len = vldb->name_len; } - down_read(&volume->server_sem); + /* See if the volume's server list got updated. */ + new = afs_alloc_server_list(volume->cell, key, + vldb, (1 << volume->type)); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto error_vldb; + } - /* handle the no-server case */ - if (volume->nservers == 0) { - ret = volume->rjservers ? 
-ENOMEDIUM : -ESTALE; - up_read(&volume->server_sem); - _leave(" = %d [no servers]", ret); - return ERR_PTR(ret); + write_lock(&volume->servers_lock); + + discard = new; + old = volume->servers; + if (afs_annotate_server_list(new, old)) { + new->seq = volume->servers_seq + 1; + volume->servers = new; + smp_wmb(); + volume->servers_seq++; + discard = old; } - /* basically, just search the list for the first live server and use - * that */ + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + write_unlock(&volume->servers_lock); ret = 0; - for (loop = 0; loop < volume->nservers; loop++) { - server = volume->servers[loop]; - state = server->fs_state; - _debug("consider %d [%d]", loop, state); + afs_put_serverlist(volume->cell->net, discard); +error_vldb: + kfree(vldb); +error: + _leave(" = %d", ret); + return ret; +} - switch (state) { - /* found an apparently healthy server */ - case 0: - afs_get_server(server); - up_read(&volume->server_sem); - _leave(" = %p (picked %08x)", - server, ntohl(server->addr.s_addr)); - return server; +/* + * Make sure the volume record is up to date. + */ +int afs_check_volume_status(struct afs_volume *volume, struct key *key) +{ + time64_t now = ktime_get_real_seconds(); + int ret, retries = 0; - case -ENETUNREACH: - if (ret == 0) - ret = state; - break; + _enter(""); - case -EHOSTUNREACH: - if (ret == 0 || - ret == -ENETUNREACH) - ret = state; - break; + if (volume->update_at <= now) + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); - case -ECONNREFUSED: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH) - ret = state; - break; +retry: + if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) && + !test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0"); + return 0; + } - default: - case -EREMOTEIO: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH || - ret == -ECONNREFUSED) - ret = state; - break; - } + if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { + ret = afs_update_volume_status(volume, key); + clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); + clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); + wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); + _leave(" = %d", ret); + return ret; } - /* no available servers - * - TODO: handle the no active servers case better - */ - up_read(&volume->server_sem); - _leave(" = %d", ret); - return ERR_PTR(ret); -} + if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0 [no wait]"); + return 0; + } -/* - * release a server after use - * - releases the ref on the server struct that was acquired by picking - * - records result of using a particular server to access a volume - * - return 0 to try again, 1 if okay or to issue error - * - the caller must release the server struct if result was 0 - */ -int afs_volume_release_fileserver(struct afs_vnode *vnode, - struct afs_server *server, - int result) -{ - struct afs_volume *volume = vnode->volume; - unsigned loop; - - _enter("%s,%08x,%d", - volume->vlocation->vldb.name, ntohl(server->addr.s_addr), - result); - - switch (result) { - /* success */ - case 0: - server->fs_act_jif = jiffies; - server->fs_state = 0; - _leave(""); - return 1; - - /* the fileserver denied all knowledge of the volume */ - case -ENOMEDIUM: - server->fs_act_jif = jiffies; - down_write(&volume->server_sem); - - /* firstly, find where the server is in the active list (if it - * is) */ - for (loop = 0; loop < volume->nservers; loop++) - if (volume->servers[loop] 
== server) - goto present; - - /* no longer there - may have been discarded by another op */ - goto try_next_server_upw; - - present: - volume->nservers--; - memmove(&volume->servers[loop], - &volume->servers[loop + 1], - sizeof(volume->servers[loop]) * - (volume->nservers - loop)); - volume->servers[volume->nservers] = NULL; - afs_put_server(server); - volume->rjservers++; - - if (volume->nservers > 0) - /* another server might acknowledge its existence */ - goto try_next_server_upw; - - /* handle the case where all the fileservers have rejected the - * volume - * - TODO: try asking the fileservers for volume information - * - TODO: contact the VL server again to see if the volume is - * no longer registered - */ - up_write(&volume->server_sem); - afs_put_server(server); - _leave(" [completely rejected]"); - return 1; - - /* problem reaching the server */ - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIME: - case -ETIMEDOUT: - case -EREMOTEIO: - /* mark the server as dead - * TODO: vary dead timeout depending on error - */ - spin_lock(&server->fs_lock); - if (!server->fs_state) { - server->fs_dead_jif = jiffies + HZ * 10; - server->fs_state = result; - printk("kAFS: SERVER DEAD state=%d\n", result); - } - spin_unlock(&server->fs_lock); - goto try_next_server; - - /* miscellaneous error */ - default: - server->fs_act_jif = jiffies; - case -ENOMEM: - case -ENONET: - /* tell the caller to accept the result */ - afs_put_server(server); - _leave(" [local failure]"); - return 1; + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + _leave(" = %d", ret); + return ret; } - /* tell the caller to loop around and try the next server */ -try_next_server_upw: - up_write(&volume->server_sem); -try_next_server: - afs_put_server(server); - _leave(" [try next server]"); - return 0; + retries++; + if (retries == 4) { + _leave(" = -ESTALE"); + return -ESTALE; + } + goto retry; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 11dd0526b96b..18e46e31523c 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -8,6 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
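afs_check_volume_status() above lets exactly one caller refresh the record (the UPDATING bit) while everyone else sleeps on the WAIT bit and re-checks when woken, giving up with -ESTALE after four rounds. A rough pthread analogue of that shape, where a mutex and condition variable stand in for the kernel's bit lock and wait_on_bit(); only the retry bound of four is taken from the patch:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool needs_update = true;	/* record is stale and wants a refresh */
static bool updating;			/* someone is already refreshing it */

/* Refresh the record; in the kernel this would be the VL lookup. */
static void update_volume(void)
{
	needs_update = false;
}

/* Make sure the record is fresh, letting only one thread do the work. */
static int check_volume_status(void)
{
	int retries = 0;

	pthread_mutex_lock(&lock);
	for (;;) {
		if (!needs_update) {
			pthread_mutex_unlock(&lock);
			return 0;
		}
		if (!updating) {
			updating = true;
			update_volume();
			updating = false;
			pthread_cond_broadcast(&cond);
			pthread_mutex_unlock(&lock);
			return 0;
		}
		/* Someone else is updating: wait, then re-check. */
		pthread_cond_wait(&cond, &lock);
		if (++retries == 4) {
			pthread_mutex_unlock(&lock);
			return -1;	/* stands in for -ESTALE */
		}
	}
}

static void *worker(void *arg)
{
	printf("thread %ld: %d\n", (long)(intptr_t)arg, check_volume_status());
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, worker, (void *)(intptr_t)i);
	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}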
*/ + #include <linux/backing-dev.h> #include <linux/slab.h> #include <linux/fs.h> @@ -16,9 +17,6 @@ #include <linux/pagevec.h> #include "internal.h" -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *page); - /* * mark a page as having been made dirty and thus needing writeback */ @@ -29,58 +27,6 @@ int afs_set_page_dirty(struct page *page) } /* - * unlink a writeback record because its usage has reached zero - * - must be called with the wb->vnode->writeback_lock held - */ -static void afs_unlink_writeback(struct afs_writeback *wb) -{ - struct afs_writeback *front; - struct afs_vnode *vnode = wb->vnode; - - list_del_init(&wb->link); - if (!list_empty(&vnode->writebacks)) { - /* if an fsync rises to the front of the queue then wake it - * up */ - front = list_entry(vnode->writebacks.next, - struct afs_writeback, link); - if (front->state == AFS_WBACK_SYNCING) { - _debug("wake up sync"); - front->state = AFS_WBACK_COMPLETE; - wake_up(&front->waitq); - } - } -} - -/* - * free a writeback record - */ -static void afs_free_writeback(struct afs_writeback *wb) -{ - _enter(""); - key_put(wb->key); - kfree(wb); -} - -/* - * dispose of a reference to a writeback record - */ -void afs_put_writeback(struct afs_writeback *wb) -{ - struct afs_vnode *vnode = wb->vnode; - - _enter("{%d}", wb->usage); - - spin_lock(&vnode->writeback_lock); - if (--wb->usage == 0) - afs_unlink_writeback(wb); - else - wb = NULL; - spin_unlock(&vnode->writeback_lock); - if (wb) - afs_free_writeback(wb); -} - -/* * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, @@ -103,7 +49,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pages[0] = page; get_page(page); - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { if (ret == -ENOENT) { @@ -125,42 +71,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct afs_writeback *candidate, *wb; struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; - struct key *key = file->private_data; - unsigned from = pos & (PAGE_SIZE - 1); - unsigned to = from + len; + struct key *key = afs_file_key(file); + unsigned long priv; + unsigned f, from = pos & (PAGE_SIZE - 1); + unsigned t, to = from + len; pgoff_t index = pos >> PAGE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); - candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); - if (!candidate) - return -ENOMEM; - candidate->vnode = vnode; - candidate->first = candidate->last = index; - candidate->offset_first = from; - candidate->to_last = to; - INIT_LIST_HEAD(&candidate->link); - candidate->usage = 1; - candidate->state = AFS_WBACK_PENDING; - init_waitqueue_head(&candidate->waitq); + /* We want to store information about how much of a page is altered in + * page->private. 
+ */ + BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - kfree(candidate); + if (!page) return -ENOMEM; - } if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { unlock_page(page); put_page(page); - kfree(candidate); _leave(" = %d [prep]", ret); return ret; } @@ -171,79 +107,54 @@ int afs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; try_again: - spin_lock(&vnode->writeback_lock); - - /* see if this page is already pending a writeback under a suitable key - * - if so we can just join onto that one */ - wb = (struct afs_writeback *) page_private(page); - if (wb) { - if (wb->key == key && wb->state == AFS_WBACK_PENDING) - goto subsume_in_current_wb; - goto flush_conflicting_wb; + /* See if this page is already partially written in a way that we can + * merge the new write with. + */ + t = f = 0; + if (PagePrivate(page)) { + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + ASSERTCMP(f, <=, t); } - if (index > 0) { - /* see if we can find an already pending writeback that we can - * append this page to */ - list_for_each_entry(wb, &vnode->writebacks, link) { - if (wb->last == index - 1 && wb->key == key && - wb->state == AFS_WBACK_PENDING) - goto append_to_previous_wb; - } + if (f != t) { + if (to < f || from > t) + goto flush_conflicting_write; + if (from < f) + f = from; + if (to > t) + t = to; + } else { + f = from; + t = to; } - list_add_tail(&candidate->link, &vnode->writebacks); - candidate->key = key_get(key); - spin_unlock(&vnode->writeback_lock); - SetPagePrivate(page); - set_page_private(page, (unsigned long) candidate); - _leave(" = 0 [new]"); - return 0; - -subsume_in_current_wb: - _debug("subsume"); - ASSERTRANGE(wb->first, <=, index, <=, wb->last); - if (index == wb->first && from < wb->offset_first) - wb->offset_first = from; - if (index == wb->last && to > wb->to_last) - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); - kfree(candidate); - _leave(" = 0 [sub]"); - return 0; - -append_to_previous_wb: - _debug("append into %lx-%lx", wb->first, wb->last); - wb->usage++; - wb->last++; - wb->to_last = to; - spin_unlock(&vnode->writeback_lock); + priv = (unsigned long)t << AFS_PRIV_SHIFT; + priv |= f; + trace_afs_page_dirty(vnode, tracepoint_string("begin"), + page->index, priv); SetPagePrivate(page); - set_page_private(page, (unsigned long) wb); - kfree(candidate); - _leave(" = 0 [app]"); + set_page_private(page, priv); + _leave(" = 0"); return 0; - /* the page is currently bound to another context, so if it's dirty we - * need to flush it before we can use the new context */ -flush_conflicting_wb: + /* The previous write and this write aren't adjacent or overlapping, so + * flush the page out. 
+ */ +flush_conflicting_write: _debug("flush conflict"); - if (wb->state == AFS_WBACK_PENDING) - wb->state = AFS_WBACK_CONFLICTING; - spin_unlock(&vnode->writeback_lock); - if (clear_page_dirty_for_io(page)) { - ret = afs_write_back_from_locked_page(wb, page); - if (ret < 0) { - afs_put_writeback(candidate); - _leave(" = %d", ret); - return ret; - } + ret = write_one_page(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; } - /* the page holds a ref on the writeback record */ - afs_put_writeback(wb); - set_page_private(page, 0); - ClearPagePrivate(page); + ret = lock_page_killable(page); + if (ret < 0) { + _leave(" = %d", ret); + return ret; + } goto try_again; } @@ -255,7 +166,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - struct key *key = file->private_data; + struct key *key = afs_file_key(file); loff_t i_size, maybe_i_size; int ret; @@ -266,11 +177,11 @@ int afs_write_end(struct file *file, struct address_space *mapping, i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { - spin_lock(&vnode->writeback_lock); + spin_lock(&vnode->wb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->writeback_lock); + spin_unlock(&vnode->wb_lock); } if (!PageUptodate(page)) { @@ -299,9 +210,10 @@ int afs_write_end(struct file *file, struct address_space *mapping, /* * kill all the pages in the given range */ -static void afs_kill_pages(struct afs_vnode *vnode, bool error, +static void afs_kill_pages(struct address_space *mapping, pgoff_t first, pgoff_t last) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct pagevec pv; unsigned count, loop; @@ -316,37 +228,157 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { struct page *page = pv.pages[loop]; ClearPageUptodate(page); - if (error) - SetPageError(page); - if (PageWriteback(page)) - end_page_writeback(page); + SetPageError(page); + end_page_writeback(page); if (page->index >= first) first = page->index + 1; + lock_page(page); + generic_error_remove_page(mapping, page); } __pagevec_release(&pv); - } while (first < last); + } while (first <= last); _leave(""); } /* - * synchronously write back the locked page and any subsequent non-locked dirty - * pages also covered by the same writeback record + * Redirty all the pages in a given range. 
+ */ +static void afs_redirty_pages(struct writeback_control *wbc, + struct address_space *mapping, + pgoff_t first, pgoff_t last) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct pagevec pv; + unsigned count, loop; + + _enter("{%x:%u},%lx-%lx", + vnode->fid.vid, vnode->fid.vnode, first, last); + + pagevec_init(&pv); + + do { + _debug("redirty %lx-%lx", first, last); + + count = last - first + 1; + if (count > PAGEVEC_SIZE) + count = PAGEVEC_SIZE; + pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); + ASSERTCMP(pv.nr, ==, count); + + for (loop = 0; loop < count; loop++) { + struct page *page = pv.pages[loop]; + + redirty_page_for_writepage(wbc, page); + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; + } + + __pagevec_release(&pv); + } while (first <= last); + + _leave(""); +} + +/* + * write to a file + */ +static int afs_store_data(struct address_space *mapping, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct afs_fs_cursor fc; + struct afs_wb_key *wbk = NULL; + struct list_head *p; + int ret = -ENOKEY, ret2; + + _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + first, last, offset, to); + + spin_lock(&vnode->wb_lock); + p = vnode->wb_keys.next; + + /* Iterate through the list looking for a valid key to use. */ +try_next_key: + while (p != &vnode->wb_keys) { + wbk = list_entry(p, struct afs_wb_key, vnode_link); + _debug("wbk %u", key_serial(wbk->key)); + ret2 = key_validate(wbk->key); + if (ret2 == 0) + goto found_key; + if (ret == -ENOKEY) + ret = ret2; + p = p->next; + } + + spin_unlock(&vnode->wb_lock); + afs_put_wb_key(wbk); + _leave(" = %d [no keys]", ret); + return ret; + +found_key: + refcount_inc(&wbk->usage); + spin_unlock(&vnode->wb_lock); + + _debug("USE WB KEY %u", key_serial(wbk->key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_store_data(&fc, mapping, first, last, offset, to); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + switch (ret) { + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + _debug("next"); + spin_lock(&vnode->wb_lock); + p = wbk->vnode_link.next; + afs_put_wb_key(wbk); + goto try_next_key; + } + + afs_put_wb_key(wbk); + _leave(" = %d", ret); + return ret; +} + +/* + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. 
*/ -static int afs_write_back_from_locked_page(struct afs_writeback *wb, - struct page *primary_page) +static int afs_write_back_from_locked_page(struct address_space *mapping, + struct writeback_control *wbc, + struct page *primary_page, + pgoff_t final_page) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); struct page *pages[8], *page; - unsigned long count; - unsigned n, offset, to; + unsigned long count, priv; + unsigned n, offset, to, f, t; pgoff_t start, first, last; int loop, ret; @@ -356,20 +388,33 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, if (test_set_page_writeback(primary_page)) BUG(); - /* find all consecutive lockable dirty pages, stopping when we find a - * page that is not immediately lockable, is not dirty or is missing, - * or we reach the end of the range */ + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ start = primary_page->index; - if (start >= wb->last) + priv = page_private(primary_page); + offset = priv & AFS_PRIV_MAX; + to = priv >> AFS_PRIV_SHIFT; + trace_afs_page_dirty(vnode, tracepoint_string("store"), + primary_page->index, priv); + + WARN_ON(offset == to); + if (offset == to) + trace_afs_page_dirty(vnode, tracepoint_string("WARN"), + primary_page->index, priv); + + if (start >= final_page || to < PAGE_SIZE) goto no_more; + start++; do { _debug("more %lx [%lx]", start, count); - n = wb->last - start + 1; + n = final_page - start + 1; if (n > ARRAY_SIZE(pages)) n = ARRAY_SIZE(pages); - n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, - start, n, pages); + n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages); _debug("fgpc %u", n); if (n == 0) goto no_more; @@ -381,16 +426,30 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } for (loop = 0; loop < n; loop++) { + if (to != PAGE_SIZE) + break; page = pages[loop]; - if (page->index > wb->last) + if (page->index > final_page) break; if (!trylock_page(page)) break; - if (!PageDirty(page) || - page_private(page) != (unsigned long) wb) { + if (!PageDirty(page) || PageWriteback(page)) { unlock_page(page); break; } + + priv = page_private(page); + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + if (f != 0) { + unlock_page(page); + break; + } + to = t; + + trace_afs_page_dirty(vnode, tracepoint_string("store+"), + page->index, priv); + if (!clear_page_dirty_for_io(page)) BUG(); if (test_set_page_writeback(page)) @@ -406,50 +465,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, } start += loop; - } while (start <= wb->last && count < 65536); + } while (start <= final_page && count < 65536); no_more: - /* we now have a contiguous set of dirty pages, each with writeback set - * and the dirty mark cleared; the first page is locked and must remain - * so, all the rest are unlocked */ + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + unlock_page(primary_page); + first = primary_page->index; last = first + count - 1; - offset = (first == wb->first) ? wb->offset_first : 0; - to = (last == wb->last) ? wb->to_last : PAGE_SIZE; - _debug("write back %lx[%u..] 
to %lx[..%u]", first, offset, last, to); - ret = afs_vnode_store_data(wb, first, last, offset, to); - if (ret < 0) { - switch (ret) { - case -EDQUOT: - case -ENOSPC: - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); - break; - case -EROFS: - case -EIO: - case -EREMOTEIO: - case -EFBIG: - case -ENOENT: - case -ENOMEDIUM: - case -ENXIO: - afs_kill_pages(wb->vnode, true, first, last); - mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); - break; - case -EACCES: - case -EPERM: - case -ENOKEY: - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EKEYREVOKED: - afs_kill_pages(wb->vnode, false, first, last); - break; - default: - break; - } - } else { + ret = afs_store_data(mapping, first, last, offset, to); + switch (ret) { + case 0: ret = count; + break; + + default: + pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); + /* Fall through */ + case -EACCES: + case -EPERM: + case -ENOKEY: + case -EKEYEXPIRED: + case -EKEYREJECTED: + case -EKEYREVOKED: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, ret); + break; + + case -EDQUOT: + case -ENOSPC: + afs_redirty_pages(wbc, mapping, first, last); + mapping_set_error(mapping, -ENOSPC); + break; + + case -EROFS: + case -EIO: + case -EREMOTEIO: + case -EFBIG: + case -ENOENT: + case -ENOMEDIUM: + case -ENXIO: + afs_kill_pages(mapping, first, last); + mapping_set_error(mapping, ret); + break; } _leave(" = %d", ret); @@ -462,16 +526,12 @@ no_more: */ int afs_writepage(struct page *page, struct writeback_control *wbc) { - struct afs_writeback *wb; int ret; _enter("{%lx},", page->index); - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(page->mapping, wbc, page, + wbc->range_end >> PAGE_SHIFT); if (ret < 0) { _leave(" = %d", ret); return 0; @@ -490,7 +550,6 @@ static int afs_writepages_region(struct address_space *mapping, struct writeback_control *wbc, pgoff_t index, pgoff_t end, pgoff_t *_next) { - struct afs_writeback *wb; struct page *page; int ret, n; @@ -509,7 +568,12 @@ static int afs_writepages_region(struct address_space *mapping, * (changing page->mapping to NULL), or even swizzled back from * swapper_space to tmpfs file mapping */ - lock_page(page); + ret = lock_page_killable(page); + if (ret < 0) { + put_page(page); + _leave(" = %d", ret); + return ret; + } if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); @@ -525,17 +589,9 @@ static int afs_writepages_region(struct address_space *mapping, continue; } - wb = (struct afs_writeback *) page_private(page); - ASSERT(wb != NULL); - - spin_lock(&wb->vnode->writeback_lock); - wb->state = AFS_WBACK_WRITING; - spin_unlock(&wb->vnode->writeback_lock); - if (!clear_page_dirty_for_io(page)) BUG(); - ret = afs_write_back_from_locked_page(wb, page); - unlock_page(page); + ret = afs_write_back_from_locked_page(mapping, wbc, page, end); put_page(page); if (ret < 0) { _leave(" = %d", ret); @@ -591,17 +647,14 @@ int afs_writepages(struct address_space *mapping, */ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) { - struct afs_writeback *wb = call->wb; struct pagevec pv; + unsigned long priv; unsigned count, loop; pgoff_t first = call->first, last = call->last; - bool free_wb; _enter("{%x:%u},{%lx-%lx}", vnode->fid.vid, vnode->fid.vnode, first, last); - ASSERT(wb != NULL); - pagevec_init(&pv); do { @@ -610,35 +663,22 @@ void afs_pages_written_back(struct afs_vnode *vnode, 
struct afs_call *call) count = last - first + 1; if (count > PAGEVEC_SIZE) count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(call->mapping, first, count, - pv.pages); + pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, + first, count, pv.pages); ASSERTCMP(pv.nr, ==, count); - spin_lock(&vnode->writeback_lock); for (loop = 0; loop < count; loop++) { - struct page *page = pv.pages[loop]; - end_page_writeback(page); - if (page_private(page) == (unsigned long) wb) { - set_page_private(page, 0); - ClearPagePrivate(page); - wb->usage--; - } - } - free_wb = false; - if (wb->usage == 0) { - afs_unlink_writeback(wb); - free_wb = true; + priv = page_private(pv.pages[loop]); + trace_afs_page_dirty(vnode, tracepoint_string("clear"), + pv.pages[loop]->index, priv); + set_page_private(pv.pages[loop], 0); + end_page_writeback(pv.pages[loop]); } - spin_unlock(&vnode->writeback_lock); first += count; - if (free_wb) { - afs_free_writeback(wb); - wb = NULL; - } - __pagevec_release(&pv); } while (first <= last); + afs_prune_wb_keys(vnode); _leave(""); } @@ -670,28 +710,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) } /* - * flush the vnode to the fileserver - */ -int afs_writeback_all(struct afs_vnode *vnode) -{ - struct address_space *mapping = vnode->vfs_inode.i_mapping; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_cyclic = 1, - }; - int ret; - - _enter(""); - - ret = mapping->a_ops->writepages(mapping, &wbc); - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - - _leave(" = %d", ret); - return ret; -} - -/* * flush any dirty pages for this process, and check for write errors. * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. 
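
The write.c hunks above replace the old afs_writeback records with a dirty byte range packed into page->private: afs_write_begin() stores "from" in the low bits and "to" in the high bits, afs_write_back_from_locked_page() reads them back, and afs_pages_written_back() clears the word once the store completes. The following is purely an illustrative sketch, not part of the patch: afs_pack_dirty()/afs_unpack_dirty() are hypothetical helper names, and the AFS_PRIV_MAX/AFS_PRIV_SHIFT values are assumed for a 4KiB page rather than taken from the rest of the series.

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define AFS_PRIV_MAX	0xffffUL	/* assumed: low 16 bits hold "from" */
#define AFS_PRIV_SHIFT	16		/* assumed: "to" lives above them */

/* Pack a dirty byte range the way afs_write_begin() builds priv. */
static unsigned long afs_pack_dirty(unsigned long from, unsigned long to)
{
	return (to << AFS_PRIV_SHIFT) | from;
}

/* Recover the range the way afs_write_back_from_locked_page() reads it. */
static void afs_unpack_dirty(unsigned long priv,
			     unsigned long *from, unsigned long *to)
{
	*from = priv & AFS_PRIV_MAX;
	*to = priv >> AFS_PRIV_SHIFT;
}

int main(void)
{
	unsigned long priv, f, t;

	/* A write covering bytes 100..612 of the page. */
	priv = afs_pack_dirty(100, 612);
	afs_unpack_dirty(priv, &f, &t);
	assert(f == 100 && t == 612);

	/* An adjacent write 612..PAGE_SIZE merges by widening the range. */
	priv = afs_pack_dirty(f, PAGE_SIZE);
	afs_unpack_dirty(priv, &f, &t);
	printf("dirty region now covers %lu..%lu\n", f, t);
	return 0;
}

Keeping both bounds in a single word lets afs_write_begin() extend an overlapping or adjacent write in place, which is why the per-range afs_writeback allocation and its writeback_lock bookkeeping disappear from these hunks.
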
@@ -699,61 +717,13 @@ int afs_writeback_all(struct afs_vnode *vnode) int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(inode); - int ret; _enter("{%x:%u},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - inode_lock(inode); - - /* use a writeback record as a marker in the queue - when this reaches - * the front of the queue, all the outstanding writes are either - * completed or rejected */ - wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) { - ret = -ENOMEM; - goto out; - } - wb->vnode = vnode; - wb->first = 0; - wb->last = -1; - wb->offset_first = 0; - wb->to_last = PAGE_SIZE; - wb->usage = 1; - wb->state = AFS_WBACK_SYNCING; - init_waitqueue_head(&wb->waitq); - - spin_lock(&vnode->writeback_lock); - list_for_each_entry(xwb, &vnode->writebacks, link) { - if (xwb->state == AFS_WBACK_PENDING) - xwb->state = AFS_WBACK_CONFLICTING; - } - list_add_tail(&wb->link, &vnode->writebacks); - spin_unlock(&vnode->writeback_lock); - - /* push all the outstanding writebacks to the server */ - ret = afs_writeback_all(vnode); - if (ret < 0) { - afs_put_writeback(wb); - _leave(" = %d [wb]", ret); - goto out; - } - - /* wait for the preceding writes to actually complete */ - ret = wait_event_interruptible(wb->waitq, - wb->state == AFS_WBACK_COMPLETE || - vnode->writebacks.next == &wb->link); - afs_put_writeback(wb); - _leave(" = %d", ret); -out: - inode_unlock(inode); - return ret; + return file_write_and_wait_range(file, start, end); } /* @@ -774,19 +744,114 @@ int afs_flush(struct file *file, fl_owner_t id) * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ -int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int afs_page_mkwrite(struct vm_fault *vmf) { - struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); + struct file *file = vmf->vma->vm_file; + struct inode *inode = file_inode(file); + struct afs_vnode *vnode = AFS_FS_I(inode); + unsigned long priv; _enter("{{%x:%u}},{%lx}", - vnode->fid.vid, vnode->fid.vnode, page->index); + vnode->fid.vid, vnode->fid.vnode, vmf->page->index); + + sb_start_pagefault(inode->i_sb); - /* wait for the page to be written to the cache before we allow it to - * be modified */ + /* Wait for the page to be written to the cache before we allow it to + * be modified. We then assume the entire page will need writing back. + */ #ifdef CONFIG_AFS_FSCACHE - fscache_wait_on_page_write(vnode->cache, page); + fscache_wait_on_page_write(vnode->cache, vmf->page); #endif - _leave(" = 0"); - return 0; + if (PageWriteback(vmf->page) && + wait_on_page_bit_killable(vmf->page, PG_writeback) < 0) + return VM_FAULT_RETRY; + + if (lock_page_killable(vmf->page) < 0) + return VM_FAULT_RETRY; + + /* We mustn't change page->private until writeback is complete as that + * details the portion of the page we need to write back and we might + * need to redirty the page if there's a problem. 
+ */ + wait_on_page_writeback(vmf->page); + + priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */ + priv |= 0; /* From */ + trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), + vmf->page->index, priv); + SetPagePrivate(vmf->page); + set_page_private(vmf->page, priv); + + sb_end_pagefault(inode->i_sb); + return VM_FAULT_LOCKED; +} + +/* + * Prune the keys cached for writeback. The caller must hold vnode->wb_lock. + */ +void afs_prune_wb_keys(struct afs_vnode *vnode) +{ + LIST_HEAD(graveyard); + struct afs_wb_key *wbk, *tmp; + + /* Discard unused keys */ + spin_lock(&vnode->wb_lock); + + if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) && + !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) { + list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { + if (refcount_read(&wbk->usage) == 1) + list_move(&wbk->vnode_link, &graveyard); + } + } + + spin_unlock(&vnode->wb_lock); + + while (!list_empty(&graveyard)) { + wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link); + list_del(&wbk->vnode_link); + afs_put_wb_key(wbk); + } +} + +/* + * Clean up a page during invalidation. + */ +int afs_launder_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + unsigned long priv; + unsigned int f, t; + int ret = 0; + + _enter("{%lx}", page->index); + + priv = page_private(page); + if (clear_page_dirty_for_io(page)) { + f = 0; + t = PAGE_SIZE; + if (PagePrivate(page)) { + f = priv & AFS_PRIV_MAX; + t = priv >> AFS_PRIV_SHIFT; + } + + trace_afs_page_dirty(vnode, tracepoint_string("launder"), + page->index, priv); + ret = afs_store_data(mapping, page->index, page->index, t, f); + } + + trace_afs_page_dirty(vnode, tracepoint_string("laundered"), + page->index, priv); + set_page_private(page, 0); + ClearPagePrivate(page); + +#ifdef CONFIG_AFS_FSCACHE + if (PageFsCache(page)) { + fscache_wait_on_page_write(vnode->cache, page); + fscache_uncache_page(vnode->cache, page); + } +#endif + return ret; } diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 2830e4f48d85..cfcc674e64a5 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler, struct afs_cell *cell = vnode->volume->cell; size_t namelen; - namelen = strlen(cell->name); + namelen = cell->name_len; if (size == 0) return namelen; if (namelen > size) @@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler, void *buffer, size_t size) { struct afs_vnode *vnode = AFS_FS_I(inode); - const char *volname = vnode->volume->vlocation->vldb.name; + const char *volname = vnode->volume->name; size_t namelen; namelen = strlen(volname); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 673ac4e01dd0..7208ecef7088 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3992,16 +3992,9 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) btrfs_put_block_group(bg); } -static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) { - wait_on_atomic_t(&bg->nocow_writers, - btrfs_wait_nocow_writers_atomic_t, + wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -6530,12 +6523,6 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, btrfs_put_block_group(bg); } -static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) -{ - schedule(); - return 
0; -} - void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) { struct btrfs_space_info *space_info = bg->space_info; @@ -6558,8 +6545,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) down_write(&space_info->groups_sem); up_write(&space_info->groups_sem); - wait_on_atomic_t(&bg->reservations, - btrfs_wait_bg_reservations_atomic_t, + wait_on_atomic_t(&bg->reservations, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -11059,12 +11045,6 @@ int btrfs_start_write_no_snapshotting(struct btrfs_root *root) return 1; } -static int wait_snapshotting_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) { while (true) { @@ -11073,8 +11053,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) ret = btrfs_start_write_no_snapshotting(root); if (ret) break; - wait_on_atomic_t(&root->will_be_snapshotted, - wait_snapshotting_atomic_t, + wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait, TASK_UNINTERRUPTIBLE); } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 40d61077bead..ff84258132bb 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -558,7 +558,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) * have completed. */ if (!atomic_dec_and_test(&cookie->n_active)) - wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, + wait_on_atomic_t(&cookie->n_active, atomic_t_wait, TASK_UNINTERRUPTIBLE); /* Make sure any pending writes are cancelled. */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 97ec45110957..0ff4b49a0037 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void) return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); } -extern int fscache_wait_atomic_t(atomic_t *); - /* * object.c */ diff --git a/fs/fscache/main.c b/fs/fscache/main.c index b39d487ccfb0..249968dcbf5c 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -195,12 +195,3 @@ static void __exit fscache_exit(void) } module_exit(fscache_exit); - -/* - * wait_on_atomic_t() sleep function for uninterruptible waiting - */ -int fscache_wait_atomic_t(atomic_t *p) -{ - schedule(); - return 0; -} diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 134d9f560240..1629056aa2c9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -85,9 +85,9 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); -int nfs_wait_atomic_killable(atomic_t *p) +int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode) { - return nfs_wait_killable(TASK_KILLABLE); + return nfs_wait_killable(mode); } /** diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f9a4a5524bd5..5ab17fd4700a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -388,7 +388,7 @@ extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); -extern int nfs_wait_atomic_killable(atomic_t *p); +extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 2cabbcf2f28e..e87279e49ba3 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -129,19 +129,13 @@ static struct kobj_attribute ocfs2_attr_filecheck_set = ocfs2_filecheck_show, 
ocfs2_filecheck_store); -static int ocfs2_filecheck_sysfs_wait(atomic_t *p) -{ - schedule(); - return 0; -} - static void ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry) { struct ocfs2_filecheck_entry *p; if (!atomic_dec_and_test(&entry->fs_count)) - wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait, + wait_on_atomic_t(&entry->fs_count, atomic_t_wait, TASK_UNINTERRUPTIBLE); spin_lock(&entry->fs_fcheck->fc_lock);
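
The btrfs, fscache, nfs and ocfs2 hunks above all delete per-caller schedule()-only wrappers and pass a shared atomic_t_wait action to wait_on_atomic_t() instead. That helper is not shown in this diff; the sketch below is a non-authoritative guess at its shape, inferred from the updated callback signature visible here (an atomic_t pointer plus the sleep mode, as in the reworked nfs_wait_atomic_killable()), and the include choices are assumptions.

/* Sketch only: one common wait action instead of many identical copies. */
#include <linux/sched/signal.h>
#include <linux/wait_bit.h>

int atomic_t_wait(atomic_t *counter, unsigned int mode)
{
	/* wait_on_atomic_t() has already checked the counter; the action
	 * routine just sleeps, then reports whether a signal should cut the
	 * wait short for interruptible/killable modes.
	 */
	schedule();
	if (signal_pending_state(mode, current))
		return -EINTR;

	return 0;
}

With the mode passed through, interruptible callers such as NFS no longer need their own callback variants, and uninterruptible callers (btrfs, fscache, ocfs2) can drop their boilerplate entirely, as the hunks above show.
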