summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/Makefile5
-rw-r--r--fs/afs/addr_list.c381
-rw-r--r--fs/afs/afs.h35
-rw-r--r--fs/afs/afs_fs.h6
-rw-r--r--fs/afs/afs_vl.h73
-rw-r--r--fs/afs/cache.c239
-rw-r--r--fs/afs/callback.c504
-rw-r--r--fs/afs/cell.c887
-rw-r--r--fs/afs/cmservice.c77
-rw-r--r--fs/afs/dir.c461
-rw-r--r--fs/afs/file.c194
-rw-r--r--fs/afs/flock.c159
-rw-r--r--fs/afs/fsclient.c830
-rw-r--r--fs/afs/inode.c177
-rw-r--r--fs/afs/internal.h905
-rw-r--r--fs/afs/main.c145
-rw-r--r--fs/afs/misc.c38
-rw-r--r--fs/afs/proc.c247
-rw-r--r--fs/afs/rotate.c715
-rw-r--r--fs/afs/rxrpc.c335
-rw-r--r--fs/afs/security.c378
-rw-r--r--fs/afs/server.c782
-rw-r--r--fs/afs/server_list.c153
-rw-r--r--fs/afs/super.c190
-rw-r--r--fs/afs/vlclient.c682
-rw-r--r--fs/afs/vlocation.c720
-rw-r--r--fs/afs/vnode.c1025
-rw-r--r--fs/afs/volume.c611
-rw-r--r--fs/afs/write.c709
-rw-r--r--fs/afs/xattr.c4
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/fscache/cookie.c2
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c9
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/ocfs2/filecheck.c8
37 files changed, 6444 insertions, 5277 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 641148208e90..45b7fc405fa6 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
kafs-objs := \
$(afs-cache-y) \
+ addr_list.o \
callback.o \
cell.o \
cmservice.o \
@@ -19,14 +20,14 @@ kafs-objs := \
misc.o \
mntpt.o \
proc.o \
+ rotate.o \
rxrpc.o \
security.o \
server.o \
+ server_list.o \
super.o \
netdevices.o \
vlclient.o \
- vlocation.o \
- vnode.o \
volume.o \
write.o \
xattr.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
new file mode 100644
index 000000000000..a537368ba0db
--- /dev/null
+++ b/fs/afs/addr_list.c
@@ -0,0 +1,381 @@
+/* Server address list management
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/dns_resolver.h>
+#include <linux/inet.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+//#define AFS_MAX_ADDRESSES
+// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
+// sizeof(struct sockaddr_rxrpc)))
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
+
+/*
+ * Release an address list.
+ */
+void afs_put_addrlist(struct afs_addr_list *alist)
+{
+ if (alist && refcount_dec_and_test(&alist->usage))
+ call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+}
+
+/*
+ * Allocate an address list.
+ */
+struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_addr_list *alist;
+ unsigned int i;
+
+ _enter("%u,%u,%u", nr, service, port);
+
+ alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
+ GFP_KERNEL);
+ if (!alist)
+ return NULL;
+
+ refcount_set(&alist->usage, 1);
+
+ for (i = 0; i < nr; i++) {
+ struct sockaddr_rxrpc *srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->srx_service = service;
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = htons(port);
+ }
+
+ return alist;
+}
+
+/*
+ * Parse a text string consisting of delimited addresses.
+ */
+struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
+ char delim,
+ unsigned short service,
+ unsigned short port)
+{
+ struct afs_addr_list *alist;
+ const char *p, *end = text + len;
+ unsigned int nr = 0;
+
+ _enter("%*.*s,%c", (int)len, (int)len, text, delim);
+
+ if (!len)
+ return ERR_PTR(-EDESTADDRREQ);
+
+ if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
+ delim = ',';
+
+ /* Count the addresses */
+ p = text;
+ do {
+ if (!*p)
+ return ERR_PTR(-EINVAL);
+ if (*p == delim)
+ continue;
+ nr++;
+ if (*p == '[') {
+ p++;
+ if (p == end)
+ return ERR_PTR(-EINVAL);
+ p = memchr(p, ']', end - p);
+ if (!p)
+ return ERR_PTR(-EINVAL);
+ p++;
+ if (p >= end)
+ break;
+ }
+
+ p = memchr(p, delim, end - p);
+ if (!p)
+ break;
+ p++;
+ } while (p < end);
+
+ _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
+ if (nr > AFS_MAX_ADDRESSES)
+ nr = AFS_MAX_ADDRESSES;
+
+ alist = afs_alloc_addrlist(nr, service, port);
+ if (!alist)
+ return ERR_PTR(-ENOMEM);
+
+ /* Extract the addresses */
+ p = text;
+ do {
+ struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
+ char tdelim = delim;
+
+ if (*p == delim) {
+ p++;
+ continue;
+ }
+
+ if (*p == '[') {
+ p++;
+ tdelim = ']';
+ }
+
+ if (in4_pton(p, end - p,
+ (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
+ tdelim, &p)) {
+ srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
+ srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ } else if (in6_pton(p, end - p,
+ srx->transport.sin6.sin6_addr.s6_addr,
+ tdelim, &p)) {
+ /* Nothing to do */
+ } else {
+ goto bad_address;
+ }
+
+ if (tdelim == ']') {
+ if (p == end || *p != ']')
+ goto bad_address;
+ p++;
+ }
+
+ if (p < end) {
+ if (*p == '+') {
+ /* Port number specification "+1234" */
+ unsigned int xport = 0;
+ p++;
+ if (p >= end || !isdigit(*p))
+ goto bad_address;
+ do {
+ xport *= 10;
+ xport += *p - '0';
+ if (xport > 65535)
+ goto bad_address;
+ p++;
+ } while (p < end && isdigit(*p));
+ srx->transport.sin6.sin6_port = htons(xport);
+ } else if (*p == delim) {
+ p++;
+ } else {
+ goto bad_address;
+ }
+ }
+
+ alist->nr_addrs++;
+ } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
+
+ _leave(" = [nr %u]", alist->nr_addrs);
+ return alist;
+
+bad_address:
+ kfree(alist);
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Compare old and new address lists to see if there's been any change.
+ * - How to do this in better than O(Nlog(N)) time?
+ * - We don't really want to sort the address list, but would rather take the
+ * list as we got it so as not to undo record rotation by the DNS server.
+ */
+#if 0
+static int afs_cmp_addr_list(const struct afs_addr_list *a1,
+ const struct afs_addr_list *a2)
+{
+}
+#endif
+
+/*
+ * Perform a DNS query for VL servers and build a up an address list.
+ */
+struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+{
+ struct afs_addr_list *alist;
+ char *vllist = NULL;
+ int ret;
+
+ _enter("%s", cell->name);
+
+ ret = dns_query("afsdb", cell->name, cell->name_len,
+ "ipv4", &vllist, _expiry);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
+ VL_SERVICE, AFS_VL_PORT);
+ if (IS_ERR(alist)) {
+ kfree(vllist);
+ if (alist != ERR_PTR(-ENOMEM))
+ pr_err("Failed to parse DNS data\n");
+ return alist;
+ }
+
+ kfree(vllist);
+ return alist;
+}
+
+/*
+ * Merge an IPv4 entry into a fileserver address list.
+ */
+void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
+{
+ struct sockaddr_in6 *a;
+ __be16 xport = htons(port);
+ int i;
+
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ a = &alist->addrs[i].transport.sin6;
+ if (xdr == a->sin6_addr.s6_addr32[3] &&
+ xport == a->sin6_port)
+ return;
+ if (xdr == a->sin6_addr.s6_addr32[3] &&
+ xport < a->sin6_port)
+ break;
+ if (xdr < a->sin6_addr.s6_addr32[3])
+ break;
+ }
+
+ if (i < alist->nr_addrs)
+ memmove(alist->addrs + i + 1,
+ alist->addrs + i,
+ sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+ a = &alist->addrs[i].transport.sin6;
+ a->sin6_port = xport;
+ a->sin6_addr.s6_addr32[0] = 0;
+ a->sin6_addr.s6_addr32[1] = 0;
+ a->sin6_addr.s6_addr32[2] = htonl(0xffff);
+ a->sin6_addr.s6_addr32[3] = xdr;
+ alist->nr_ipv4++;
+ alist->nr_addrs++;
+}
+
+/*
+ * Merge an IPv6 entry into a fileserver address list.
+ */
+void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
+{
+ struct sockaddr_in6 *a;
+ __be16 xport = htons(port);
+ int i, diff;
+
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ a = &alist->addrs[i].transport.sin6;
+ diff = memcmp(xdr, &a->sin6_addr, 16);
+ if (diff == 0 &&
+ xport == a->sin6_port)
+ return;
+ if (diff == 0 &&
+ xport < a->sin6_port)
+ break;
+ if (diff < 0)
+ break;
+ }
+
+ if (i < alist->nr_addrs)
+ memmove(alist->addrs + i + 1,
+ alist->addrs + i,
+ sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
+
+ a = &alist->addrs[i].transport.sin6;
+ a->sin6_port = xport;
+ a->sin6_addr.s6_addr32[0] = xdr[0];
+ a->sin6_addr.s6_addr32[1] = xdr[1];
+ a->sin6_addr.s6_addr32[2] = xdr[2];
+ a->sin6_addr.s6_addr32[3] = xdr[3];
+ alist->nr_addrs++;
+}
+
+/*
+ * Get an address to try.
+ */
+bool afs_iterate_addresses(struct afs_addr_cursor *ac)
+{
+ _enter("%hu+%hd", ac->start, (short)ac->index);
+
+ if (!ac->alist)
+ return false;
+
+ if (ac->begun) {
+ ac->index++;
+ if (ac->index == ac->alist->nr_addrs)
+ ac->index = 0;
+
+ if (ac->index == ac->start) {
+ ac->error = -EDESTADDRREQ;
+ return false;
+ }
+ }
+
+ ac->begun = true;
+ ac->responded = false;
+ ac->addr = &ac->alist->addrs[ac->index];
+ return true;
+}
+
+/*
+ * Release an address list cursor.
+ */
+int afs_end_cursor(struct afs_addr_cursor *ac)
+{
+ if (ac->responded && ac->index != ac->start)
+ WRITE_ONCE(ac->alist->index, ac->index);
+
+ afs_put_addrlist(ac->alist);
+ ac->alist = NULL;
+ return ac->error;
+}
+
+/*
+ * Set the address cursor for iterating over VL servers.
+ */
+int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
+{
+ struct afs_addr_list *alist;
+ int ret;
+
+ if (!rcu_access_pointer(cell->vl_addrs)) {
+ ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+ TASK_INTERRUPTIBLE);
+ if (ret < 0)
+ return ret;
+
+ if (!rcu_access_pointer(cell->vl_addrs) &&
+ ktime_get_real_seconds() < cell->dns_expiry)
+ return cell->error;
+ }
+
+ read_lock(&cell->vl_addrs_lock);
+ alist = rcu_dereference_protected(cell->vl_addrs,
+ lockdep_is_held(&cell->vl_addrs_lock));
+ if (alist->nr_addrs > 0)
+ afs_get_addrlist(alist);
+ else
+ alist = NULL;
+ read_unlock(&cell->vl_addrs_lock);
+
+ if (!alist)
+ return -EDESTADDRREQ;
+
+ ac->alist = alist;
+ ac->addr = NULL;
+ ac->start = READ_ONCE(alist->index);
+ ac->index = ac->start;
+ ac->error = 0;
+ ac->begun = false;
+ return 0;
+}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 3c462ff6db63..b94d0edc2b78 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -14,11 +14,14 @@
#include <linux/in.h>
-#define AFS_MAXCELLNAME 64 /* maximum length of a cell name */
-#define AFS_MAXVOLNAME 64 /* maximum length of a volume name */
-#define AFSNAMEMAX 256 /* maximum length of a filename plus NUL */
-#define AFSPATHMAX 1024 /* maximum length of a pathname plus NUL */
-#define AFSOPAQUEMAX 1024 /* maximum length of an opaque field */
+#define AFS_MAXCELLNAME 64 /* Maximum length of a cell name */
+#define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */
+#define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */
+#define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */
+#define AFS_MAXTYPES 3 /* Maximum number of volume types */
+#define AFSNAMEMAX 256 /* Maximum length of a filename plus NUL */
+#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
+#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
typedef unsigned afs_volid_t;
typedef unsigned afs_vnodeid_t;
@@ -72,6 +75,15 @@ struct afs_callback {
#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */
+struct afs_uuid {
+ __be32 time_low; /* low part of timestamp */
+ __be16 time_mid; /* mid part of timestamp */
+ __be16 time_hi_and_version; /* high part of timestamp and version */
+ __s8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
+ __s8 clock_seq_low; /* clock seq low */
+ __s8 node[6]; /* spatially unique node ID (MAC addr) */
+};
+
/*
* AFS volume information
*/
@@ -124,7 +136,6 @@ struct afs_file_status {
afs_access_t caller_access; /* access rights for authenticated caller */
afs_access_t anon_access; /* access rights for unauthenticated caller */
umode_t mode; /* UNIX mode */
- struct afs_fid parent; /* parent dir ID for non-dirs only */
time_t mtime_client; /* last time client changed data */
time_t mtime_server; /* last time server changed data */
s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
@@ -167,4 +178,16 @@ struct afs_volume_status {
#define AFS_BLOCK_SIZE 1024
+/*
+ * XDR encoding of UUID in AFS.
+ */
+struct afs_uuid__xdr {
+ __be32 time_low;
+ __be32 time_mid;
+ __be32 time_hi_and_version;
+ __be32 clock_seq_hi_and_reserved;
+ __be32 clock_seq_low;
+ __be32 node[6];
+};
+
#endif /* AFS_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index eb647323d8f0..d47b6d01e4c0 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -37,9 +37,12 @@ enum AFS_FS_Operations {
FSLOOKUP = 161, /* AFS lookup file in directory */
FSFETCHDATA64 = 65537, /* AFS Fetch file data */
FSSTOREDATA64 = 65538, /* AFS Store file data */
+ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */
+ FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */
};
enum AFS_FS_Errors {
+ VRESTARTING = -100, /* Server is restarting */
VSALVAGE = 101, /* volume needs salvaging */
VNOVNODE = 102, /* no such file/dir (vnode) */
VNOVOL = 103, /* no such volume or volume unavailable */
@@ -51,6 +54,9 @@ enum AFS_FS_Errors {
VOVERQUOTA = 109, /* volume's maximum quota exceeded */
VBUSY = 110, /* volume is temporarily unavailable */
VMOVED = 111, /* volume moved to new server - ask this FS where */
+ VIO = 112, /* I/O error in volume */
+ VSALVAGING = 113, /* Volume is being salvaged */
+ VRESTRICTED = 120, /* Volume is restricted from using */
};
#endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 800f607ffaf5..e3c4688f573b 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -16,11 +16,17 @@
#define AFS_VL_PORT 7003 /* volume location service port */
#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
+#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
enum AFSVL_Operations {
- VLGETENTRYBYID = 503, /* AFS Get Cache Entry By ID operation ID */
- VLGETENTRYBYNAME = 504, /* AFS Get Cache Entry By Name operation ID */
- VLPROBE = 514, /* AFS Probe Volume Location Service operation ID */
+ VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
+ VLGETENTRYBYNAME = 504, /* AFS Get VLDB entry by name */
+ VLPROBE = 514, /* AFS probe VL service */
+ VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */
+ VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */
+ VLGETADDRSU = 533, /* AFS Get addrs for fileserver */
+ YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */
+ VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */
};
enum AFSVL_Errors {
@@ -54,6 +60,19 @@ enum AFSVL_Errors {
AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
};
+enum {
+ YFS_SERVER_INDEX = 0,
+ YFS_SERVER_UUID = 1,
+ YFS_SERVER_ENDPOINT = 2,
+};
+
+enum {
+ YFS_ENDPOINT_IPV4 = 0,
+ YFS_ENDPOINT_IPV6 = 1,
+};
+
+#define YFS_MAXENDPOINTS 16
+
/*
* maps to "struct vldbentry" in vvl-spec.pdf
*/
@@ -74,11 +93,57 @@ struct afs_vldbentry {
struct in_addr addr; /* server address */
unsigned partition; /* partition ID on this server */
unsigned flags; /* server specific flags */
-#define AFS_VLSF_NEWREPSITE 0x0001 /* unused */
+#define AFS_VLSF_NEWREPSITE 0x0001 /* Ignore all 'non-new' servers */
#define AFS_VLSF_ROVOL 0x0002 /* this server holds a R/O instance of the volume */
#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */
#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */
+#define AFS_VLSF_UUID 0x0010 /* This server is referred to by its UUID */
+#define AFS_VLSF_DONTUSE 0x0020 /* This server ref should be ignored */
} servers[8];
};
+#define AFS_VLDB_MAXNAMELEN 65
+
+
+struct afs_ListAddrByAttributes__xdr {
+ __be32 Mask;
+#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */
+#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */
+#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */
+ __be32 ipaddr;
+ __be32 index;
+ __be32 spare;
+ struct afs_uuid__xdr uuid;
+};
+
+struct afs_uvldbentry__xdr {
+ __be32 name[AFS_VLDB_MAXNAMELEN];
+ __be32 nServers;
+ struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS];
+ __be32 serverUnique[AFS_NMAXNSERVERS];
+ __be32 serverPartition[AFS_NMAXNSERVERS];
+ __be32 serverFlags[AFS_NMAXNSERVERS];
+ __be32 volumeId[AFS_MAXTYPES];
+ __be32 cloneId;
+ __be32 flags;
+ __be32 spares1;
+ __be32 spares2;
+ __be32 spares3;
+ __be32 spares4;
+ __be32 spares5;
+ __be32 spares6;
+ __be32 spares7;
+ __be32 spares8;
+ __be32 spares9;
+};
+
+struct afs_address_list {
+ refcount_t usage;
+ unsigned int version;
+ unsigned int nr_addrs;
+ struct sockaddr_rxrpc addrs[];
+};
+
+extern void afs_put_address_list(struct afs_address_list *alist);
+
#endif /* AFS_VL_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 1fe855191261..f62ff71d28c9 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -14,19 +14,6 @@
static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t buflen);
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
- const void *buffer,
- uint16_t buflen);
-
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t buflen);
-static enum fscache_checkaux afs_vlocation_cache_check_aux(
- void *cookie_netfs_data, const void *buffer, uint16_t buflen);
-
static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t buflen);
@@ -42,23 +29,13 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
struct fscache_netfs afs_cache_netfs = {
.name = "afs",
- .version = 0,
+ .version = 1,
};
struct fscache_cookie_def afs_cell_cache_index_def = {
.name = "AFS.cell",
.type = FSCACHE_COOKIE_TYPE_INDEX,
.get_key = afs_cell_cache_get_key,
- .get_aux = afs_cell_cache_get_aux,
- .check_aux = afs_cell_cache_check_aux,
-};
-
-struct fscache_cookie_def afs_vlocation_cache_index_def = {
- .name = "AFS.vldb",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = afs_vlocation_cache_get_key,
- .get_aux = afs_vlocation_cache_get_aux,
- .check_aux = afs_vlocation_cache_check_aux,
};
struct fscache_cookie_def afs_volume_cache_index_def = {
@@ -95,150 +72,26 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
return klen;
}
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_cell *cell = cookie_netfs_data;
- uint16_t dlen;
-
- _enter("%p,%p,%u", cell, buffer, bufmax);
-
- dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
- dlen = min(dlen, bufmax);
- dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
-
- memcpy(buffer, cell->vl_addrs, dlen);
- return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
- const void *buffer,
- uint16_t buflen)
-{
- _leave(" = OKAY");
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_vlocation *vlocation = cookie_netfs_data;
- uint16_t klen;
-
- _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
- klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
- if (klen > bufmax)
- return 0;
-
- memcpy(buffer, vlocation->vldb.name, klen);
-
- _leave(" = %u", klen);
- return klen;
-}
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
-{
- const struct afs_vlocation *vlocation = cookie_netfs_data;
- uint16_t dlen;
-
- _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
-
- dlen = sizeof(struct afs_cache_vlocation);
- dlen -= offsetof(struct afs_cache_vlocation, nservers);
- if (dlen > bufmax)
- return 0;
-
- memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
-
- _leave(" = %u", dlen);
- return dlen;
-}
-
-/*
- * check that the auxiliary data indicates that the entry is still valid
- */
-static
-enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
- const void *buffer,
- uint16_t buflen)
-{
- const struct afs_cache_vlocation *cvldb;
- struct afs_vlocation *vlocation = cookie_netfs_data;
- uint16_t dlen;
-
- _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
-
- /* check the size of the data is what we're expecting */
- dlen = sizeof(struct afs_cache_vlocation);
- dlen -= offsetof(struct afs_cache_vlocation, nservers);
- if (dlen != buflen)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
-
- /* if what's on disk is more valid than what's in memory, then use the
- * VL record from the cache */
- if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
- memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
- vlocation->valid = 1;
- _leave(" = SUCCESS [c->m]");
- return FSCACHE_CHECKAUX_OKAY;
- }
-
- /* need to update the cache if the cached info differs */
- if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
- /* delete if the volume IDs for this name differ */
- if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
- sizeof(cvldb->vid)) != 0
- ) {
- _leave(" = OBSOLETE");
- return FSCACHE_CHECKAUX_OBSOLETE;
- }
-
- _leave(" = UPDATE");
- return FSCACHE_CHECKAUX_NEEDS_UPDATE;
- }
-
- _leave(" = OKAY");
- return FSCACHE_CHECKAUX_OKAY;
-}
-
/*****************************************************************************/
/*
* set the key for the volume index entry
*/
static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
- void *buffer, uint16_t bufmax)
+ void *buffer, uint16_t bufmax)
{
const struct afs_volume *volume = cookie_netfs_data;
- uint16_t klen;
+ struct {
+ u64 volid;
+ } __packed key;
_enter("{%u},%p,%u", volume->type, buffer, bufmax);
- klen = sizeof(volume->type);
- if (klen > bufmax)
+ if (bufmax < sizeof(key))
return 0;
- memcpy(buffer, &volume->type, sizeof(volume->type));
-
- _leave(" = %u", klen);
- return klen;
-
+ key.volid = volume->vid;
+ memcpy(buffer, &key, sizeof(key));
+ return sizeof(key);
}
/*****************************************************************************/
@@ -249,20 +102,25 @@ static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct afs_vnode *vnode = cookie_netfs_data;
- uint16_t klen;
+ struct {
+ u32 vnode_id[3];
+ } __packed key;
_enter("{%x,%x,%llx},%p,%u",
vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
buffer, bufmax);
- klen = sizeof(vnode->fid.vnode);
- if (klen > bufmax)
- return 0;
+ /* Allow for a 96-bit key */
+ memset(&key, 0, sizeof(key));
+ key.vnode_id[0] = vnode->fid.vnode;
+ key.vnode_id[1] = 0;
+ key.vnode_id[2] = 0;
- memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
+ if (sizeof(key) > bufmax)
+ return 0;
- _leave(" = %u", klen);
- return klen;
+ memcpy(buffer, &key, sizeof(key));
+ return sizeof(key);
}
/*
@@ -280,6 +138,11 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
*size = vnode->status.size;
}
+struct afs_vnode_cache_aux {
+ u64 data_version;
+ u32 fid_unique;
+} __packed;
+
/*
* provide new auxiliary cache data
*/
@@ -287,23 +150,21 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct afs_vnode *vnode = cookie_netfs_data;
- uint16_t dlen;
+ struct afs_vnode_cache_aux aux;
_enter("{%x,%x,%Lx},%p,%u",
vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
buffer, bufmax);
- dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
- if (dlen > bufmax)
- return 0;
+ memset(&aux, 0, sizeof(aux));
+ aux.data_version = vnode->status.data_version;
+ aux.fid_unique = vnode->fid.unique;
- memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
- buffer += sizeof(vnode->fid.unique);
- memcpy(buffer, &vnode->status.data_version,
- sizeof(vnode->status.data_version));
+ if (bufmax < sizeof(aux))
+ return 0;
- _leave(" = %u", dlen);
- return dlen;
+ memcpy(buffer, &aux, sizeof(aux));
+ return sizeof(aux);
}
/*
@@ -314,43 +175,29 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
uint16_t buflen)
{
struct afs_vnode *vnode = cookie_netfs_data;
- uint16_t dlen;
+ struct afs_vnode_cache_aux aux;
_enter("{%x,%x,%llx},%p,%u",
vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
buffer, buflen);
+ memcpy(&aux, buffer, sizeof(aux));
+
/* check the size of the data is what we're expecting */
- dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
- if (dlen != buflen) {
- _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
+ if (buflen != sizeof(aux)) {
+ _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux));
return FSCACHE_CHECKAUX_OBSOLETE;
}
- if (memcmp(buffer,
- &vnode->fid.unique,
- sizeof(vnode->fid.unique)
- ) != 0) {
- unsigned unique;
-
- memcpy(&unique, buffer, sizeof(unique));
-
+ if (vnode->fid.unique != aux.fid_unique) {
_leave(" = OBSOLETE [uniq %x != %x]",
- unique, vnode->fid.unique);
+ aux.fid_unique, vnode->fid.unique);
return FSCACHE_CHECKAUX_OBSOLETE;
}
- if (memcmp(buffer + sizeof(vnode->fid.unique),
- &vnode->status.data_version,
- sizeof(vnode->status.data_version)
- ) != 0) {
- afs_dataversion_t version;
-
- memcpy(&version, buffer + sizeof(vnode->fid.unique),
- sizeof(version));
-
+ if (vnode->status.data_version != aux.data_version) {
_leave(" = OBSOLETE [vers %llx != %llx]",
- version, vnode->status.data_version);
+ aux.data_version, vnode->status.data_version);
return FSCACHE_CHECKAUX_OBSOLETE;
}
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 25d404d22cae..f4291b576054 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -20,118 +20,151 @@
#include <linux/sched.h>
#include "internal.h"
-#if 0
-unsigned afs_vnode_update_timeout = 10;
-#endif /* 0 */
-
-#define afs_breakring_space(server) \
- CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \
- ARRAY_SIZE((server)->cb_break))
-
-//static void afs_callback_updater(struct work_struct *);
-
-static struct workqueue_struct *afs_callback_update_worker;
-
/*
- * allow the fileserver to request callback state (re-)initialisation
+ * Set up an interest-in-callbacks record for a volume on a server and
+ * register it with the server.
+ * - Called with volume->server_sem held.
*/
-void afs_init_callback_state(struct afs_server *server)
+int afs_register_server_cb_interest(struct afs_vnode *vnode,
+ struct afs_server_entry *entry)
{
- struct afs_vnode *vnode;
-
- _enter("{%p}", server);
+ struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x;
+ struct afs_server *server = entry->server;
+
+again:
+ vcbi = vnode->cb_interest;
+ if (vcbi) {
+ if (vcbi == cbi)
+ return 0;
+
+ if (cbi && vcbi->server == cbi->server) {
+ write_seqlock(&vnode->cb_lock);
+ vnode->cb_interest = afs_get_cb_interest(cbi);
+ write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
+ return 0;
+ }
- spin_lock(&server->cb_lock);
+ if (!cbi && vcbi->server == server) {
+ afs_get_cb_interest(vcbi);
+ x = cmpxchg(&entry->cb_interest, cbi, vcbi);
+ if (x != cbi) {
+ cbi = x;
+ afs_put_cb_interest(afs_v2net(vnode), vcbi);
+ goto again;
+ }
+ return 0;
+ }
+ }
- /* kill all the promises on record from this server */
- while (!RB_EMPTY_ROOT(&server->cb_promises)) {
- vnode = rb_entry(server->cb_promises.rb_node,
- struct afs_vnode, cb_promise);
- _debug("UNPROMISE { vid=%x:%u uq=%u}",
- vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
- rb_erase(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = false;
+ if (!cbi) {
+ new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ refcount_set(&new->usage, 1);
+ new->sb = vnode->vfs_inode.i_sb;
+ new->vid = vnode->volume->vid;
+ new->server = afs_get_server(server);
+ INIT_LIST_HEAD(&new->cb_link);
+
+ write_lock(&server->cb_break_lock);
+ list_add_tail(&new->cb_link, &server->cb_interests);
+ write_unlock(&server->cb_break_lock);
+
+ x = cmpxchg(&entry->cb_interest, cbi, new);
+ if (x == cbi) {
+ cbi = new;
+ } else {
+ cbi = x;
+ afs_put_cb_interest(afs_v2net(vnode), new);
+ }
}
- spin_unlock(&server->cb_lock);
- _leave("");
+ ASSERT(cbi);
+
+ /* Change the server the vnode is using. This entails scrubbing any
+ * interest the vnode had in the previous server it was using.
+ */
+ write_seqlock(&vnode->cb_lock);
+
+ vnode->cb_interest = afs_get_cb_interest(cbi);
+ vnode->cb_s_break = cbi->server->cb_s_break;
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+ write_sequnlock(&vnode->cb_lock);
+ return 0;
}
/*
- * handle the data invalidation side of a callback being broken
+ * Set a vnode's interest on a server.
*/
-void afs_broken_callback_work(struct work_struct *work)
+void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi)
{
- struct afs_vnode *vnode =
- container_of(work, struct afs_vnode, cb_broken_work);
+ struct afs_cb_interest *old_cbi = NULL;
- _enter("");
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ if (vnode->cb_interest == cbi)
return;
- /* we're only interested in dealing with a broken callback on *this*
- * vnode and only if no-one else has dealt with it yet */
- if (!mutex_trylock(&vnode->validate_lock))
- return; /* someone else is dealing with it */
-
- if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
- if (S_ISDIR(vnode->vfs_inode.i_mode))
- afs_clear_permits(vnode);
-
- if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
- goto out;
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- goto out;
-
- /* if the vnode's data version number changed then its contents
- * are different */
- if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
- afs_zap_data(vnode);
+ write_seqlock(&vnode->cb_lock);
+ if (vnode->cb_interest != cbi) {
+ afs_get_cb_interest(cbi);
+ old_cbi = vnode->cb_interest;
+ vnode->cb_interest = cbi;
}
+ write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
+}
-out:
- mutex_unlock(&vnode->validate_lock);
-
- /* avoid the potential race whereby the mutex_trylock() in this
- * function happens again between the clear_bit() and the
- * mutex_unlock() */
- if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
- _debug("requeue");
- queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+/*
+ * Remove an interest on a server.
+ */
+void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
+{
+ if (cbi && refcount_dec_and_test(&cbi->usage)) {
+ if (!list_empty(&cbi->cb_link)) {
+ write_lock(&cbi->server->cb_break_lock);
+ list_del_init(&cbi->cb_link);
+ write_unlock(&cbi->server->cb_break_lock);
+ afs_put_server(net, cbi->server);
+ }
+ kfree(cbi);
}
- _leave("");
+}
+
+/*
+ * allow the fileserver to request callback state (re-)initialisation
+ */
+void afs_init_callback_state(struct afs_server *server)
+{
+ if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
+ server->cb_s_break++;
}
/*
* actually break a callback
*/
-static void afs_break_callback(struct afs_server *server,
- struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode)
{
_enter("");
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+ write_seqlock(&vnode->cb_lock);
+
+ if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ vnode->cb_break++;
+ afs_clear_permits(vnode);
- if (vnode->cb_promised) {
spin_lock(&vnode->lock);
_debug("break callback");
- spin_lock(&server->cb_lock);
- if (vnode->cb_promised) {
- rb_erase(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = false;
- }
- spin_unlock(&server->cb_lock);
-
- queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
if (list_empty(&vnode->granted_locks) &&
!list_empty(&vnode->pending_locks))
afs_lock_may_be_available(vnode);
spin_unlock(&vnode->lock);
}
+
+ write_sequnlock(&vnode->cb_lock);
}
/*
@@ -143,49 +176,31 @@ static void afs_break_callback(struct afs_server *server,
static void afs_break_one_callback(struct afs_server *server,
struct afs_fid *fid)
{
+ struct afs_cb_interest *cbi;
+ struct afs_iget_data data;
struct afs_vnode *vnode;
- struct rb_node *p;
-
- _debug("find");
- spin_lock(&server->fs_lock);
- p = server->fs_vnodes.rb_node;
- while (p) {
- vnode = rb_entry(p, struct afs_vnode, server_rb);
- if (fid->vid < vnode->fid.vid)
- p = p->rb_left;
- else if (fid->vid > vnode->fid.vid)
- p = p->rb_right;
- else if (fid->vnode < vnode->fid.vnode)
- p = p->rb_left;
- else if (fid->vnode > vnode->fid.vnode)
- p = p->rb_right;
- else if (fid->unique < vnode->fid.unique)
- p = p->rb_left;
- else if (fid->unique > vnode->fid.unique)
- p = p->rb_right;
- else
- goto found;
- }
-
- /* not found so we just ignore it (it may have moved to another
- * server) */
-not_available:
- _debug("not avail");
- spin_unlock(&server->fs_lock);
- _leave("");
- return;
+ struct inode *inode;
-found:
- _debug("found");
- ASSERTCMP(server, ==, vnode->server);
+ read_lock(&server->cb_break_lock);
- if (!igrab(AFS_VNODE_TO_I(vnode)))
- goto not_available;
- spin_unlock(&server->fs_lock);
+ /* Step through all interested superblocks. There may be more than one
+ * because of cell aliasing.
+ */
+ list_for_each_entry(cbi, &server->cb_interests, cb_link) {
+ if (cbi->vid != fid->vid)
+ continue;
+
+ data.volume = NULL;
+ data.fid = *fid;
+ inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_break_callback(vnode);
+ iput(inode);
+ }
+ }
- afs_break_callback(server, vnode);
- iput(&vnode->vfs_inode);
- _leave("");
+ read_unlock(&server->cb_break_lock);
}
/*
@@ -216,261 +231,14 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
}
/*
- * record the callback for breaking
- * - the caller must hold server->cb_lock
+ * Clear the callback interests in a server list.
*/
-static void afs_do_give_up_callback(struct afs_server *server,
- struct afs_vnode *vnode)
+void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist)
{
- struct afs_callback *cb;
-
- _enter("%p,%p", server, vnode);
-
- cb = &server->cb_break[server->cb_break_head];
- cb->fid = vnode->fid;
- cb->version = vnode->cb_version;
- cb->expiry = vnode->cb_expiry;
- cb->type = vnode->cb_type;
- smp_wmb();
- server->cb_break_head =
- (server->cb_break_head + 1) &
- (ARRAY_SIZE(server->cb_break) - 1);
-
- /* defer the breaking of callbacks to try and collect as many as
- * possible to ship in one operation */
- switch (atomic_inc_return(&server->cb_break_n)) {
- case 1 ... AFSCBMAX - 1:
- queue_delayed_work(afs_callback_update_worker,
- &server->cb_break_work, HZ * 2);
- break;
- case AFSCBMAX:
- afs_flush_callback_breaks(server);
- break;
- default:
- break;
- }
-
- ASSERT(server->cb_promises.rb_node != NULL);
- rb_erase(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = false;
- _leave("");
-}
-
-/*
- * discard the callback on a deleted item
- */
-void afs_discard_callback_on_delete(struct afs_vnode *vnode)
-{
- struct afs_server *server = vnode->server;
+ int i;
- _enter("%d", vnode->cb_promised);
-
- if (!vnode->cb_promised) {
- _leave(" [not promised]");
- return;
- }
-
- ASSERT(server != NULL);
-
- spin_lock(&server->cb_lock);
- if (vnode->cb_promised) {
- ASSERT(server->cb_promises.rb_node != NULL);
- rb_erase(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = false;
+ for (i = 0; i < slist->nr_servers; i++) {
+ afs_put_cb_interest(net, slist->servers[i].cb_interest);
+ slist->servers[i].cb_interest = NULL;
}
- spin_unlock(&server->cb_lock);
- _leave("");
-}
-
-/*
- * give up the callback registered for a vnode on the file server when the
- * inode is being cleared
- */
-void afs_give_up_callback(struct afs_vnode *vnode)
-{
- struct afs_server *server = vnode->server;
-
- DECLARE_WAITQUEUE(myself, current);
-
- _enter("%d", vnode->cb_promised);
-
- _debug("GIVE UP INODE %p", &vnode->vfs_inode);
-
- if (!vnode->cb_promised) {
- _leave(" [not promised]");
- return;
- }
-
- ASSERT(server != NULL);
-
- spin_lock(&server->cb_lock);
- if (vnode->cb_promised && afs_breakring_space(server) == 0) {
- add_wait_queue(&server->cb_break_waitq, &myself);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!vnode->cb_promised ||
- afs_breakring_space(server) != 0)
- break;
- spin_unlock(&server->cb_lock);
- schedule();
- spin_lock(&server->cb_lock);
- }
- remove_wait_queue(&server->cb_break_waitq, &myself);
- __set_current_state(TASK_RUNNING);
- }
-
- /* of course, it's always possible for the server to break this vnode's
- * callback first... */
- if (vnode->cb_promised)
- afs_do_give_up_callback(server, vnode);
-
- spin_unlock(&server->cb_lock);
- _leave("");
-}
-
-/*
- * dispatch a deferred give up callbacks operation
- */
-void afs_dispatch_give_up_callbacks(struct work_struct *work)
-{
- struct afs_server *server =
- container_of(work, struct afs_server, cb_break_work.work);
-
- _enter("");
-
- /* tell the fileserver to discard the callback promises it has
- * - in the event of ENOMEM or some other error, we just forget that we
- * had callbacks entirely, and the server will call us later to break
- * them
- */
- afs_fs_give_up_callbacks(server, true);
-}
-
-/*
- * flush the outstanding callback breaks on a server
- */
-void afs_flush_callback_breaks(struct afs_server *server)
-{
- mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
-}
-
-#if 0
-/*
- * update a bunch of callbacks
- */
-static void afs_callback_updater(struct work_struct *work)
-{
- struct afs_server *server;
- struct afs_vnode *vnode, *xvnode;
- time64_t now;
- long timeout;
- int ret;
-
- server = container_of(work, struct afs_server, updater);
-
- _enter("");
-
- now = ktime_get_real_seconds();
-
- /* find the first vnode to update */
- spin_lock(&server->cb_lock);
- for (;;) {
- if (RB_EMPTY_ROOT(&server->cb_promises)) {
- spin_unlock(&server->cb_lock);
- _leave(" [nothing]");
- return;
- }
-
- vnode = rb_entry(rb_first(&server->cb_promises),
- struct afs_vnode, cb_promise);
- if (atomic_read(&vnode->usage) > 0)
- break;
- rb_erase(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = false;
- }
-
- timeout = vnode->update_at - now;
- if (timeout > 0) {
- queue_delayed_work(afs_vnode_update_worker,
- &afs_vnode_update, timeout * HZ);
- spin_unlock(&server->cb_lock);
- _leave(" [nothing]");
- return;
- }
-
- list_del_init(&vnode->update);
- atomic_inc(&vnode->usage);
- spin_unlock(&server->cb_lock);
-
- /* we can now perform the update */
- _debug("update %s", vnode->vldb.name);
- vnode->state = AFS_VL_UPDATING;
- vnode->upd_rej_cnt = 0;
- vnode->upd_busy_cnt = 0;
-
- ret = afs_vnode_update_record(vl, &vldb);
- switch (ret) {
- case 0:
- afs_vnode_apply_update(vl, &vldb);
- vnode->state = AFS_VL_UPDATING;
- break;
- case -ENOMEDIUM:
- vnode->state = AFS_VL_VOLUME_DELETED;
- break;
- default:
- vnode->state = AFS_VL_UNCERTAIN;
- break;
- }
-
- /* and then reschedule */
- _debug("reschedule");
- vnode->update_at = ktime_get_real_seconds() +
- afs_vnode_update_timeout;
-
- spin_lock(&server->cb_lock);
-
- if (!list_empty(&server->cb_promises)) {
- /* next update in 10 minutes, but wait at least 1 second more
- * than the newest record already queued so that we don't spam
- * the VL server suddenly with lots of requests
- */
- xvnode = list_entry(server->cb_promises.prev,
- struct afs_vnode, update);
- if (vnode->update_at <= xvnode->update_at)
- vnode->update_at = xvnode->update_at + 1;
- xvnode = list_entry(server->cb_promises.next,
- struct afs_vnode, update);
- timeout = xvnode->update_at - now;
- if (timeout < 0)
- timeout = 0;
- } else {
- timeout = afs_vnode_update_timeout;
- }
-
- list_add_tail(&vnode->update, &server->cb_promises);
-
- _debug("timeout %ld", timeout);
- queue_delayed_work(afs_vnode_update_worker,
- &afs_vnode_update, timeout * HZ);
- spin_unlock(&server->cb_lock);
- afs_put_vnode(vl);
-}
-#endif
-
-/*
- * initialise the callback update process
- */
-int __init afs_callback_update_init(void)
-{
- afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
- WQ_MEM_RECLAIM);
- return afs_callback_update_worker ? 0 : -ENOMEM;
-}
-
-/*
- * shut down the callback update process
- */
-void afs_callback_update_kill(void)
-{
- destroy_workqueue(afs_callback_update_worker);
}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index ca0a3cf93791..1858c91169e4 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,6 +1,6 @@
/* AFS cell and server record management
*
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -9,213 +9,296 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/key.h>
#include <linux/ctype.h>
#include <linux/dns_resolver.h>
#include <linux/sched.h>
+#include <linux/inet.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
-DECLARE_RWSEM(afs_proc_cells_sem);
-LIST_HEAD(afs_proc_cells);
+unsigned __read_mostly afs_cell_gc_delay = 10;
-static LIST_HEAD(afs_cells);
-static DEFINE_RWLOCK(afs_cells_lock);
-static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
-static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
-static struct afs_cell *afs_cell_root;
+static void afs_manage_cell(struct work_struct *);
+
+static void afs_dec_cells_outstanding(struct afs_net *net)
+{
+ if (atomic_dec_and_test(&net->cells_outstanding))
+ wake_up_atomic_t(&net->cells_outstanding);
+}
/*
- * allocate a cell record and fill in its name, VL server address list and
- * allocate an anonymous key
+ * Set the cell timer to fire after a given delay, assuming it's not already
+ * set for an earlier time.
*/
-static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen,
- char *vllist)
+static void afs_set_cell_timer(struct afs_net *net, time64_t delay)
{
- struct afs_cell *cell;
- struct key *key;
- char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
- char *dvllist = NULL, *_vllist = NULL;
- char delimiter = ':';
- int ret;
+ if (net->live) {
+ atomic_inc(&net->cells_outstanding);
+ if (timer_reduce(&net->cells_timer, jiffies + delay * HZ))
+ afs_dec_cells_outstanding(net);
+ }
+}
- _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist);
+/*
+ * Look up and get an activation reference on a cell record under RCU
+ * conditions. The caller must hold the RCU read lock.
+ */
+struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
+ const char *name, unsigned int namesz)
+{
+ struct afs_cell *cell = NULL;
+ struct rb_node *p;
+ int n, seq = 0, ret = 0;
- BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
+ _enter("%*.*s", namesz, namesz, name);
- if (namelen > AFS_MAXCELLNAME) {
- _leave(" = -ENAMETOOLONG");
+ if (name && namesz == 0)
+ return ERR_PTR(-EINVAL);
+ if (namesz > AFS_MAXCELLNAME)
return ERR_PTR(-ENAMETOOLONG);
- }
- /* allocate and initialise a cell record */
- cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
- if (!cell) {
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
- }
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ if (cell)
+ afs_put_cell(net, cell);
+ cell = NULL;
+ ret = -ENOENT;
- memcpy(cell->name, name, namelen);
- cell->name[namelen] = 0;
-
- atomic_set(&cell->usage, 1);
- INIT_LIST_HEAD(&cell->link);
- rwlock_init(&cell->servers_lock);
- INIT_LIST_HEAD(&cell->servers);
- init_rwsem(&cell->vl_sem);
- INIT_LIST_HEAD(&cell->vl_list);
- spin_lock_init(&cell->vl_lock);
-
- /* if the ip address is invalid, try dns query */
- if (!vllist || strlen(vllist) < 7) {
- ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL);
- if (ret < 0) {
- if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY)
- /* translate these errors into something
- * userspace might understand */
- ret = -EDESTADDRREQ;
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ read_seqbegin_or_lock(&net->cells_lock, &seq);
+
+ if (!name) {
+ cell = rcu_dereference_raw(net->ws_cell);
+ if (cell) {
+ afs_get_cell(cell);
+ continue;
+ }
+ ret = -EDESTADDRREQ;
+ continue;
}
- _vllist = dvllist;
- /* change the delimiter for user-space reply */
- delimiter = ',';
+ p = rcu_dereference_raw(net->cells.rb_node);
+ while (p) {
+ cell = rb_entry(p, struct afs_cell, net_node);
+
+ n = strncasecmp(cell->name, name,
+ min_t(size_t, cell->name_len, namesz));
+ if (n == 0)
+ n = cell->name_len - namesz;
+ if (n < 0) {
+ p = rcu_dereference_raw(p->rb_left);
+ } else if (n > 0) {
+ p = rcu_dereference_raw(p->rb_right);
+ } else {
+ if (atomic_inc_not_zero(&cell->usage)) {
+ ret = 0;
+ break;
+ }
+ /* We want to repeat the search, this time with
+ * the lock properly locked.
+ */
+ }
+ cell = NULL;
+ }
- } else {
- _vllist = vllist;
- }
+ } while (need_seqretry(&net->cells_lock, seq));
- /* fill in the VL server list from the rest of the string */
- do {
- unsigned a, b, c, d;
+ done_seqretry(&net->cells_lock, seq);
- next = strchr(_vllist, delimiter);
- if (next)
- *next++ = 0;
+ return ret == 0 ? cell : ERR_PTR(ret);
+}
- if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
- goto bad_address;
+/*
+ * Set up a cell record and fill in its name, VL server address list and
+ * allocate an anonymous key
+ */
+static struct afs_cell *afs_alloc_cell(struct afs_net *net,
+ const char *name, unsigned int namelen,
+ const char *vllist)
+{
+ struct afs_cell *cell;
+ int i, ret;
- if (a > 255 || b > 255 || c > 255 || d > 255)
- goto bad_address;
+ ASSERT(name);
+ if (namelen == 0)
+ return ERR_PTR(-EINVAL);
+ if (namelen > AFS_MAXCELLNAME) {
+ _leave(" = -ENAMETOOLONG");
+ return ERR_PTR(-ENAMETOOLONG);
+ }
- cell->vl_addrs[cell->vl_naddrs++].s_addr =
- htonl((a << 24) | (b << 16) | (c << 8) | d);
+ _enter("%*.*s,%s", namelen, namelen, name, vllist);
- } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next));
+ cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
+ if (!cell) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
- /* create a key to represent an anonymous user */
- memcpy(keyname, "afs@", 4);
- dp = keyname + 4;
- cp = cell->name;
- do {
- *dp++ = toupper(*cp);
- } while (*cp++);
+ cell->net = net;
+ cell->name_len = namelen;
+ for (i = 0; i < namelen; i++)
+ cell->name[i] = tolower(name[i]);
+
+ atomic_set(&cell->usage, 2);
+ INIT_WORK(&cell->manager, afs_manage_cell);
+ cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
+ (1 << AFS_CELL_FL_NO_LOOKUP_YET));
+ INIT_LIST_HEAD(&cell->proc_volumes);
+ rwlock_init(&cell->proc_lock);
+ rwlock_init(&cell->vl_addrs_lock);
+
+ /* Fill in the VL server list if we were given a list of addresses to
+ * use.
+ */
+ if (vllist) {
+ struct afs_addr_list *alist;
+
+ alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
+ VL_SERVICE, AFS_VL_PORT);
+ if (IS_ERR(alist)) {
+ ret = PTR_ERR(alist);
+ goto parse_failed;
+ }
- key = rxrpc_get_null_key(keyname);
- if (IS_ERR(key)) {
- _debug("no key");
- ret = PTR_ERR(key);
- goto error;
+ rcu_assign_pointer(cell->vl_addrs, alist);
+ cell->dns_expiry = TIME64_MAX;
}
- cell->anonymous_key = key;
-
- _debug("anon key %p{%x}",
- cell->anonymous_key, key_serial(cell->anonymous_key));
_leave(" = %p", cell);
return cell;
-bad_address:
- printk(KERN_ERR "kAFS: bad VL server IP address\n");
- ret = -EINVAL;
-error:
- key_put(cell->anonymous_key);
- kfree(dvllist);
+parse_failed:
+ if (ret == -EINVAL)
+ printk(KERN_ERR "kAFS: bad VL server IP address\n");
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
}
/*
- * afs_cell_crate() - create a cell record
- * @name: is the name of the cell.
- * @namsesz: is the strlen of the cell name.
- * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format.
- * @retref: is T to return the cell reference when the cell exists.
+ * afs_lookup_cell - Look up or create a cell record.
+ * @net: The network namespace
+ * @name: The name of the cell.
+ * @namesz: The strlen of the cell name.
+ * @vllist: A colon/comma separated list of numeric IP addresses or NULL.
+ * @excl: T if an error should be given if the cell name already exists.
+ *
+ * Look up a cell record by name and query the DNS for VL server addresses if
+ * needed. Note that that actual DNS query is punted off to the manager thread
+ * so that this function can return immediately if interrupted whilst allowing
+ * cell records to be shared even if not yet fully constructed.
*/
-struct afs_cell *afs_cell_create(const char *name, unsigned namesz,
- char *vllist, bool retref)
+struct afs_cell *afs_lookup_cell(struct afs_net *net,
+ const char *name, unsigned int namesz,
+ const char *vllist, bool excl)
{
- struct afs_cell *cell;
- int ret;
-
- _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist);
+ struct afs_cell *cell, *candidate, *cursor;
+ struct rb_node *parent, **pp;
+ int ret, n;
+
+ _enter("%s,%s", name, vllist);
+
+ if (!excl) {
+ rcu_read_lock();
+ cell = afs_lookup_cell_rcu(net, name, namesz);
+ rcu_read_unlock();
+ if (!IS_ERR(cell)) {
+ if (excl) {
+ afs_put_cell(net, cell);
+ return ERR_PTR(-EEXIST);
+ }
+ goto wait_for_cell;
+ }
+ }
- down_write(&afs_cells_sem);
- read_lock(&afs_cells_lock);
- list_for_each_entry(cell, &afs_cells, link) {
- if (strncasecmp(cell->name, name, namesz) == 0)
- goto duplicate_name;
+ /* Assume we're probably going to create a cell and preallocate and
+ * mostly set up a candidate record. We can then use this to stash the
+ * name, the net namespace and VL server addresses.
+ *
+ * We also want to do this before we hold any locks as it may involve
+ * upcalling to userspace to make DNS queries.
+ */
+ candidate = afs_alloc_cell(net, name, namesz, vllist);
+ if (IS_ERR(candidate)) {
+ _leave(" = %ld", PTR_ERR(candidate));
+ return candidate;
}
- read_unlock(&afs_cells_lock);
- cell = afs_cell_alloc(name, namesz, vllist);
- if (IS_ERR(cell)) {
- _leave(" = %ld", PTR_ERR(cell));
- up_write(&afs_cells_sem);
- return cell;
+ /* Find the insertion point and check to see if someone else added a
+ * cell whilst we were allocating.
+ */
+ write_seqlock(&net->cells_lock);
+
+ pp = &net->cells.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent, struct afs_cell, net_node);
+
+ n = strncasecmp(cursor->name, name,
+ min_t(size_t, cursor->name_len, namesz));
+ if (n == 0)
+ n = cursor->name_len - namesz;
+ if (n < 0)
+ pp = &(*pp)->rb_left;
+ else if (n > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto cell_already_exists;
}
- /* add a proc directory for this cell */
- ret = afs_proc_cell_setup(cell);
- if (ret < 0)
- goto error;
+ cell = candidate;
+ candidate = NULL;
+ rb_link_node_rcu(&cell->net_node, parent, pp);
+ rb_insert_color(&cell->net_node, &net->cells);
+ atomic_inc(&net->cells_outstanding);
+ write_sequnlock(&net->cells_lock);
-#ifdef CONFIG_AFS_FSCACHE
- /* put it up for caching (this never returns an error) */
- cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
- &afs_cell_cache_index_def,
- cell, true);
-#endif
+ queue_work(afs_wq, &cell->manager);
- /* add to the cell lists */
- write_lock(&afs_cells_lock);
- list_add_tail(&cell->link, &afs_cells);
- write_unlock(&afs_cells_lock);
+wait_for_cell:
+ _debug("wait_for_cell");
+ ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
+ smp_rmb();
- down_write(&afs_proc_cells_sem);
- list_add_tail(&cell->proc_link, &afs_proc_cells);
- up_write(&afs_proc_cells_sem);
- up_write(&afs_cells_sem);
+ switch (READ_ONCE(cell->state)) {
+ case AFS_CELL_FAILED:
+ ret = cell->error;
+ goto error;
+ default:
+ _debug("weird %u %d", cell->state, cell->error);
+ goto error;
+ case AFS_CELL_ACTIVE:
+ break;
+ }
- _leave(" = %p", cell);
+ _leave(" = %p [cell]", cell);
return cell;
+cell_already_exists:
+ _debug("cell exists");
+ cell = cursor;
+ if (excl) {
+ ret = -EEXIST;
+ } else {
+ afs_get_cell(cursor);
+ ret = 0;
+ }
+ write_sequnlock(&net->cells_lock);
+ kfree(candidate);
+ if (ret == 0)
+ goto wait_for_cell;
+ goto error_noput;
error:
- up_write(&afs_cells_sem);
- key_put(cell->anonymous_key);
- kfree(cell);
- _leave(" = %d", ret);
+ afs_put_cell(net, cell);
+error_noput:
+ _leave(" = %d [error]", ret);
return ERR_PTR(ret);
-
-duplicate_name:
- if (retref && !IS_ERR(cell))
- afs_get_cell(cell);
-
- read_unlock(&afs_cells_lock);
- up_write(&afs_cells_sem);
-
- if (retref) {
- _leave(" = %p", cell);
- return cell;
- }
-
- _leave(" = -EEXIST");
- return ERR_PTR(-EEXIST);
}
/*
@@ -223,10 +306,11 @@ duplicate_name:
* - can be called with a module parameter string
* - can be called from a write to /proc/fs/afs/rootcell
*/
-int afs_cell_init(char *rootcell)
+int afs_cell_init(struct afs_net *net, const char *rootcell)
{
struct afs_cell *old_root, *new_root;
- char *cp;
+ const char *cp, *vllist;
+ size_t len;
_enter("");
@@ -239,222 +323,453 @@ int afs_cell_init(char *rootcell)
}
cp = strchr(rootcell, ':');
- if (!cp)
+ if (!cp) {
_debug("kAFS: no VL server IP addresses specified");
- else
- *cp++ = 0;
+ vllist = NULL;
+ len = strlen(rootcell);
+ } else {
+ vllist = cp + 1;
+ len = cp - rootcell;
+ }
/* allocate a cell record for the root cell */
- new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false);
+ new_root = afs_lookup_cell(net, rootcell, len, vllist, false);
if (IS_ERR(new_root)) {
_leave(" = %ld", PTR_ERR(new_root));
return PTR_ERR(new_root);
}
+ set_bit(AFS_CELL_FL_NO_GC, &new_root->flags);
+ afs_get_cell(new_root);
+
/* install the new cell */
- write_lock(&afs_cells_lock);
- old_root = afs_cell_root;
- afs_cell_root = new_root;
- write_unlock(&afs_cells_lock);
- afs_put_cell(old_root);
+ write_seqlock(&net->cells_lock);
+ old_root = net->ws_cell;
+ net->ws_cell = new_root;
+ write_sequnlock(&net->cells_lock);
+ afs_put_cell(net, old_root);
_leave(" = 0");
return 0;
}
/*
- * lookup a cell record
+ * Update a cell's VL server address list from the DNS.
*/
-struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
- bool dns_cell)
+static void afs_update_cell(struct afs_cell *cell)
{
- struct afs_cell *cell;
-
- _enter("\"%*.*s\",", namesz, namesz, name ?: "");
-
- down_read(&afs_cells_sem);
- read_lock(&afs_cells_lock);
-
- if (name) {
- /* if the cell was named, look for it in the cell record list */
- list_for_each_entry(cell, &afs_cells, link) {
- if (strncmp(cell->name, name, namesz) == 0) {
- afs_get_cell(cell);
- goto found;
- }
+ struct afs_addr_list *alist, *old;
+ time64_t now, expiry;
+
+ _enter("%s", cell->name);
+
+ alist = afs_dns_query(cell, &expiry);
+ if (IS_ERR(alist)) {
+ switch (PTR_ERR(alist)) {
+ case -ENODATA:
+ /* The DNS said that the cell does not exist */
+ set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+ clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+ cell->dns_expiry = ktime_get_real_seconds() + 61;
+ break;
+
+ case -EAGAIN:
+ case -ECONNREFUSED:
+ default:
+ set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+ cell->dns_expiry = ktime_get_real_seconds() + 10;
+ break;
}
- cell = ERR_PTR(-ENOENT);
- if (dns_cell)
- goto create_cell;
- found:
- ;
+
+ cell->error = -EDESTADDRREQ;
} else {
- cell = afs_cell_root;
- if (!cell) {
- /* this should not happen unless user tries to mount
- * when root cell is not set. Return an impossibly
- * bizarre errno to alert the user. Things like
- * ENOENT might be "more appropriate" but they happen
- * for other reasons.
- */
- cell = ERR_PTR(-EDESTADDRREQ);
- } else {
- afs_get_cell(cell);
- }
+ clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
+ clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
+
+ /* Exclusion on changing vl_addrs is achieved by a
+ * non-reentrant work item.
+ */
+ old = rcu_dereference_protected(cell->vl_addrs, true);
+ rcu_assign_pointer(cell->vl_addrs, alist);
+ cell->dns_expiry = expiry;
+ if (old)
+ afs_put_addrlist(old);
}
- read_unlock(&afs_cells_lock);
- up_read(&afs_cells_sem);
- _leave(" = %p", cell);
- return cell;
+ if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
+ wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
-create_cell:
- read_unlock(&afs_cells_lock);
- up_read(&afs_cells_sem);
+ now = ktime_get_real_seconds();
+ afs_set_cell_timer(cell->net, cell->dns_expiry - now);
+ _leave("");
+}
- cell = afs_cell_create(name, namesz, NULL, true);
+/*
+ * Destroy a cell record
+ */
+static void afs_cell_destroy(struct rcu_head *rcu)
+{
+ struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
- _leave(" = %p", cell);
- return cell;
+ _enter("%p{%s}", cell, cell->name);
+
+ ASSERTCMP(atomic_read(&cell->usage), ==, 0);
+
+ afs_put_addrlist(cell->vl_addrs);
+ key_put(cell->anonymous_key);
+ kfree(cell);
+
+ _leave(" [destroyed]");
}
-#if 0
/*
- * try and get a cell record
+ * Queue the cell manager.
*/
-struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
+static void afs_queue_cell_manager(struct afs_net *net)
{
- write_lock(&afs_cells_lock);
+ int outstanding = atomic_inc_return(&net->cells_outstanding);
- if (cell && !list_empty(&cell->link))
- afs_get_cell(cell);
- else
- cell = NULL;
+ _enter("%d", outstanding);
- write_unlock(&afs_cells_lock);
- return cell;
+ if (!queue_work(afs_wq, &net->cells_manager))
+ afs_dec_cells_outstanding(net);
}
-#endif /* 0 */
/*
- * destroy a cell record
+ * Cell management timer. We have an increment on cells_outstanding that we
+ * need to pass along to the work item.
*/
-void afs_put_cell(struct afs_cell *cell)
+void afs_cells_timer(struct timer_list *timer)
{
- if (!cell)
- return;
+ struct afs_net *net = container_of(timer, struct afs_net, cells_timer);
- _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
+ _enter("");
+ if (!queue_work(afs_wq, &net->cells_manager))
+ afs_dec_cells_outstanding(net);
+}
- ASSERTCMP(atomic_read(&cell->usage), >, 0);
+/*
+ * Get a reference on a cell record.
+ */
+struct afs_cell *afs_get_cell(struct afs_cell *cell)
+{
+ atomic_inc(&cell->usage);
+ return cell;
+}
- /* to prevent a race, the decrement and the dequeue must be effectively
- * atomic */
- write_lock(&afs_cells_lock);
+/*
+ * Drop a reference on a cell record.
+ */
+void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
+{
+ time64_t now, expire_delay;
- if (likely(!atomic_dec_and_test(&cell->usage))) {
- write_unlock(&afs_cells_lock);
- _leave("");
+ if (!cell)
return;
- }
- ASSERT(list_empty(&cell->servers));
- ASSERT(list_empty(&cell->vl_list));
+ _enter("%s", cell->name);
- write_unlock(&afs_cells_lock);
+ now = ktime_get_real_seconds();
+ cell->last_inactive = now;
+ expire_delay = 0;
+ if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
+ !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ expire_delay = afs_cell_gc_delay;
- wake_up(&afs_cells_freeable_wq);
+ if (atomic_dec_return(&cell->usage) > 1)
+ return;
- _leave(" [unused]");
+ /* 'cell' may now be garbage collected. */
+ afs_set_cell_timer(net, expire_delay);
}
/*
- * destroy a cell record
- * - must be called with the afs_cells_sem write-locked
- * - cell->link should have been broken by the caller
+ * Allocate a key to use as a placeholder for anonymous user security.
*/
-static void afs_cell_destroy(struct afs_cell *cell)
+static int afs_alloc_anon_key(struct afs_cell *cell)
{
- _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
+ struct key *key;
+ char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp;
- ASSERTCMP(atomic_read(&cell->usage), >=, 0);
- ASSERT(list_empty(&cell->link));
+ /* Create a key to represent an anonymous user. */
+ memcpy(keyname, "afs@", 4);
+ dp = keyname + 4;
+ cp = cell->name;
+ do {
+ *dp++ = tolower(*cp);
+ } while (*cp++);
- /* wait for everyone to stop using the cell */
- if (atomic_read(&cell->usage) > 0) {
- DECLARE_WAITQUEUE(myself, current);
+ key = rxrpc_get_null_key(keyname);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
- _debug("wait for cell %s", cell->name);
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&afs_cells_freeable_wq, &myself);
+ cell->anonymous_key = key;
- while (atomic_read(&cell->usage) > 0) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
+ _debug("anon key %p{%x}",
+ cell->anonymous_key, key_serial(cell->anonymous_key));
+ return 0;
+}
- remove_wait_queue(&afs_cells_freeable_wq, &myself);
- set_current_state(TASK_RUNNING);
+/*
+ * Activate a cell.
+ */
+static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+ int ret;
+
+ if (!cell->anonymous_key) {
+ ret = afs_alloc_anon_key(cell);
+ if (ret < 0)
+ return ret;
}
- _debug("cell dead");
- ASSERTCMP(atomic_read(&cell->usage), ==, 0);
- ASSERT(list_empty(&cell->servers));
- ASSERT(list_empty(&cell->vl_list));
+#ifdef CONFIG_AFS_FSCACHE
+ cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
+ &afs_cell_cache_index_def,
+ cell, true);
+#endif
+ ret = afs_proc_cell_setup(net, cell);
+ if (ret < 0)
+ return ret;
+ spin_lock(&net->proc_cells_lock);
+ list_add_tail(&cell->proc_link, &net->proc_cells);
+ spin_unlock(&net->proc_cells_lock);
+ return 0;
+}
- afs_proc_cell_remove(cell);
+/*
+ * Deactivate a cell.
+ */
+static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
+{
+ _enter("%s", cell->name);
+
+ afs_proc_cell_remove(net, cell);
- down_write(&afs_proc_cells_sem);
+ spin_lock(&net->proc_cells_lock);
list_del_init(&cell->proc_link);
- up_write(&afs_proc_cells_sem);
+ spin_unlock(&net->proc_cells_lock);
#ifdef CONFIG_AFS_FSCACHE
fscache_relinquish_cookie(cell->cache, 0);
+ cell->cache = NULL;
#endif
- key_put(cell->anonymous_key);
- kfree(cell);
- _leave(" [destroyed]");
+ _leave("");
}
/*
- * purge in-memory cell database on module unload or afs_init() failure
- * - the timeout daemon is stopped before calling this
+ * Manage a cell record, initialising and destroying it, maintaining its DNS
+ * records.
*/
-void afs_cell_purge(void)
+static void afs_manage_cell(struct work_struct *work)
{
- struct afs_cell *cell;
+ struct afs_cell *cell = container_of(work, struct afs_cell, manager);
+ struct afs_net *net = cell->net;
+ bool deleted;
+ int ret, usage;
+
+ _enter("%s", cell->name);
+
+again:
+ _debug("state %u", cell->state);
+ switch (cell->state) {
+ case AFS_CELL_INACTIVE:
+ case AFS_CELL_FAILED:
+ write_seqlock(&net->cells_lock);
+ usage = 1;
+ deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0);
+ if (deleted)
+ rb_erase(&cell->net_node, &net->cells);
+ write_sequnlock(&net->cells_lock);
+ if (deleted)
+ goto final_destruction;
+ if (cell->state == AFS_CELL_FAILED)
+ goto done;
+ cell->state = AFS_CELL_UNSET;
+ goto again;
+
+ case AFS_CELL_UNSET:
+ cell->state = AFS_CELL_ACTIVATING;
+ goto again;
+
+ case AFS_CELL_ACTIVATING:
+ ret = afs_activate_cell(net, cell);
+ if (ret < 0)
+ goto activation_failed;
+
+ cell->state = AFS_CELL_ACTIVE;
+ smp_wmb();
+ clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+ wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ goto again;
+
+ case AFS_CELL_ACTIVE:
+ if (atomic_read(&cell->usage) > 1) {
+ time64_t now = ktime_get_real_seconds();
+ if (cell->dns_expiry <= now && net->live)
+ afs_update_cell(cell);
+ goto done;
+ }
+ cell->state = AFS_CELL_DEACTIVATING;
+ goto again;
+
+ case AFS_CELL_DEACTIVATING:
+ set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+ if (atomic_read(&cell->usage) > 1)
+ goto reverse_deactivation;
+ afs_deactivate_cell(net, cell);
+ cell->state = AFS_CELL_INACTIVE;
+ goto again;
+
+ default:
+ break;
+ }
+ _debug("bad state %u", cell->state);
+ BUG(); /* Unhandled state */
+
+activation_failed:
+ cell->error = ret;
+ afs_deactivate_cell(net, cell);
+
+ cell->state = AFS_CELL_FAILED;
+ smp_wmb();
+ if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
+ wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ goto again;
+
+reverse_deactivation:
+ cell->state = AFS_CELL_ACTIVE;
+ smp_wmb();
+ clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
+ wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ _leave(" [deact->act]");
+ return;
+
+done:
+ _leave(" [done %u]", cell->state);
+ return;
+
+final_destruction:
+ call_rcu(&cell->rcu, afs_cell_destroy);
+ afs_dec_cells_outstanding(net);
+ _leave(" [destruct %d]", atomic_read(&net->cells_outstanding));
+}
+
+/*
+ * Manage the records of cells known to a network namespace. This includes
+ * updating the DNS records and garbage collecting unused cells that were
+ * automatically added.
+ *
+ * Note that constructed cell records may only be removed from net->cells by
+ * this work item, so it is safe for this work item to stash a cursor pointing
+ * into the tree and then return to caller (provided it skips cells that are
+ * still under construction).
+ *
+ * Note also that we were given an increment on net->cells_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ */
+void afs_manage_cells(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, cells_manager);
+ struct rb_node *cursor;
+ time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+ bool purging = !net->live;
_enter("");
- afs_put_cell(afs_cell_root);
+ /* Trawl the cell database looking for cells that have expired from
+ * lack of use and cells whose DNS results have expired and dispatch
+ * their managers.
+ */
+ read_seqlock_excl(&net->cells_lock);
- down_write(&afs_cells_sem);
+ for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) {
+ struct afs_cell *cell =
+ rb_entry(cursor, struct afs_cell, net_node);
+ unsigned usage;
+ bool sched_cell = false;
- while (!list_empty(&afs_cells)) {
- cell = NULL;
+ usage = atomic_read(&cell->usage);
+ _debug("manage %s %u", cell->name, usage);
+
+ ASSERTCMP(usage, >=, 1);
- /* remove the next cell from the front of the list */
- write_lock(&afs_cells_lock);
+ if (purging) {
+ if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+ usage = atomic_dec_return(&cell->usage);
+ ASSERTCMP(usage, ==, 1);
+ }
+
+ if (usage == 1) {
+ time64_t expire_at = cell->last_inactive;
+
+ if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
+ !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ expire_at += afs_cell_gc_delay;
+ if (purging || expire_at <= now)
+ sched_cell = true;
+ else if (expire_at < next_manage)
+ next_manage = expire_at;
+ }
- if (!list_empty(&afs_cells)) {
- cell = list_entry(afs_cells.next,
- struct afs_cell, link);
- list_del_init(&cell->link);
+ if (!purging) {
+ if (cell->dns_expiry <= now)
+ sched_cell = true;
+ else if (cell->dns_expiry <= next_manage)
+ next_manage = cell->dns_expiry;
}
- write_unlock(&afs_cells_lock);
+ if (sched_cell)
+ queue_work(afs_wq, &cell->manager);
+ }
+
+ read_sequnlock_excl(&net->cells_lock);
- if (cell) {
- _debug("PURGING CELL %s (%d)",
- cell->name, atomic_read(&cell->usage));
+ /* Update the timer on the way out. We have to pass an increment on
+ * cells_outstanding in the namespace that we are in to the timer or
+ * the work scheduler.
+ */
+ if (!purging && next_manage < TIME64_MAX) {
+ now = ktime_get_real_seconds();
- /* now the cell should be left with no references */
- afs_cell_destroy(cell);
+ if (next_manage - now <= 0) {
+ if (queue_work(afs_wq, &net->cells_manager))
+ atomic_inc(&net->cells_outstanding);
+ } else {
+ afs_set_cell_timer(net, next_manage - now);
}
}
- up_write(&afs_cells_sem);
+ afs_dec_cells_outstanding(net);
+ _leave(" [%d]", atomic_read(&net->cells_outstanding));
+}
+
+/*
+ * Purge in-memory cell database.
+ */
+void afs_cell_purge(struct afs_net *net)
+{
+ struct afs_cell *ws;
+
+ _enter("");
+
+ write_seqlock(&net->cells_lock);
+ ws = net->ws_cell;
+ net->ws_cell = NULL;
+ write_sequnlock(&net->cells_lock);
+ afs_put_cell(net, ws);
+
+ _debug("del timer");
+ if (del_timer_sync(&net->cells_timer))
+ atomic_dec(&net->cells_outstanding);
+
+ _debug("kick mgr");
+ afs_queue_cell_manager(net);
+
+ _debug("wait");
+ wait_on_atomic_t(&net->cells_outstanding, atomic_t_wait,
+ TASK_UNINTERRUPTIBLE);
_leave("");
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 782d4d05a53b..41e277f57b20 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -41,7 +41,6 @@ static CM_NAME(CallBack);
static const struct afs_call_type afs_SRXCBCallBack = {
.name = afs_SRXCBCallBack_name,
.deliver = afs_deliver_cb_callback,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_CallBack,
};
@@ -53,7 +52,6 @@ static CM_NAME(InitCallBackState);
static const struct afs_call_type afs_SRXCBInitCallBackState = {
.name = afs_SRXCBInitCallBackState_name,
.deliver = afs_deliver_cb_init_call_back_state,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_InitCallBackState,
};
@@ -65,7 +63,6 @@ static CM_NAME(InitCallBackState3);
static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
.name = afs_SRXCBInitCallBackState3_name,
.deliver = afs_deliver_cb_init_call_back_state3,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_InitCallBackState,
};
@@ -77,7 +74,6 @@ static CM_NAME(Probe);
static const struct afs_call_type afs_SRXCBProbe = {
.name = afs_SRXCBProbe_name,
.deliver = afs_deliver_cb_probe,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_Probe,
};
@@ -89,7 +85,6 @@ static CM_NAME(ProbeUuid);
static const struct afs_call_type afs_SRXCBProbeUuid = {
.name = afs_SRXCBProbeUuid_name,
.deliver = afs_deliver_cb_probe_uuid,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_ProbeUuid,
};
@@ -101,7 +96,6 @@ static CM_NAME(TellMeAboutYourself);
static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
.name = afs_SRXCBTellMeAboutYourself_name,
.deliver = afs_deliver_cb_tell_me_about_yourself,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_TellMeAboutYourself,
};
@@ -127,6 +121,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
case CBProbe:
call->type = &afs_SRXCBProbe;
return true;
+ case CBProbeUuid:
+ call->type = &afs_SRXCBProbeUuid;
+ return true;
case CBTellMeAboutYourself:
call->type = &afs_SRXCBTellMeAboutYourself;
return true;
@@ -147,18 +144,16 @@ static void afs_cm_destructor(struct afs_call *call)
* afs_deliver_cb_callback().
*/
if (call->unmarshall == 5) {
- ASSERT(call->server && call->count && call->request);
- afs_break_callbacks(call->server, call->count, call->request);
+ ASSERT(call->cm_server && call->count && call->request);
+ afs_break_callbacks(call->cm_server, call->count, call->request);
}
- afs_put_server(call->server);
- call->server = NULL;
kfree(call->buffer);
call->buffer = NULL;
}
/*
- * allow the fileserver to see if the cache manager is still alive
+ * The server supplied a list of callbacks that it wanted to break.
*/
static void SRXAFSCB_CallBack(struct work_struct *work)
{
@@ -173,7 +168,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
* yet */
afs_send_empty_reply(call);
- afs_break_callbacks(call->server, call->count, call->request);
+ afs_break_callbacks(call->cm_server, call->count, call->request);
afs_put_call(call);
_leave("");
}
@@ -193,7 +188,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
switch (call->unmarshall) {
case 0:
- rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
call->offset = 0;
call->unmarshall++;
@@ -286,14 +280,16 @@ static int afs_deliver_cb_callback(struct afs_call *call)
break;
}
- call->state = AFS_CALL_REPLYING;
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return -EIO;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(&srx);
+ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+ server = afs_find_server(call->net, &srx);
if (!server)
return -ENOTCONN;
- call->server = server;
+ call->cm_server = server;
return afs_queue_call_work(call);
}
@@ -305,9 +301,9 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, work);
- _enter("{%p}", call->server);
+ _enter("{%p}", call->cm_server);
- afs_init_callback_state(call->server);
+ afs_init_callback_state(call->cm_server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
@@ -324,21 +320,18 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
_enter("");
- rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
- /* no unmarshalling required */
- call->state = AFS_CALL_REPLYING;
-
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(&srx);
+ server = afs_find_server(call->net, &srx);
if (!server)
return -ENOTCONN;
- call->server = server;
+ call->cm_server = server;
return afs_queue_call_work(call);
}
@@ -357,8 +350,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
_enter("");
- rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
-
_enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
@@ -402,15 +393,16 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
break;
}
- /* no unmarshalling required */
- call->state = AFS_CALL_REPLYING;
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return -EIO;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- server = afs_find_server(&srx);
+ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+ server = afs_find_server(call->net, &srx);
if (!server)
return -ENOTCONN;
- call->server = server;
+ call->cm_server = server;
return afs_queue_call_work(call);
}
@@ -441,8 +433,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
if (ret < 0)
return ret;
- /* no unmarshalling required */
- call->state = AFS_CALL_REPLYING;
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return -EIO;
return afs_queue_call_work(call);
}
@@ -461,7 +453,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
_enter("");
- if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
+ if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
reply.match = htonl(0);
else
reply.match = htonl(1);
@@ -524,7 +516,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
break;
}
- call->state = AFS_CALL_REPLYING;
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return -EIO;
return afs_queue_call_work(call);
}
@@ -568,13 +561,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
memset(&reply, 0, sizeof(reply));
reply.ia.nifs = htonl(nifs);
- reply.ia.uuid[0] = afs_uuid.time_low;
- reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid));
- reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version));
- reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
- reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
+ reply.ia.uuid[0] = call->net->uuid.time_low;
+ reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
+ reply.ia.uuid[2] = htonl(ntohs(call->net->uuid.time_hi_and_version));
+ reply.ia.uuid[3] = htonl((s8) call->net->uuid.clock_seq_hi_and_reserved);
+ reply.ia.uuid[4] = htonl((s8) call->net->uuid.clock_seq_low);
for (loop = 0; loop < 6; loop++)
- reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
+ reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
if (ifs) {
for (loop = 0; loop < nifs; loop++) {
@@ -605,8 +598,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
if (ret < 0)
return ret;
- /* no unmarshalling required */
- call->state = AFS_CALL_REPLYING;
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return -EIO;
return afs_queue_call_work(call);
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 613a77058263..ab618d32554c 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -130,10 +130,11 @@ struct afs_lookup_cookie {
/*
* check that a directory page is valid
*/
-static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
+bool afs_dir_check_page(struct inode *dir, struct page *page)
{
struct afs_dir_page *dbuf;
- loff_t latter;
+ struct afs_vnode *vnode = AFS_FS_I(dir);
+ loff_t latter, i_size, off;
int tmp, qty;
#if 0
@@ -150,8 +151,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
}
#endif
- /* determine how many magic numbers there should be in this page */
- latter = dir->i_size - page_offset(page);
+ /* Determine how many magic numbers there should be in this page, but
+ * we must take care because the directory may change size under us.
+ */
+ off = page_offset(page);
+ i_size = i_size_read(dir);
+ if (i_size <= off)
+ goto checked;
+
+ latter = i_size - off;
if (latter >= PAGE_SIZE)
qty = PAGE_SIZE;
else
@@ -162,13 +170,15 @@ static inline bool afs_dir_check_page(struct inode *dir, struct page *page)
dbuf = page_address(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
- printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
+ printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
__func__, dir->i_ino, tmp, qty,
ntohs(dbuf->blocks[tmp].pagehdr.magic));
+ trace_afs_dir_check_failed(vnode, off, i_size);
goto error;
}
}
+checked:
SetPageChecked(page);
return true;
@@ -183,6 +193,7 @@ error:
static inline void afs_dir_put_page(struct page *page)
{
kunmap(page);
+ unlock_page(page);
put_page(page);
}
@@ -197,9 +208,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
if (!IS_ERR(page)) {
+ lock_page(page);
kmap(page);
if (unlikely(!PageChecked(page))) {
- if (PageError(page) || !afs_dir_check_page(dir, page))
+ if (PageError(page))
goto fail;
}
}
@@ -384,8 +396,7 @@ out:
*/
static int afs_readdir(struct file *file, struct dir_context *ctx)
{
- return afs_dir_iterate(file_inode(file),
- ctx, file->private_data);
+ return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file));
}
/*
@@ -553,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
/* instantiate the dentry */
- inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
+ inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL);
key_put(key);
if (IS_ERR(inode)) {
_leave(" = %ld", PTR_ERR(inode));
@@ -581,6 +592,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
struct afs_vnode *vnode, *dir;
struct afs_fid uninitialized_var(fid);
struct dentry *parent;
+ struct inode *inode;
struct key *key;
void *dir_version;
int ret;
@@ -588,30 +600,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- vnode = AFS_FS_I(d_inode(dentry));
-
- if (d_really_is_positive(dentry))
+ if (d_really_is_positive(dentry)) {
+ vnode = AFS_FS_I(d_inode(dentry));
_enter("{v={%x:%u} n=%pd fl=%lx},",
vnode->fid.vid, vnode->fid.vnode, dentry,
vnode->flags);
- else
+ } else {
_enter("{neg n=%pd}", dentry);
+ }
key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
if (IS_ERR(key))
key = NULL;
+ if (d_really_is_positive(dentry)) {
+ inode = d_inode(dentry);
+ if (inode) {
+ vnode = AFS_FS_I(inode);
+ afs_validate(vnode, key);
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ goto out_bad;
+ }
+ }
+
/* lock down the parent dentry so we can peer at it */
parent = dget_parent(dentry);
dir = AFS_FS_I(d_inode(parent));
/* validate the parent directory */
- if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
- afs_validate(dir, key);
+ afs_validate(dir, key);
if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
_debug("%pd: parent dir deleted", dentry);
- goto out_bad;
+ goto out_bad_parent;
}
dir_version = (void *) (unsigned long) dir->status.data_version;
@@ -626,13 +647,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
case 0:
/* the filename maps to something */
if (d_really_is_negative(dentry))
- goto out_bad;
- if (is_bad_inode(d_inode(dentry))) {
+ goto out_bad_parent;
+ inode = d_inode(dentry);
+ if (is_bad_inode(inode)) {
printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
dentry);
- goto out_bad;
+ goto out_bad_parent;
}
+ vnode = AFS_FS_I(inode);
+
/* if the vnode ID has changed, then the dirent points to a
* different file */
if (fid.vnode != vnode->fid.vnode) {
@@ -649,10 +673,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
_debug("%pd: file deleted (uq %u -> %u I:%u)",
dentry, fid.unique,
vnode->fid.unique,
- d_inode(dentry)->i_generation);
- spin_lock(&vnode->lock);
+ vnode->vfs_inode.i_generation);
+ write_seqlock(&vnode->cb_lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- spin_unlock(&vnode->lock);
+ write_sequnlock(&vnode->cb_lock);
goto not_found;
}
goto out_valid;
@@ -667,7 +691,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
default:
_debug("failed to iterate dir %pd: %d",
parent, ret);
- goto out_bad;
+ goto out_bad_parent;
}
out_valid:
@@ -683,9 +707,10 @@ not_found:
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
-out_bad:
+out_bad_parent:
_debug("dropping dentry %pd2", dentry);
dput(parent);
+out_bad:
key_put(key);
_leave(" = 0 [bad]");
@@ -727,20 +752,48 @@ static void afs_d_release(struct dentry *dentry)
}
/*
+ * Create a new inode for create/mkdir/symlink
+ */
+static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
+ struct dentry *new_dentry,
+ struct afs_fid *newfid,
+ struct afs_file_status *newstatus,
+ struct afs_callback *newcb)
+{
+ struct inode *inode;
+
+ if (fc->ac.error < 0)
+ return;
+
+ inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
+ newfid, newstatus, newcb, fc->cbi);
+ if (IS_ERR(inode)) {
+ /* ENOMEM or EINTR at a really inconvenient time - just abandon
+ * the new directory on the server.
+ */
+ fc->ac.error = PTR_ERR(inode);
+ return;
+ }
+
+ d_instantiate(new_dentry, inode);
+ if (d_unhashed(new_dentry))
+ d_rehash(new_dentry);
+}
+
+/*
* create a directory on an AFS filesystem
*/
static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- struct afs_file_status status;
- struct afs_callback cb;
- struct afs_server *server;
- struct afs_vnode *dvnode, *vnode;
- struct afs_fid fid;
- struct inode *inode;
+ struct afs_file_status newstatus;
+ struct afs_fs_cursor fc;
+ struct afs_callback newcb;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_fid newfid;
struct key *key;
int ret;
- dvnode = AFS_FS_I(dir);
+ mode |= S_IFDIR;
_enter("{%x:%u},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -751,40 +804,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto error;
}
- mode |= S_IFDIR;
- ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
- mode, &fid, &status, &cb, &server);
- if (ret < 0)
- goto mkdir_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &newfid, &newstatus, &newcb);
+ }
- inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
- if (IS_ERR(inode)) {
- /* ENOMEM at a really inconvenient time - just abandon the new
- * directory on the server */
- ret = PTR_ERR(inode);
- goto iget_error;
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret < 0)
+ goto error_key;
}
- /* apply the status report we've got for the new vnode */
- vnode = AFS_FS_I(inode);
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
-
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry)) {
- _debug("not hashed");
- d_rehash(dentry);
- }
key_put(key);
_leave(" = 0");
return 0;
-iget_error:
- afs_put_server(server);
-mkdir_error:
+error_key:
key_put(key);
error:
d_drop(dentry);
@@ -793,16 +833,29 @@ error:
}
/*
+ * Remove a subdir from a directory.
+ */
+static void afs_dir_remove_subdir(struct dentry *dentry)
+{
+ if (d_really_is_positive(dentry)) {
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+ clear_nlink(&vnode->vfs_inode);
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
+}
+
+/*
* remove a directory from an AFS filesystem
*/
static int afs_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct afs_vnode *dvnode, *vnode;
+ struct afs_fs_cursor fc;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
int ret;
- dvnode = AFS_FS_I(dir);
-
_enter("{%x:%u},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
@@ -812,45 +865,69 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
goto error;
}
- ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
- if (ret < 0)
- goto rmdir_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ afs_fs_remove(&fc, dentry->d_name.name, true);
+ }
- if (d_really_is_positive(dentry)) {
- vnode = AFS_FS_I(d_inode(dentry));
- clear_nlink(&vnode->vfs_inode);
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- afs_discard_callback_on_delete(vnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret == 0)
+ afs_dir_remove_subdir(dentry);
}
key_put(key);
- _leave(" = 0");
- return 0;
-
-rmdir_error:
- key_put(key);
error:
- _leave(" = %d", ret);
return ret;
}
/*
- * remove a file from an AFS filesystem
+ * Remove a link to a file or symlink from a directory.
+ *
+ * If the file was not deleted due to excess hard links, the fileserver will
+ * break the callback promise on the file - if it had one - before it returns
+ * to us, and if it was deleted, it won't
+ *
+ * However, if we didn't have a callback promise outstanding, or it was
+ * outstanding on a different server, then it won't break it either...
+ */
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+{
+ int ret = 0;
+
+ if (d_really_is_positive(dentry)) {
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ kdebug("AFS_VNODE_DELETED");
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+ ret = afs_validate(vnode, key);
+ if (ret == -ESTALE)
+ ret = 0;
+ _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+ }
+
+ return ret;
+}
+
+/*
+ * Remove a file or symlink from an AFS filesystem.
*/
static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
- struct afs_vnode *dvnode, *vnode;
+ struct afs_fs_cursor fc;
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct key *key;
int ret;
- dvnode = AFS_FS_I(dir);
-
_enter("{%x:%u},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
- ret = -ENAMETOOLONG;
if (dentry->d_name.len >= AFSNAMEMAX)
- goto error;
+ return -ENAMETOOLONG;
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
@@ -858,44 +935,28 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
goto error;
}
+ /* Try to make sure we have a callback promise on the victim. */
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
-
- /* make sure we have a callback promise on the victim */
ret = afs_validate(vnode, key);
if (ret < 0)
- goto error;
+ goto error_key;
}
- ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
- if (ret < 0)
- goto remove_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ afs_fs_remove(&fc, dentry->d_name.name, false);
+ }
- if (d_really_is_positive(dentry)) {
- /* if the file wasn't deleted due to excess hard links, the
- * fileserver will break the callback promise on the file - if
- * it had one - before it returns to us, and if it was deleted,
- * it won't
- *
- * however, if we didn't have a callback promise outstanding,
- * or it was outstanding on a different server, then it won't
- * break it either...
- */
- vnode = AFS_FS_I(d_inode(dentry));
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- _debug("AFS_VNODE_DELETED");
- if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
- _debug("AFS_VNODE_CB_BROKEN");
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- ret = afs_validate(vnode, key);
- _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret == 0)
+ ret = afs_dir_remove_link(dentry, key);
}
- key_put(key);
- _leave(" = 0");
- return 0;
-
-remove_error:
+error_key:
key_put(key);
error:
_leave(" = %d", ret);
@@ -908,60 +969,50 @@ error:
static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
- struct afs_file_status status;
- struct afs_callback cb;
- struct afs_server *server;
- struct afs_vnode *dvnode, *vnode;
- struct afs_fid fid;
- struct inode *inode;
+ struct afs_fs_cursor fc;
+ struct afs_file_status newstatus;
+ struct afs_callback newcb;
+ struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir);
+ struct afs_fid newfid;
struct key *key;
int ret;
- dvnode = AFS_FS_I(dir);
+ mode |= S_IFREG;
_enter("{%x:%u},{%pd},%ho,",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+ ret = -ENAMETOOLONG;
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
goto error;
}
- mode |= S_IFREG;
- ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
- mode, &fid, &status, &cb, &server);
- if (ret < 0)
- goto create_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &newfid, &newstatus, &newcb);
+ }
- inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
- if (IS_ERR(inode)) {
- /* ENOMEM at a really inconvenient time - just abandon the new
- * directory on the server */
- ret = PTR_ERR(inode);
- goto iget_error;
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret < 0)
+ goto error_key;
}
- /* apply the status report we've got for the new vnode */
- vnode = AFS_FS_I(inode);
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
-
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry)) {
- _debug("not hashed");
- d_rehash(dentry);
- }
key_put(key);
_leave(" = 0");
return 0;
-iget_error:
- afs_put_server(server);
-create_error:
+error_key:
key_put(key);
error:
d_drop(dentry);
@@ -975,6 +1026,7 @@ error:
static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry)
{
+ struct afs_fs_cursor fc;
struct afs_vnode *dvnode, *vnode;
struct key *key;
int ret;
@@ -987,23 +1039,45 @@ static int afs_link(struct dentry *from, struct inode *dir,
dvnode->fid.vid, dvnode->fid.vnode,
dentry);
+ ret = -ENAMETOOLONG;
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
goto error;
}
- ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
- if (ret < 0)
- goto link_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
+ afs_end_vnode_operation(&fc);
+ return -ERESTARTSYS;
+ }
+
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_link(&fc, vnode, dentry->d_name.name);
+ }
+
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break_2);
+ ihold(&vnode->vfs_inode);
+ d_instantiate(dentry, &vnode->vfs_inode);
+
+ mutex_unlock(&vnode->io_lock);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret < 0)
+ goto error_key;
+ }
- ihold(&vnode->vfs_inode);
- d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
return 0;
-link_error:
+error_key:
key_put(key);
error:
d_drop(dentry);
@@ -1017,20 +1091,21 @@ error:
static int afs_symlink(struct inode *dir, struct dentry *dentry,
const char *content)
{
- struct afs_file_status status;
- struct afs_server *server;
- struct afs_vnode *dvnode, *vnode;
- struct afs_fid fid;
- struct inode *inode;
+ struct afs_fs_cursor fc;
+ struct afs_file_status newstatus;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_fid newfid;
struct key *key;
int ret;
- dvnode = AFS_FS_I(dir);
-
_enter("{%x:%u},{%pd},%s",
dvnode->fid.vid, dvnode->fid.vnode, dentry,
content);
+ ret = -ENAMETOOLONG;
+ if (dentry->d_name.len >= AFSNAMEMAX)
+ goto error;
+
ret = -EINVAL;
if (strlen(content) >= AFSPATHMAX)
goto error;
@@ -1041,39 +1116,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
goto error;
}
- ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
- &fid, &status, &server);
- if (ret < 0)
- goto create_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
+ afs_fs_symlink(&fc, dentry->d_name.name, content,
+ &newfid, &newstatus);
+ }
- inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
- if (IS_ERR(inode)) {
- /* ENOMEM at a really inconvenient time - just abandon the new
- * directory on the server */
- ret = PTR_ERR(inode);
- goto iget_error;
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret < 0)
+ goto error_key;
}
- /* apply the status report we've got for the new vnode */
- vnode = AFS_FS_I(inode);
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
-
- d_instantiate(dentry, inode);
- if (d_unhashed(dentry)) {
- _debug("not hashed");
- d_rehash(dentry);
- }
key_put(key);
_leave(" = 0");
return 0;
-iget_error:
- afs_put_server(server);
-create_error:
+error_key:
key_put(key);
error:
d_drop(dentry);
@@ -1088,6 +1151,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
+ struct afs_fs_cursor fc;
struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
struct key *key;
int ret;
@@ -1111,16 +1175,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto error;
}
- ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
- old_dentry->d_name.name,
- new_dentry->d_name.name);
- if (ret < 0)
- goto rename_error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
+ if (orig_dvnode != new_dvnode) {
+ if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
+ afs_end_vnode_operation(&fc);
+ return -ERESTARTSYS;
+ }
+ }
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
+ fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
+ afs_fs_rename(&fc, old_dentry->d_name.name,
+ new_dvnode, new_dentry->d_name.name);
+ }
+
+ afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2);
+ if (orig_dvnode != new_dvnode)
+ mutex_unlock(&new_dvnode->io_lock);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret < 0)
+ goto error_key;
+ }
+
key_put(key);
_leave(" = 0");
return 0;
-rename_error:
+error_key:
key_put(key);
error:
d_drop(new_dentry);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 510cba15fa56..a39192ced99e 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -19,11 +19,11 @@
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
static int afs_readpage(struct file *file, struct page *page);
static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int afs_releasepage(struct page *page, gfp_t gfp_flags);
-static int afs_launder_page(struct page *page);
static int afs_readpages(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
@@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.write_iter = afs_file_write,
- .mmap = generic_file_readonly_mmap,
+ .mmap = afs_file_mmap,
.splice_read = generic_file_splice_read,
.fsync = afs_fsync,
.lock = afs_lock,
@@ -62,12 +62,63 @@ const struct address_space_operations afs_fs_aops = {
.writepages = afs_writepages,
};
+static const struct vm_operations_struct afs_vm_ops = {
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = afs_page_mkwrite,
+};
+
+/*
+ * Discard a pin on a writeback key.
+ */
+void afs_put_wb_key(struct afs_wb_key *wbk)
+{
+ if (refcount_dec_and_test(&wbk->usage)) {
+ key_put(wbk->key);
+ kfree(wbk);
+ }
+}
+
+/*
+ * Cache key for writeback.
+ */
+int afs_cache_wb_key(struct afs_vnode *vnode, struct afs_file *af)
+{
+ struct afs_wb_key *wbk, *p;
+
+ wbk = kzalloc(sizeof(struct afs_wb_key), GFP_KERNEL);
+ if (!wbk)
+ return -ENOMEM;
+ refcount_set(&wbk->usage, 2);
+ wbk->key = af->key;
+
+ spin_lock(&vnode->wb_lock);
+ list_for_each_entry(p, &vnode->wb_keys, vnode_link) {
+ if (p->key == wbk->key)
+ goto found;
+ }
+
+ key_get(wbk->key);
+ list_add_tail(&wbk->vnode_link, &vnode->wb_keys);
+ spin_unlock(&vnode->wb_lock);
+ af->wb = wbk;
+ return 0;
+
+found:
+ refcount_inc(&p->usage);
+ spin_unlock(&vnode->wb_lock);
+ af->wb = p;
+ kfree(wbk);
+ return 0;
+}
+
/*
* open an AFS file or directory and attach a key to it
*/
int afs_open(struct inode *inode, struct file *file)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af;
struct key *key;
int ret;
@@ -75,19 +126,38 @@ int afs_open(struct inode *inode, struct file *file)
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
- _leave(" = %ld [key]", PTR_ERR(key));
- return PTR_ERR(key);
+ ret = PTR_ERR(key);
+ goto error;
}
- ret = afs_validate(vnode, key);
- if (ret < 0) {
- _leave(" = %d [val]", ret);
- return ret;
+ af = kzalloc(sizeof(*af), GFP_KERNEL);
+ if (!af) {
+ ret = -ENOMEM;
+ goto error_key;
}
+ af->key = key;
+
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_af;
- file->private_data = key;
+ if (file->f_mode & FMODE_WRITE) {
+ ret = afs_cache_wb_key(vnode, af);
+ if (ret < 0)
+ goto error_af;
+ }
+
+ file->private_data = af;
_leave(" = 0");
return 0;
+
+error_af:
+ kfree(af);
+error_key:
+ key_put(key);
+error:
+ _leave(" = %d", ret);
+ return ret;
}
/*
@@ -96,10 +166,16 @@ int afs_open(struct inode *inode, struct file *file)
int afs_release(struct inode *inode, struct file *file)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct afs_file *af = file->private_data;
_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
- key_put(file->private_data);
+ file->private_data = NULL;
+ if (af->wb)
+ afs_put_wb_key(af->wb);
+ key_put(af->key);
+ kfree(af);
+ afs_prune_wb_keys(vnode);
_leave(" = 0");
return 0;
}
@@ -138,6 +214,37 @@ static void afs_file_readpage_read_complete(struct page *page,
#endif
/*
+ * Fetch file data from the volume.
+ */
+int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x,,,",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_fetch_data(&fc, desc);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* read page from file, directory or symlink, given a key to use
*/
int afs_page_filler(void *data, struct page *page)
@@ -199,8 +306,13 @@ int afs_page_filler(void *data, struct page *page)
/* read the contents of the file from the server into the
* page */
- ret = afs_vnode_fetch_data(vnode, key, req);
+ ret = afs_fetch_data(vnode, key, req);
afs_put_read(req);
+
+ if (ret >= 0 && S_ISDIR(inode->i_mode) &&
+ !afs_dir_check_page(inode, page))
+ ret = -EIO;
+
if (ret < 0) {
if (ret == -ENOENT) {
_debug("got NOENT from server"
@@ -259,12 +371,12 @@ static int afs_readpage(struct file *file, struct page *page)
int ret;
if (file) {
- key = file->private_data;
+ key = afs_file_key(file);
ASSERT(key != NULL);
ret = afs_page_filler(key, page);
} else {
struct inode *inode = page->mapping->host;
- key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
+ key = afs_request_key(AFS_FS_S(inode->i_sb)->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
} else {
@@ -281,7 +393,7 @@ static int afs_readpage(struct file *file, struct page *page)
static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
{
#ifdef CONFIG_AFS_FSCACHE
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
#endif
struct page *page = req->pages[req->index];
@@ -310,7 +422,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
struct afs_read *req;
struct list_head *p;
struct page *first, *page;
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
pgoff_t index;
int ret, n, i;
@@ -369,7 +481,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
return 0;
}
- ret = afs_vnode_fetch_data(vnode, key, req);
+ ret = afs_fetch_data(vnode, key, req);
if (ret < 0)
goto error;
@@ -406,7 +518,7 @@ error:
static int afs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
struct afs_vnode *vnode;
int ret = 0;
@@ -464,16 +576,6 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
}
/*
- * write back a dirty page
- */
-static int afs_launder_page(struct page *page)
-{
- _enter("{%lu}", page->index);
-
- return 0;
-}
-
-/*
* invalidate part or all of a page
* - release a page and clean up its private data if offset is 0 (indicating
* the entire page)
@@ -481,7 +583,8 @@ static int afs_launder_page(struct page *page)
static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
- struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
+ struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ unsigned long priv;
_enter("{%lu},%u,%u", page->index, offset, length);
@@ -498,13 +601,11 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
#endif
if (PagePrivate(page)) {
- if (wb && !PageWriteback(page)) {
- set_page_private(page, 0);
- afs_put_writeback(wb);
- }
-
- if (!page_private(page))
- ClearPagePrivate(page);
+ priv = page_private(page);
+ trace_afs_page_dirty(vnode, tracepoint_string("inval"),
+ page->index, priv);
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
}
}
@@ -517,8 +618,8 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
*/
static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ unsigned long priv;
_enter("{{%x:%u}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
@@ -534,10 +635,10 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
#endif
if (PagePrivate(page)) {
- if (wb) {
- set_page_private(page, 0);
- afs_put_writeback(wb);
- }
+ priv = page_private(page);
+ trace_afs_page_dirty(vnode, tracepoint_string("rel"),
+ page->index, priv);
+ set_page_private(page, 0);
ClearPagePrivate(page);
}
@@ -545,3 +646,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
_leave(" = T");
return 1;
}
+
+/*
+ * Handle setting up a memory mapping on an AFS file.
+ */
+static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int ret;
+
+ ret = generic_file_mmap(file, vma);
+ if (ret == 0)
+ vma->vm_ops = &afs_vm_ops;
+ return ret;
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3191dff2c156..7571a5dfd5a3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -14,48 +14,17 @@
#define AFS_LOCK_GRANTED 0
#define AFS_LOCK_PENDING 1
+struct workqueue_struct *afs_lock_manager;
+
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
static void afs_fl_release_private(struct file_lock *fl);
-static struct workqueue_struct *afs_lock_manager;
-static DEFINE_MUTEX(afs_lock_manager_mutex);
-
static const struct file_lock_operations afs_lock_ops = {
.fl_copy_lock = afs_fl_copy_lock,
.fl_release_private = afs_fl_release_private,
};
/*
- * initialise the lock manager thread if it isn't already running
- */
-static int afs_init_lock_manager(void)
-{
- int ret;
-
- ret = 0;
- if (!afs_lock_manager) {
- mutex_lock(&afs_lock_manager_mutex);
- if (!afs_lock_manager) {
- afs_lock_manager = alloc_workqueue("kafs_lockd",
- WQ_MEM_RECLAIM, 0);
- if (!afs_lock_manager)
- ret = -ENOMEM;
- }
- mutex_unlock(&afs_lock_manager_mutex);
- }
- return ret;
-}
-
-/*
- * destroy the lock manager thread if it's running
- */
-void __exit afs_kill_lock_manager(void)
-{
- if (afs_lock_manager)
- destroy_workqueue(afs_lock_manager);
-}
-
-/*
* if the callback is broken on this vnode, then the lock may now be available
*/
void afs_lock_may_be_available(struct afs_vnode *vnode)
@@ -99,6 +68,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
}
/*
+ * Get a lock on a file
+ */
+static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
+ afs_lock_type_t type)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x,%u",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key), type);
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_set_lock(&fc, type);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Extend a lock on a file
+ */
+static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_current_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_extend_lock(&fc);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Release a lock on a file
+ */
+static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_current_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_release_lock(&fc);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* do work for a lock, including:
* - probing for a lock we're waiting on but didn't get immediately
* - extending a lock that's close to timing out
@@ -122,7 +185,7 @@ void afs_lock_work(struct work_struct *work)
/* attempt to release the server lock; if it fails, we just
* wait 5 minutes and it'll time out anyway */
- ret = afs_vnode_release_lock(vnode, vnode->unlock_key);
+ ret = afs_release_lock(vnode, vnode->unlock_key);
if (ret < 0)
printk(KERN_WARNING "AFS:"
" Failed to release lock on {%x:%x} error %d\n",
@@ -143,10 +206,10 @@ void afs_lock_work(struct work_struct *work)
BUG();
fl = list_entry(vnode->granted_locks.next,
struct file_lock, fl_u.afs.link);
- key = key_get(fl->fl_file->private_data);
+ key = key_get(afs_file_key(fl->fl_file));
spin_unlock(&vnode->lock);
- ret = afs_vnode_extend_lock(vnode, key);
+ ret = afs_extend_lock(vnode, key);
clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
key_put(key);
switch (ret) {
@@ -177,12 +240,12 @@ void afs_lock_work(struct work_struct *work)
BUG();
fl = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- key = key_get(fl->fl_file->private_data);
+ key = key_get(afs_file_key(fl->fl_file));
type = (fl->fl_type == F_RDLCK) ?
AFS_LOCK_READ : AFS_LOCK_WRITE;
spin_unlock(&vnode->lock);
- ret = afs_vnode_set_lock(vnode, key, type);
+ ret = afs_set_lock(vnode, key, type);
clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
switch (ret) {
case -EWOULDBLOCK:
@@ -213,7 +276,7 @@ void afs_lock_work(struct work_struct *work)
clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
spin_unlock(&vnode->lock);
- afs_vnode_release_lock(vnode, key);
+ afs_release_lock(vnode, key);
if (!list_empty(&vnode->pending_locks))
afs_lock_may_be_available(vnode);
}
@@ -255,7 +318,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_lock_type_t type;
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
int ret;
_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -264,10 +327,6 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
return -EINVAL;
- ret = afs_init_lock_manager();
- if (ret < 0)
- return ret;
-
fl->fl_ops = &afs_lock_ops;
INIT_LIST_HEAD(&fl->fl_u.afs.link);
fl->fl_u.afs.state = AFS_LOCK_PENDING;
@@ -278,7 +337,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
/* make sure we've got a callback on this file and that our view of the
* data version is up to date */
- ret = afs_vnode_fetch_status(vnode, NULL, key);
+ ret = afs_validate(vnode, key);
if (ret < 0)
goto error;
@@ -315,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
set_bit(AFS_VNODE_LOCKING, &vnode->flags);
spin_unlock(&vnode->lock);
- ret = afs_vnode_set_lock(vnode, key, type);
+ ret = afs_set_lock(vnode, key, type);
clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
switch (ret) {
case 0:
@@ -418,7 +477,7 @@ given_lock:
/* again, make sure we've got a callback on this file and, again, make
* sure that our view of the data version is up to date (we ignore
* errors incurred here and deal with the consequences elsewhere) */
- afs_vnode_fetch_status(vnode, NULL, key);
+ afs_validate(vnode, key);
error:
spin_unlock(&inode->i_lock);
@@ -441,7 +500,7 @@ vfs_rejected_lock:
static int afs_do_unlk(struct file *file, struct file_lock *fl)
{
struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
int ret;
_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
@@ -476,7 +535,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
static int afs_do_getlk(struct file *file, struct file_lock *fl)
{
struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
int ret, lock_count;
_enter("");
@@ -490,7 +549,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
- ret = afs_vnode_fetch_status(vnode, NULL, key);
+ ret = afs_fetch_status(vnode, key);
if (ret < 0)
goto error;
lock_count = vnode->status.lock_count;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 19f76ae36982..b90ef39ae914 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,12 +16,19 @@
#include "internal.h"
#include "afs_fs.h"
+static const struct afs_fid afs_zero_fid;
+
/*
* We need somewhere to discard into in case the server helpfully returns more
* than we asked for in FS.FetchData{,64}.
*/
static u8 afs_discard_buffer[64];
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
+{
+ call->cbi = afs_get_cb_interest(cbi);
+}
+
/*
* decode an AFSFid block
*/
@@ -47,14 +54,18 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
const __be32 *bp = *_bp;
umode_t mode;
u64 data_version, size;
- u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
+ bool changed = false;
kuid_t owner;
kgid_t group;
+ if (vnode)
+ write_seqlock(&vnode->cb_lock);
+
#define EXTRACT(DST) \
do { \
u32 x = ntohl(*bp++); \
- changed |= DST - x; \
+ if (DST != x) \
+ changed |= true; \
DST = x; \
} while (0)
@@ -70,8 +81,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
EXTRACT(status->caller_access); /* call ticket dependent */
EXTRACT(status->anon_access);
EXTRACT(status->mode);
- EXTRACT(status->parent.vnode);
- EXTRACT(status->parent.unique);
+ bp++; /* parent.vnode */
+ bp++; /* parent.unique */
bp++; /* seg size */
status->mtime_client = ntohl(*bp++);
status->mtime_server = ntohl(*bp++);
@@ -95,7 +106,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
status->mtime_client, status->mtime_server);
if (vnode) {
- status->parent.vid = vnode->fid.vid;
if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
_debug("vnode changed");
i_size_write(&vnode->vfs_inode, size);
@@ -127,25 +137,47 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
_debug("vnode modified %llx on {%x:%u}",
(unsigned long long) data_version,
vnode->fid.vid, vnode->fid.vnode);
- set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+ set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
}
} else if (store_version) {
status->data_version = data_version;
}
+
+ if (vnode)
+ write_sequnlock(&vnode->cb_lock);
}
/*
* decode an AFSCallBack block
*/
-static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
+static void xdr_decode_AFSCallBack(struct afs_call *call,
+ struct afs_vnode *vnode,
+ const __be32 **_bp)
{
+ struct afs_cb_interest *old, *cbi = call->cbi;
const __be32 *bp = *_bp;
+ u32 cb_expiry;
+
+ write_seqlock(&vnode->cb_lock);
+
+ if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) {
+ vnode->cb_version = ntohl(*bp++);
+ cb_expiry = ntohl(*bp++);
+ vnode->cb_type = ntohl(*bp++);
+ vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
+ old = vnode->cb_interest;
+ if (old != call->cbi) {
+ vnode->cb_interest = cbi;
+ cbi = old;
+ }
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ } else {
+ bp += 3;
+ }
- vnode->cb_version = ntohl(*bp++);
- vnode->cb_expiry = ntohl(*bp++);
- vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
+ write_sequnlock(&vnode->cb_lock);
+ call->cbi = cbi;
*_bp = bp;
}
@@ -243,22 +275,22 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
*/
static int afs_deliver_fs_fetch_status(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
- _enter("");
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
+ _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
- xdr_decode_AFSCallBack(&bp, vnode);
- if (call->reply2)
- xdr_decode_AFSVolSync(&bp, call->reply2);
+ xdr_decode_AFSCallBack(call, vnode, &bp);
+ if (call->reply[1])
+ xdr_decode_AFSVolSync(&bp, call->reply[1]);
_leave(" = 0 [done]");
return 0;
@@ -269,35 +301,33 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSFetchStatus = {
.name = "FS.FetchStatus",
+ .op = afs_FS_FetchStatus,
.deliver = afs_deliver_fs_fetch_status,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* fetch the status information for a file
*/
-int afs_fs_fetch_file_status(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- struct afs_volsync *volsync,
- bool async)
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter(",%x,{%x:%u},,",
- key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
- call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
- if (!call)
+ call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
+ if (!call) {
+ fc->ac.error = -ENOMEM;
return -ENOMEM;
+ }
- call->key = key;
- call->reply = vnode;
- call->reply2 = volsync;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = volsync;
/* marshall the parameters */
bp = call->request;
@@ -306,7 +336,10 @@ int afs_fs_fetch_file_status(struct afs_server *server,
bp[2] = htonl(vnode->fid.vnode);
bp[3] = htonl(vnode->fid.unique);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ call->cb_break = fc->cb_break;
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -314,8 +347,8 @@ int afs_fs_fetch_file_status(struct afs_server *server,
*/
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
- struct afs_read *req = call->reply3;
+ struct afs_vnode *vnode = call->reply[0];
+ struct afs_read *req = call->reply[2];
const __be32 *bp;
unsigned int size;
void *buffer;
@@ -431,9 +464,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
- xdr_decode_AFSCallBack(&bp, vnode);
- if (call->reply2)
- xdr_decode_AFSVolSync(&bp, call->reply2);
+ xdr_decode_AFSCallBack(call, vnode, &bp);
+ if (call->reply[1])
+ xdr_decode_AFSVolSync(&bp, call->reply[1]);
call->offset = 0;
call->unmarshall++;
@@ -457,7 +490,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
static void afs_fetch_data_destructor(struct afs_call *call)
{
- struct afs_read *req = call->reply3;
+ struct afs_read *req = call->reply[2];
afs_put_read(req);
afs_flat_call_destructor(call);
@@ -468,43 +501,38 @@ static void afs_fetch_data_destructor(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSFetchData = {
.name = "FS.FetchData",
+ .op = afs_FS_FetchData,
.deliver = afs_deliver_fs_fetch_data,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_fetch_data_destructor,
};
static const struct afs_call_type afs_RXFSFetchData64 = {
.name = "FS.FetchData64",
+ .op = afs_FS_FetchData64,
.deliver = afs_deliver_fs_fetch_data,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_fetch_data_destructor,
};
/*
* fetch data from a very large file
*/
-static int afs_fs_fetch_data64(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- struct afs_read *req,
- bool async)
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter("");
- call = afs_alloc_flat_call(&afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->reply2 = NULL; /* volsync */
- call->reply3 = req;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
- call->operation_ID = FSFETCHDATA64;
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = NULL; /* volsync */
+ call->reply[2] = req;
/* marshall the parameters */
bp = call->request;
@@ -518,39 +546,37 @@ static int afs_fs_fetch_data64(struct afs_server *server,
bp[7] = htonl(lower_32_bits(req->len));
atomic_inc(&req->usage);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ call->cb_break = fc->cb_break;
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* fetch data from a file
*/
-int afs_fs_fetch_data(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- struct afs_read *req,
- bool async)
+int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
if (upper_32_bits(req->pos) ||
upper_32_bits(req->len) ||
upper_32_bits(req->pos + req->len))
- return afs_fs_fetch_data64(server, key, vnode, req, async);
+ return afs_fs_fetch_data64(fc, req);
_enter("");
- call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->reply2 = NULL; /* volsync */
- call->reply3 = req;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
- call->operation_ID = FSFETCHDATA;
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = NULL; /* volsync */
+ call->reply[2] = req;
/* marshall the parameters */
bp = call->request;
@@ -562,90 +588,10 @@ int afs_fs_fetch_data(struct afs_server *server,
bp[5] = htonl(lower_32_bits(req->len));
atomic_inc(&req->usage);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
-}
-
-/*
- * deliver reply data to an FS.GiveUpCallBacks
- */
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
-{
- _enter("");
-
- /* shouldn't be any reply data */
- return afs_extract_data(call, NULL, 0, false);
-}
-
-/*
- * FS.GiveUpCallBacks operation type
- */
-static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
- .name = "FS.GiveUpCallBacks",
- .deliver = afs_deliver_fs_give_up_callbacks,
- .abort_to_error = afs_abort_to_error,
- .destructor = afs_flat_call_destructor,
-};
-
-/*
- * give up a set of callbacks
- * - the callbacks are held in the server->cb_break ring
- */
-int afs_fs_give_up_callbacks(struct afs_server *server,
- bool async)
-{
- struct afs_call *call;
- size_t ncallbacks;
- __be32 *bp, *tp;
- int loop;
-
- ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
- ARRAY_SIZE(server->cb_break));
-
- _enter("{%zu},", ncallbacks);
-
- if (ncallbacks == 0)
- return 0;
- if (ncallbacks > AFSCBMAX)
- ncallbacks = AFSCBMAX;
-
- _debug("break %zu callbacks", ncallbacks);
-
- call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
- 12 + ncallbacks * 6 * 4, 0);
- if (!call)
- return -ENOMEM;
-
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
-
- /* marshall the parameters */
- bp = call->request;
- tp = bp + 2 + ncallbacks * 3;
- *bp++ = htonl(FSGIVEUPCALLBACKS);
- *bp++ = htonl(ncallbacks);
- *tp++ = htonl(ncallbacks);
-
- atomic_sub(ncallbacks, &server->cb_break_n);
- for (loop = ncallbacks; loop > 0; loop--) {
- struct afs_callback *cb =
- &server->cb_break[server->cb_break_tail];
-
- *bp++ = htonl(cb->fid.vid);
- *bp++ = htonl(cb->fid.vnode);
- *bp++ = htonl(cb->fid.unique);
- *tp++ = htonl(cb->version);
- *tp++ = htonl(cb->expiry);
- *tp++ = htonl(cb->type);
- smp_mb();
- server->cb_break_tail =
- (server->cb_break_tail + 1) &
- (ARRAY_SIZE(server->cb_break) - 1);
- }
-
- ASSERT(ncallbacks > 0);
- wake_up_nr(&server->cb_break_waitq, ncallbacks);
-
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ call->cb_break = fc->cb_break;
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -653,7 +599,7 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
*/
static int afs_deliver_fs_create_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -665,11 +611,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply2);
- xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->reply[1]);
+ xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
- xdr_decode_AFSCallBack_raw(&bp, call->reply4);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -678,27 +624,33 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
/*
* FS.CreateFile and FS.MakeDir operation type
*/
-static const struct afs_call_type afs_RXFSCreateXXXX = {
- .name = "FS.CreateXXXX",
+static const struct afs_call_type afs_RXFSCreateFile = {
+ .name = "FS.CreateFile",
+ .op = afs_FS_CreateFile,
+ .deliver = afs_deliver_fs_create_vnode,
+ .destructor = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type afs_RXFSMakeDir = {
+ .name = "FS.MakeDir",
+ .op = afs_FS_MakeDir,
.deliver = afs_deliver_fs_create_vnode,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* create a file or make a directory
*/
-int afs_fs_create(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
+int afs_fs_create(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
struct afs_fid *newfid,
struct afs_file_status *newstatus,
- struct afs_callback *newcb,
- bool async)
+ struct afs_callback *newcb)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
@@ -708,18 +660,17 @@ int afs_fs_create(struct afs_server *server,
padsz = (4 - (namesz & 3)) & 3;
reqsz = (5 * 4) + namesz + padsz + (6 * 4);
- call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
- (3 + 21 + 21 + 3 + 6) * 4);
+ call = afs_alloc_flat_call(
+ net, S_ISDIR(mode) ? &afs_RXFSMakeDir : &afs_RXFSCreateFile,
+ reqsz, (3 + 21 + 21 + 3 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->reply2 = newfid;
- call->reply3 = newstatus;
- call->reply4 = newcb;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = newfid;
+ call->reply[2] = newstatus;
+ call->reply[3] = newcb;
/* marshall the parameters */
bp = call->request;
@@ -741,7 +692,9 @@ int afs_fs_create(struct afs_server *server,
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -749,7 +702,7 @@ int afs_fs_create(struct afs_server *server,
*/
static int afs_deliver_fs_remove(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -762,7 +715,7 @@ static int afs_deliver_fs_remove(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -771,24 +724,28 @@ static int afs_deliver_fs_remove(struct afs_call *call)
/*
* FS.RemoveDir/FS.RemoveFile operation type
*/
-static const struct afs_call_type afs_RXFSRemoveXXXX = {
- .name = "FS.RemoveXXXX",
+static const struct afs_call_type afs_RXFSRemoveFile = {
+ .name = "FS.RemoveFile",
+ .op = afs_FS_RemoveFile,
+ .deliver = afs_deliver_fs_remove,
+ .destructor = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type afs_RXFSRemoveDir = {
+ .name = "FS.RemoveDir",
+ .op = afs_FS_RemoveDir,
.deliver = afs_deliver_fs_remove,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* remove a file or directory
*/
-int afs_fs_remove(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- const char *name,
- bool isdir,
- bool async)
+int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
@@ -798,14 +755,14 @@ int afs_fs_remove(struct afs_server *server,
padsz = (4 - (namesz & 3)) & 3;
reqsz = (5 * 4) + namesz + padsz;
- call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+ call = afs_alloc_flat_call(
+ net, isdir ? &afs_RXFSRemoveDir : &afs_RXFSRemoveFile,
+ reqsz, (21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -821,7 +778,9 @@ int afs_fs_remove(struct afs_server *server,
bp = (void *) bp + padsz;
}
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -829,7 +788,7 @@ int afs_fs_remove(struct afs_server *server,
*/
static int afs_deliver_fs_link(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
+ struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
const __be32 *bp;
int ret;
@@ -843,7 +802,7 @@ static int afs_deliver_fs_link(struct afs_call *call)
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -854,22 +813,20 @@ static int afs_deliver_fs_link(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSLink = {
.name = "FS.Link",
+ .op = afs_FS_Link,
.deliver = afs_deliver_fs_link,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* make a hard link
*/
-int afs_fs_link(struct afs_server *server,
- struct key *key,
- struct afs_vnode *dvnode,
- struct afs_vnode *vnode,
- const char *name,
- bool async)
+int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+ const char *name)
{
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
@@ -879,15 +836,13 @@ int afs_fs_link(struct afs_server *server,
padsz = (4 - (namesz & 3)) & 3;
reqsz = (5 * 4) + namesz + padsz + (3 * 4);
- call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = dvnode;
- call->reply2 = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = dvnode;
+ call->reply[1] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -906,7 +861,9 @@ int afs_fs_link(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -914,7 +871,7 @@ int afs_fs_link(struct afs_server *server,
*/
static int afs_deliver_fs_symlink(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -926,10 +883,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply2);
- xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->reply[1]);
+ xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -940,24 +897,23 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSSymlink = {
.name = "FS.Symlink",
+ .op = afs_FS_Symlink,
.deliver = afs_deliver_fs_symlink,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* create a symbolic link
*/
-int afs_fs_symlink(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
+int afs_fs_symlink(struct afs_fs_cursor *fc,
const char *name,
const char *contents,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- bool async)
+ struct afs_file_status *newstatus)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
size_t namesz, reqsz, padsz, c_namesz, c_padsz;
__be32 *bp;
@@ -971,17 +927,15 @@ int afs_fs_symlink(struct afs_server *server,
reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
- call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
+ call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz,
(3 + 21 + 21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->reply2 = newfid;
- call->reply3 = newstatus;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = newfid;
+ call->reply[2] = newstatus;
/* marshall the parameters */
bp = call->request;
@@ -1010,7 +964,9 @@ int afs_fs_symlink(struct afs_server *server,
*bp++ = htonl(S_IRWXUGO); /* unix mode */
*bp++ = 0; /* segment size */
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -1018,7 +974,7 @@ int afs_fs_symlink(struct afs_server *server,
*/
static int afs_deliver_fs_rename(struct afs_call *call)
{
- struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
+ struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1];
const __be32 *bp;
int ret;
@@ -1034,7 +990,7 @@ static int afs_deliver_fs_rename(struct afs_call *call)
if (new_dvnode != orig_dvnode)
xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
NULL);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -1045,23 +1001,22 @@ static int afs_deliver_fs_rename(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSRename = {
.name = "FS.Rename",
+ .op = afs_FS_Rename,
.deliver = afs_deliver_fs_rename,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* create a symbolic link
*/
-int afs_fs_rename(struct afs_server *server,
- struct key *key,
- struct afs_vnode *orig_dvnode,
+int afs_fs_rename(struct afs_fs_cursor *fc,
const char *orig_name,
struct afs_vnode *new_dvnode,
- const char *new_name,
- bool async)
+ const char *new_name)
{
+ struct afs_vnode *orig_dvnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(orig_dvnode);
size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
__be32 *bp;
@@ -1078,15 +1033,13 @@ int afs_fs_rename(struct afs_server *server,
(3 * 4) +
4 + n_namesz + n_padsz;
- call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = orig_dvnode;
- call->reply2 = new_dvnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = orig_dvnode;
+ call->reply[1] = new_dvnode;
/* marshall the parameters */
bp = call->request;
@@ -1113,7 +1066,9 @@ int afs_fs_rename(struct afs_server *server,
bp = (void *) bp + n_padsz;
}
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &orig_dvnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -1121,7 +1076,7 @@ int afs_fs_rename(struct afs_server *server,
*/
static int afs_deliver_fs_store_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1135,7 +1090,7 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
&call->store_version);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
afs_pages_written_back(vnode, call);
@@ -1148,47 +1103,44 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSStoreData = {
.name = "FS.StoreData",
+ .op = afs_FS_StoreData,
.deliver = afs_deliver_fs_store_data,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSStoreData64 = {
.name = "FS.StoreData64",
+ .op = afs_FS_StoreData64,
.deliver = afs_deliver_fs_store_data,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
* store a set of pages to a very large file
*/
-static int afs_fs_store_data64(struct afs_server *server,
- struct afs_writeback *wb,
+static int afs_fs_store_data64(struct afs_fs_cursor *fc,
+ struct address_space *mapping,
pgoff_t first, pgoff_t last,
unsigned offset, unsigned to,
- loff_t size, loff_t pos, loff_t i_size,
- bool async)
+ loff_t size, loff_t pos, loff_t i_size)
{
- struct afs_vnode *vnode = wb->vnode;
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter(",%x,{%x:%u},,",
- key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
- call = afs_alloc_flat_call(&afs_RXFSStoreData64,
+ call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
(4 + 6 + 3 * 2) * 4,
(21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->wb = wb;
- call->key = wb->key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
- call->mapping = vnode->vfs_inode.i_mapping;
+ call->key = fc->key;
+ call->mapping = mapping;
+ call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
@@ -1217,24 +1169,25 @@ static int afs_fs_store_data64(struct afs_server *server,
*bp++ = htonl(i_size >> 32);
*bp++ = htonl((u32) i_size);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* store a set of pages
*/
-int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
+int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to,
- bool async)
+ unsigned offset, unsigned to)
{
- struct afs_vnode *vnode = wb->vnode;
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
loff_t size, pos, i_size;
__be32 *bp;
_enter(",%x,{%x:%u},,",
- key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
size = (loff_t)to - (loff_t)offset;
if (first != last)
@@ -1251,21 +1204,18 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
(unsigned long long) i_size);
if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
- return afs_fs_store_data64(server, wb, first, last, offset, to,
- size, pos, i_size, async);
+ return afs_fs_store_data64(fc, mapping, first, last, offset, to,
+ size, pos, i_size);
- call = afs_alloc_flat_call(&afs_RXFSStoreData,
+ call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
(4 + 6 + 3) * 4,
(21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->wb = wb;
- call->key = wb->key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
- call->mapping = vnode->vfs_inode.i_mapping;
+ call->key = fc->key;
+ call->mapping = mapping;
+ call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
@@ -1291,7 +1241,9 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
*bp++ = htonl(size);
*bp++ = htonl(i_size);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -1300,7 +1252,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
static int afs_deliver_fs_store_status(struct afs_call *call)
{
afs_dataversion_t *store_version;
- struct afs_vnode *vnode = call->reply;
+ struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1317,7 +1269,7 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -1328,22 +1280,22 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSStoreStatus = {
.name = "FS.StoreStatus",
+ .op = afs_FS_StoreStatus,
.deliver = afs_deliver_fs_store_status,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSStoreData_as_Status = {
.name = "FS.StoreData",
+ .op = afs_FS_StoreData,
.deliver = afs_deliver_fs_store_status,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
.name = "FS.StoreData64",
+ .op = afs_FS_StoreData64,
.deliver = afs_deliver_fs_store_status,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
@@ -1351,30 +1303,27 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
* set the attributes on a very large file, using FS.StoreData rather than
* FS.StoreStatus so as to alter the file size also
*/
-static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
- struct afs_vnode *vnode, struct iattr *attr,
- bool async)
+static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter(",%x,{%x:%u},,",
- key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
ASSERT(attr->ia_valid & ATTR_SIZE);
- call = afs_alloc_flat_call(&afs_RXFSStoreData64_as_Status,
+ call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status,
(4 + 6 + 3 * 2) * 4,
(21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
call->store_version = vnode->status.data_version + 1;
- call->operation_ID = FSSTOREDATA;
/* marshall the parameters */
bp = call->request;
@@ -1392,40 +1341,38 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key,
*bp++ = htonl(attr->ia_size >> 32); /* new file length */
*bp++ = htonl((u32) attr->ia_size);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
* so as to alter the file size also
*/
-static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
- struct afs_vnode *vnode, struct iattr *attr,
- bool async)
+static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter(",%x,{%x:%u},,",
- key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
ASSERT(attr->ia_valid & ATTR_SIZE);
if (attr->ia_size >> 32)
- return afs_fs_setattr_size64(server, key, vnode, attr,
- async);
+ return afs_fs_setattr_size64(fc, attr);
- call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
+ call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
(4 + 6 + 3) * 4,
(21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
call->store_version = vnode->status.data_version + 1;
- call->operation_ID = FSSTOREDATA;
/* marshall the parameters */
bp = call->request;
@@ -1440,38 +1387,36 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
*bp++ = 0; /* size of write */
*bp++ = htonl(attr->ia_size); /* new file length */
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* set the attributes on a file, using FS.StoreData if there's a change in file
* size, and FS.StoreStatus otherwise
*/
-int afs_fs_setattr(struct afs_server *server, struct key *key,
- struct afs_vnode *vnode, struct iattr *attr,
- bool async)
+int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
if (attr->ia_valid & ATTR_SIZE)
- return afs_fs_setattr_size(server, key, vnode, attr,
- async);
+ return afs_fs_setattr_size(fc, attr);
_enter(",%x,{%x:%u},,",
- key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
- call = afs_alloc_flat_call(&afs_RXFSStoreStatus,
+ call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
(4 + 6) * 4,
(21 + 6) * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
- call->operation_ID = FSSTORESTATUS;
+ call->key = fc->key;
+ call->reply[0] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -1482,7 +1427,9 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
xdr_encode_AFS_StoreStatus(&bp, attr);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -1510,7 +1457,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
+ xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
call->offset = 0;
call->unmarshall++;
@@ -1531,13 +1478,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
case 3:
_debug("extract volname");
if (call->count > 0) {
- ret = afs_extract_data(call, call->reply3,
+ ret = afs_extract_data(call, call->reply[2],
call->count, true);
if (ret < 0)
return ret;
}
- p = call->reply3;
+ p = call->reply[2];
p[call->count] = 0;
_debug("volname '%s'", p);
@@ -1578,13 +1525,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
case 6:
_debug("extract offline");
if (call->count > 0) {
- ret = afs_extract_data(call, call->reply3,
+ ret = afs_extract_data(call, call->reply[2],
call->count, true);
if (ret < 0)
return ret;
}
- p = call->reply3;
+ p = call->reply[2];
p[call->count] = 0;
_debug("offline '%s'", p);
@@ -1625,13 +1572,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
case 9:
_debug("extract motd");
if (call->count > 0) {
- ret = afs_extract_data(call, call->reply3,
+ ret = afs_extract_data(call, call->reply[2],
call->count, true);
if (ret < 0)
return ret;
}
- p = call->reply3;
+ p = call->reply[2];
p[call->count] = 0;
_debug("motd '%s'", p);
@@ -1662,8 +1609,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
*/
static void afs_get_volume_status_call_destructor(struct afs_call *call)
{
- kfree(call->reply3);
- call->reply3 = NULL;
+ kfree(call->reply[2]);
+ call->reply[2] = NULL;
afs_flat_call_destructor(call);
}
@@ -1672,21 +1619,20 @@ static void afs_get_volume_status_call_destructor(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSGetVolumeStatus = {
.name = "FS.GetVolumeStatus",
+ .op = afs_FS_GetVolumeStatus,
.deliver = afs_deliver_fs_get_volume_status,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_get_volume_status_call_destructor,
};
/*
* fetch the status of a volume
*/
-int afs_fs_get_volume_status(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- struct afs_volume_status *vs,
- bool async)
+int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
+ struct afs_volume_status *vs)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
void *tmpbuf;
@@ -1696,25 +1642,25 @@ int afs_fs_get_volume_status(struct afs_server *server,
if (!tmpbuf)
return -ENOMEM;
- call = afs_alloc_flat_call(&afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
if (!call) {
kfree(tmpbuf);
return -ENOMEM;
}
- call->key = key;
- call->reply = vnode;
- call->reply2 = vs;
- call->reply3 = tmpbuf;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = vs;
+ call->reply[2] = tmpbuf;
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSGETVOLUMESTATUS);
bp[1] = htonl(vnode->fid.vid);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
@@ -1733,7 +1679,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+ /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
return 0;
@@ -1744,8 +1690,8 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
*/
static const struct afs_call_type afs_RXFSSetLock = {
.name = "FS.SetLock",
+ .op = afs_FS_SetLock,
.deliver = afs_deliver_fs_xxxx_lock,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
@@ -1754,8 +1700,8 @@ static const struct afs_call_type afs_RXFSSetLock = {
*/
static const struct afs_call_type afs_RXFSExtendLock = {
.name = "FS.ExtendLock",
+ .op = afs_FS_ExtendLock,
.deliver = afs_deliver_fs_xxxx_lock,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
@@ -1764,33 +1710,29 @@ static const struct afs_call_type afs_RXFSExtendLock = {
*/
static const struct afs_call_type afs_RXFSReleaseLock = {
.name = "FS.ReleaseLock",
+ .op = afs_FS_ReleaseLock,
.deliver = afs_deliver_fs_xxxx_lock,
- .abort_to_error = afs_abort_to_error,
.destructor = afs_flat_call_destructor,
};
/*
- * get a lock on a file
+ * Set a lock on a file
*/
-int afs_fs_set_lock(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- afs_lock_type_t type,
- bool async)
+int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter("");
- call = afs_alloc_flat_call(&afs_RXFSSetLock, 5 * 4, 6 * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -1800,30 +1742,29 @@ int afs_fs_set_lock(struct afs_server *server,
*bp++ = htonl(vnode->fid.unique);
*bp++ = htonl(type);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* extend a lock on a file
*/
-int afs_fs_extend_lock(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- bool async)
+int afs_fs_extend_lock(struct afs_fs_cursor *fc)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter("");
- call = afs_alloc_flat_call(&afs_RXFSExtendLock, 4 * 4, 6 * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -1832,30 +1773,29 @@ int afs_fs_extend_lock(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
/*
* release a lock on a file
*/
-int afs_fs_release_lock(struct afs_server *server,
- struct key *key,
- struct afs_vnode *vnode,
- bool async)
+int afs_fs_release_lock(struct afs_fs_cursor *fc)
{
+ struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
_enter("");
- call = afs_alloc_flat_call(&afs_RXFSReleaseLock, 4 * 4, 6 * 4);
+ call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
if (!call)
return -ENOMEM;
- call->key = key;
- call->reply = vnode;
- call->service_id = FS_SERVICE;
- call->port = htons(AFS_FS_PORT);
+ call->key = fc->key;
+ call->reply[0] = vnode;
/* marshall the parameters */
bp = call->request;
@@ -1864,5 +1804,145 @@ int afs_fs_release_lock(struct afs_server *server,
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- return afs_make_call(&server->addr, call, GFP_NOFS, async);
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an FS.GiveUpAllCallBacks operation.
+ */
+static int afs_deliver_fs_give_up_all_callbacks(struct afs_call *call)
+{
+ return afs_transfer_reply(call);
+}
+
+/*
+ * FS.GiveUpAllCallBacks operation type
+ */
+static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = {
+ .name = "FS.GiveUpAllCallBacks",
+ .op = afs_FS_GiveUpAllCallBacks,
+ .deliver = afs_deliver_fs_give_up_all_callbacks,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Flush all the callbacks we have on a server.
+ */
+int afs_fs_give_up_all_callbacks(struct afs_net *net,
+ struct afs_server *server,
+ struct afs_addr_cursor *ac,
+ struct key *key)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0);
+ if (!call)
+ return -ENOMEM;
+
+ call->key = key;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(FSGIVEUPALLCALLBACKS);
+
+ /* Can't take a ref on server */
+ return afs_make_call(ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an FS.GetCapabilities operation.
+ */
+static int afs_deliver_fs_get_capabilities(struct afs_call *call)
+{
+ u32 count;
+ int ret;
+
+ _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract the capabilities word count */
+ case 1:
+ ret = afs_extract_data(call, &call->tmp,
+ 1 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ count = ntohl(call->tmp);
+
+ call->count = count;
+ call->count2 = count;
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract capabilities words */
+ case 2:
+ count = min(call->count, 16U);
+ ret = afs_extract_data(call, call->buffer,
+ count * sizeof(__be32),
+ call->count > 16);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: Examine capabilities */
+
+ call->count -= count;
+ if (call->count > 0)
+ goto again;
+ call->offset = 0;
+ call->unmarshall++;
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * FS.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXFSGetCapabilities = {
+ .name = "FS.GetCapabilities",
+ .op = afs_FS_GetCapabilities,
+ .deliver = afs_deliver_fs_get_capabilities,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports. This can
+ * return up to 196 words.
+ */
+int afs_fs_get_capabilities(struct afs_net *net,
+ struct afs_server *server,
+ struct afs_addr_cursor *ac,
+ struct key *key)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4);
+ if (!call)
+ return -ENOMEM;
+
+ call->key = key;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(FSGETCAPABILITIES);
+
+ /* Can't take a ref on server */
+ trace_afs_make_fs_call(call, NULL);
+ return afs_make_call(ac, call, GFP_NOFS, false);
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 342316a9e3e0..3415eb7484f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,11 +23,6 @@
#include <linux/namei.h>
#include "internal.h"
-struct afs_iget_data {
- struct afs_fid fid;
- struct afs_volume *volume; /* volume on which resides */
-};
-
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
.listxattr = afs_listxattr,
@@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = {
static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
{
struct inode *inode = AFS_VNODE_TO_I(vnode);
+ bool changed;
_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
vnode->status.type,
@@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
vnode->status.data_version,
vnode->status.mode);
+ read_seqlock_excl(&vnode->cb_lock);
+
switch (vnode->status.type) {
case AFS_FTYPE_FILE:
inode->i_mode = S_IFREG | vnode->status.mode;
@@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
if ((vnode->status.mode & 0777) == 0644) {
inode->i_flags |= S_AUTOMOUNT;
- spin_lock(&vnode->lock);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- spin_unlock(&vnode->lock);
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
@@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
+ read_sequnlock_excl(&vnode->cb_lock);
return -EBADMSG;
}
-#ifdef CONFIG_AFS_FSCACHE
- if (vnode->status.size != inode->i_size)
- fscache_attr_changed(vnode->cache);
-#endif
+ changed = (vnode->status.size != inode->i_size);
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
@@ -97,13 +91,49 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
+
+ read_sequnlock_excl(&vnode->cb_lock);
+
+#ifdef CONFIG_AFS_FSCACHE
+ if (changed)
+ fscache_attr_changed(vnode->cache);
+#endif
return 0;
}
/*
+ * Fetch file status from the volume.
+ */
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s,{%x:%u.%u,S=%lx}",
+ vnode->volume->name,
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ vnode->flags);
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_fetch_file_status(&fc, NULL);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* iget5() comparator
*/
-static int afs_iget5_test(struct inode *inode, void *opaque)
+int afs_iget5_test(struct inode *inode, void *opaque)
{
struct afs_iget_data *data = opaque;
@@ -204,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
struct afs_fid *fid, struct afs_file_status *status,
- struct afs_callback *cb)
+ struct afs_callback *cb, struct afs_cb_interest *cbi)
{
struct afs_iget_data data = { .fid = *fid };
struct afs_super_info *as;
@@ -237,8 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
if (!status) {
/* it's a remotely extant inode */
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- ret = afs_vnode_fetch_status(vnode, NULL, key);
+ ret = afs_fetch_status(vnode, key);
if (ret < 0)
goto bad_inode;
} else {
@@ -249,16 +278,17 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
vnode->cb_version = 0;
- vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = ktime_get_real_seconds();
+ vnode->cb_expires_at = 0;
} else {
vnode->cb_version = cb->version;
- vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry +
- ktime_get_real_seconds();
+ vnode->cb_expires_at = cb->expiry;
+ vnode->cb_interest = afs_get_cb_interest(cbi);
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
+
+ vnode->cb_expires_at += ktime_get_real_seconds();
}
/* set up caching before mapping the status, as map-status reads the
@@ -320,25 +350,34 @@ void afs_zap_data(struct afs_vnode *vnode)
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
+ time64_t now = ktime_get_real_seconds();
+ bool valid = false;
int ret;
_enter("{v={%x:%u} fl=%lx},%x",
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
- if (vnode->cb_promised &&
- !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
- !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
- !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
- _debug("callback expired");
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- } else {
- goto valid;
+ /* Quickly check the callback state. Ideally, we'd use read_seqbegin
+ * here, but we have no way to pass the net namespace to the RCU
+ * cleanup for the server record.
+ */
+ read_seqlock_excl(&vnode->cb_lock);
+
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+ vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
+ !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+ vnode->cb_expires_at - 10 > now) {
+ valid = true;
}
+ } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ valid = true;
}
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ read_sequnlock_excl(&vnode->cb_lock);
+ if (valid)
goto valid;
mutex_lock(&vnode->validate_lock);
@@ -347,12 +386,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* a new promise - note that if the (parent) directory's metadata was
* changed then the security may be different and we may no longer have
* access */
- if (!vnode->cb_promised ||
- test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
- ret = afs_vnode_fetch_status(vnode, NULL, key);
- if (ret < 0)
+ ret = afs_fetch_status(vnode, key);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
goto error_unlock;
+ }
_debug("new promise [fl=%lx]", vnode->flags);
}
@@ -367,7 +410,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+ clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
mutex_unlock(&vnode->validate_lock);
valid:
_leave(" = 0");
@@ -386,10 +429,17 @@ int afs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ int seq = 0;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
- generic_fillattr(inode, stat);
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ generic_fillattr(inode, stat);
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
return 0;
}
@@ -411,18 +461,14 @@ int afs_drop_inode(struct inode *inode)
*/
void afs_evict_inode(struct inode *inode)
{
- struct afs_permits *permits;
struct afs_vnode *vnode;
vnode = AFS_FS_I(inode);
- _enter("{%x:%u.%d} v=%u x=%u t=%u }",
+ _enter("{%x:%u.%d}",
vnode->fid.vid,
vnode->fid.vnode,
- vnode->fid.unique,
- vnode->cb_version,
- vnode->cb_expiry,
- vnode->cb_type);
+ vnode->fid.unique);
_debug("CLEAR INODE %p", inode);
@@ -431,31 +477,24 @@ void afs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- afs_give_up_callback(vnode);
-
- if (vnode->server) {
- spin_lock(&vnode->server->fs_lock);
- rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
- spin_unlock(&vnode->server->fs_lock);
- afs_put_server(vnode->server);
- vnode->server = NULL;
+ if (vnode->cb_interest) {
+ afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
+ vnode->cb_interest = NULL;
}
- ASSERT(list_empty(&vnode->writebacks));
- ASSERT(!vnode->cb_promised);
+ while (!list_empty(&vnode->wb_keys)) {
+ struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+ struct afs_wb_key, vnode_link);
+ list_del(&wbk->vnode_link);
+ afs_put_wb_key(wbk);
+ }
#ifdef CONFIG_AFS_FSCACHE
fscache_relinquish_cookie(vnode->cache, 0);
vnode->cache = NULL;
#endif
- mutex_lock(&vnode->permits_lock);
- permits = vnode->permits;
- RCU_INIT_POINTER(vnode->permits, NULL);
- mutex_unlock(&vnode->permits_lock);
- if (permits)
- call_rcu(&permits->rcu, afs_zap_permits);
-
+ afs_put_permits(vnode->permit_cache);
_leave("");
}
@@ -464,6 +503,7 @@ void afs_evict_inode(struct inode *inode)
*/
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
+ struct afs_fs_cursor fc;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
int ret;
@@ -479,13 +519,11 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
}
/* flush any dirty data outstanding on a regular file */
- if (S_ISREG(vnode->vfs_inode.i_mode)) {
+ if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
- afs_writeback_all(vnode);
- }
if (attr->ia_valid & ATTR_FILE) {
- key = attr->ia_file->private_data;
+ key = afs_file_key(attr->ia_file);
} else {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
@@ -494,7 +532,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- ret = afs_vnode_setattr(vnode, key, attr);
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_setattr(&fc, attr);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
if (!(attr->ia_valid & ATTR_FILE))
key_put(key);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3f03f7888302..bd8dcee7e066 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
#include <linux/fscache.h>
#include <linux/backing-dev.h>
#include <linux/uuid.h>
+#include <net/net_namespace.h>
#include <net/af_rxrpc.h>
#include "afs.h"
@@ -31,16 +32,6 @@
struct pagevec;
struct afs_call;
-typedef enum {
- AFS_VL_NEW, /* new, uninitialised record */
- AFS_VL_CREATING, /* creating record */
- AFS_VL_VALID, /* record is pending */
- AFS_VL_NO_VOLUME, /* no such volume available */
- AFS_VL_UPDATING, /* update in progress */
- AFS_VL_VOLUME_DELETED, /* volume was deleted */
- AFS_VL_UNCERTAIN, /* uncertain state (update failed) */
-} __attribute__((packed)) afs_vlocation_state_t;
-
struct afs_mount_params {
bool rwpath; /* T if the parent should be considered R/W */
bool force; /* T to force cell type */
@@ -48,20 +39,43 @@ struct afs_mount_params {
afs_voltype_t type; /* type of volume requested */
int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */
+ struct afs_net *net; /* Network namespace in effect */
struct afs_cell *cell; /* cell in which to find volume */
struct afs_volume *volume; /* volume record */
struct key *key; /* key to use for secure mounting */
};
+struct afs_iget_data {
+ struct afs_fid fid;
+ struct afs_volume *volume; /* volume on which resides */
+};
+
enum afs_call_state {
- AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
- AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
- AFS_CALL_AWAIT_OP_ID, /* awaiting op ID on incoming call */
- AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
- AFS_CALL_REPLYING, /* replying to incoming call */
- AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */
- AFS_CALL_COMPLETE, /* Completed or failed */
+ AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */
+ AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */
+ AFS_CALL_CL_PROC_REPLY, /* Client: rxrpc call complete; processing reply */
+ AFS_CALL_SV_AWAIT_OP_ID, /* Server: Awaiting op ID */
+ AFS_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
+ AFS_CALL_SV_REPLYING, /* Server: Replying */
+ AFS_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
+ AFS_CALL_COMPLETE, /* Completed or failed */
};
+
+/*
+ * List of server addresses.
+ */
+struct afs_addr_list {
+ struct rcu_head rcu; /* Must be first */
+ refcount_t usage;
+ u32 version; /* Version */
+ unsigned short nr_addrs;
+ unsigned short index; /* Address currently in use */
+ unsigned short nr_ipv4; /* Number of IPv4 addresses */
+ unsigned long probed; /* Mask of servers that have been probed */
+ unsigned long yfs; /* Mask of servers that are YFS */
+ struct sockaddr_rxrpc addrs[];
+};
+
/*
* a record of an in-progress RxRPC call
*/
@@ -72,25 +86,25 @@ struct afs_call {
struct work_struct work; /* actual work processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
- struct afs_server *server; /* server affected by incoming CM call */
+ struct afs_net *net; /* The network namespace */
+ struct afs_server *cm_server; /* Server affected by incoming CM call */
+ struct afs_cb_interest *cbi; /* Callback interest for server used */
void *request; /* request data (first part) */
- struct address_space *mapping; /* page set */
- struct afs_writeback *wb; /* writeback being performed */
+ struct address_space *mapping; /* Pages being written from */
void *buffer; /* reply receive buffer */
- void *reply; /* reply buffer (first part) */
- void *reply2; /* reply buffer (second part) */
- void *reply3; /* reply buffer (third part) */
- void *reply4; /* reply buffer (fourth part) */
+ void *reply[4]; /* Where to put the reply */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
size_t offset; /* offset into received data store */
atomic_t usage;
enum afs_call_state state;
+ spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
+ unsigned int cb_break; /* cb_break + cb_s_break before the call */
union {
unsigned last_to; /* amount of mapping[last] */
unsigned count2; /* count used in unmarshalling */
@@ -100,9 +114,9 @@ struct afs_call {
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
+ bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
- u16 service_id; /* RxRPC service ID to call */
- __be16 port; /* target UDP port */
+ u16 service_id; /* Actual service ID (after upgrade) */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
@@ -111,15 +125,13 @@ struct afs_call {
struct afs_call_type {
const char *name;
+ unsigned int op; /* Really enum afs_fs_operation */
/* deliver request or reply data to an call
* - returning an error will cause the call to be aborted
*/
int (*deliver)(struct afs_call *call);
- /* map an abort code to an error number */
- int (*abort_to_error)(u32 abort_code);
-
/* clean up a call */
void (*destructor)(struct afs_call *call);
@@ -128,6 +140,30 @@ struct afs_call_type {
};
/*
+ * Key available for writeback on a file.
+ */
+struct afs_wb_key {
+ refcount_t usage;
+ struct key *key;
+ struct list_head vnode_link; /* Link in vnode->wb_keys */
+};
+
+/*
+ * AFS open file information record. Pointed to by file->private_data.
+ */
+struct afs_file {
+ struct key *key; /* The key this file was opened with */
+ struct afs_wb_key *wb; /* Writeback key record for this file */
+};
+
+static inline struct key *afs_file_key(struct file *file)
+{
+ struct afs_file *af = file->private_data;
+
+ return af->key;
+}
+
+/*
* Record of an outstanding read operation on a vnode.
*/
struct afs_read {
@@ -143,38 +179,13 @@ struct afs_read {
};
/*
- * record of an outstanding writeback on a vnode
- */
-struct afs_writeback {
- struct list_head link; /* link in vnode->writebacks */
- struct work_struct writer; /* work item to perform the writeback */
- struct afs_vnode *vnode; /* vnode to which this write applies */
- struct key *key; /* owner of this write */
- wait_queue_head_t waitq; /* completion and ready wait queue */
- pgoff_t first; /* first page in batch */
- pgoff_t point; /* last page in current store op */
- pgoff_t last; /* last page in batch (inclusive) */
- unsigned offset_first; /* offset into first page of start of write */
- unsigned to_last; /* offset into last page of end of write */
- int num_conflicts; /* count of conflicting writes in list */
- int usage;
- bool conflicts; /* T if has dependent conflicts */
- enum {
- AFS_WBACK_SYNCING, /* synchronisation being performed */
- AFS_WBACK_PENDING, /* write pending */
- AFS_WBACK_CONFLICTING, /* conflicting writes posted */
- AFS_WBACK_WRITING, /* writing back */
- AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
- } state __attribute__((packed));
-};
-
-/*
* AFS superblock private data
* - there's one superblock per volume
*/
struct afs_super_info {
+ struct afs_net *net; /* Network namespace */
+ struct afs_cell *cell; /* The cell in which the volume resides */
struct afs_volume *volume; /* volume record */
- char rwparent; /* T if parent is R/W AFS volume */
};
static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
@@ -185,149 +196,238 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
extern struct file_system_type afs_fs_type;
/*
- * entry in the cached cell catalogue
+ * AFS network namespace record.
*/
-struct afs_cache_cell {
- char name[AFS_MAXCELLNAME]; /* cell name (padded with NULs) */
- struct in_addr vl_servers[15]; /* cached cell VL servers */
+struct afs_net {
+ struct afs_uuid uuid;
+ bool live; /* F if this namespace is being removed */
+
+ /* AF_RXRPC I/O stuff */
+ struct socket *socket;
+ struct afs_call *spare_incoming_call;
+ struct work_struct charge_preallocation_work;
+ struct mutex socket_mutex;
+ atomic_t nr_outstanding_calls;
+ atomic_t nr_superblocks;
+
+ /* Cell database */
+ struct rb_root cells;
+ struct afs_cell *ws_cell;
+ struct work_struct cells_manager;
+ struct timer_list cells_timer;
+ atomic_t cells_outstanding;
+ seqlock_t cells_lock;
+
+ spinlock_t proc_cells_lock;
+ struct list_head proc_cells;
+
+ /* Known servers. Theoretically each fileserver can only be in one
+ * cell, but in practice, people create aliases and subsets and there's
+ * no easy way to distinguish them.
+ */
+ seqlock_t fs_lock; /* For fs_servers */
+ struct rb_root fs_servers; /* afs_server (by server UUID or address) */
+ struct list_head fs_updates; /* afs_server (by update_at) */
+ struct hlist_head fs_proc; /* procfs servers list */
+
+ struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
+ struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
+ seqlock_t fs_addr_lock; /* For fs_addresses[46] */
+
+ struct work_struct fs_manager;
+ struct timer_list fs_timer;
+ atomic_t servers_outstanding;
+
+ /* File locking renewal management */
+ struct mutex lock_manager_mutex;
+
+ /* Misc */
+ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */
+};
+
+extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns
+
+enum afs_cell_state {
+ AFS_CELL_UNSET,
+ AFS_CELL_ACTIVATING,
+ AFS_CELL_ACTIVE,
+ AFS_CELL_DEACTIVATING,
+ AFS_CELL_INACTIVE,
+ AFS_CELL_FAILED,
};
/*
- * AFS cell record
+ * AFS cell record.
+ *
+ * This is a tricky concept to get right as it is possible to create aliases
+ * simply by pointing AFSDB/SRV records for two names at the same set of VL
+ * servers; it is also possible to do things like setting up two sets of VL
+ * servers, one of which provides a superset of the volumes provided by the
+ * other (for internal/external division, for example).
+ *
+ * Cells only exist in the sense that (a) a cell's name maps to a set of VL
+ * servers and (b) a cell's name is used by the client to select the key to use
+ * for authentication and encryption. The cell name is not typically used in
+ * the protocol.
+ *
+ * There is no easy way to determine if two cells are aliases or one is a
+ * subset of another.
*/
struct afs_cell {
- atomic_t usage;
- struct list_head link; /* main cell list link */
+ union {
+ struct rcu_head rcu;
+ struct rb_node net_node; /* Node in net->cells */
+ };
+ struct afs_net *net;
struct key *anonymous_key; /* anonymous user key for this cell */
+ struct work_struct manager; /* Manager for init/deinit/dns */
struct list_head proc_link; /* /proc cell list link */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
-
- /* server record management */
- rwlock_t servers_lock; /* active server list lock */
- struct list_head servers; /* active server list */
-
- /* volume location record management */
- struct rw_semaphore vl_sem; /* volume management serialisation semaphore */
- struct list_head vl_list; /* cell's active VL record list */
- spinlock_t vl_lock; /* vl_list lock */
- unsigned short vl_naddrs; /* number of VL servers in addr list */
- unsigned short vl_curr_svix; /* current server index */
- struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */
-
- char name[0]; /* cell name - must go last */
+ time64_t dns_expiry; /* Time AFSDB/SRV record expires */
+ time64_t last_inactive; /* Time of last drop of usage count */
+ atomic_t usage;
+ unsigned long flags;
+#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */
+#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
+#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
+#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */
+ enum afs_cell_state state;
+ short error;
+
+ /* Active fileserver interaction state. */
+ struct list_head proc_volumes; /* procfs volume list */
+ rwlock_t proc_lock;
+
+ /* VL server list. */
+ rwlock_t vl_addrs_lock; /* Lock on vl_addrs */
+ struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */
+ u8 name_len; /* Length of name */
+ char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
};
/*
- * entry in the cached volume location catalogue
+ * Cached VLDB entry.
+ *
+ * This is pointed to by cell->vldb_entries, indexed by name.
*/
-struct afs_cache_vlocation {
- /* volume name (lowercase, padded with NULs) */
- uint8_t name[AFS_MAXVOLNAME + 1];
+struct afs_vldb_entry {
+ afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */
- uint8_t nservers; /* number of entries used in servers[] */
- uint8_t vidmask; /* voltype mask for vid[] */
- uint8_t srvtmask[8]; /* voltype masks for servers[] */
+ unsigned long flags;
+#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */
+#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */
+#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */
+#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */
+#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */
+
+ uuid_t fs_server[AFS_NMAXNSERVERS];
+ u8 fs_mask[AFS_NMAXNSERVERS];
#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
-
- afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */
- struct in_addr servers[8]; /* fileserver addresses */
- time_t rtime; /* last retrieval time */
+ short error;
+ u8 nr_servers; /* Number of server records */
+ u8 name_len;
+ u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
};
/*
- * volume -> vnode hash table entry
+ * Record of fileserver with which we're actively communicating.
*/
-struct afs_cache_vhash {
- afs_voltype_t vtype; /* which volume variation */
- uint8_t hash_bucket; /* which hash bucket this represents */
-} __attribute__((packed));
+struct afs_server {
+ struct rcu_head rcu;
+ union {
+ uuid_t uuid; /* Server ID */
+ struct afs_uuid _uuid;
+ };
-/*
- * AFS volume location record
- */
-struct afs_vlocation {
+ struct afs_addr_list __rcu *addresses;
+ struct rb_node uuid_rb; /* Link in net->servers */
+ struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
+ struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
+ struct hlist_node proc_link; /* Link in net->fs_proc */
+ struct afs_server *gc_next; /* Next server in manager's list */
+ time64_t put_time; /* Time at which last put */
+ time64_t update_at; /* Time at which to next update the record */
+ unsigned long flags;
+#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
+#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
+#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
+#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
+#define AFS_SERVER_FL_UPDATING 4
+#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */
+#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
atomic_t usage;
- time64_t time_of_death; /* time at which put reduced usage to 0 */
- struct list_head link; /* link in cell volume location list */
- struct list_head grave; /* link in master graveyard list */
- struct list_head update; /* link in master update list */
- struct afs_cell *cell; /* cell to which volume belongs */
-#ifdef CONFIG_AFS_FSCACHE
- struct fscache_cookie *cache; /* caching cookie */
-#endif
- struct afs_cache_vlocation vldb; /* volume information DB record */
- struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
- wait_queue_head_t waitq; /* status change waitqueue */
- time64_t update_at; /* time at which record should be updated */
- spinlock_t lock; /* access lock */
- afs_vlocation_state_t state; /* volume location state */
- unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
- unsigned short upd_busy_cnt; /* EBUSY count during update */
- bool valid; /* T if valid */
+ u32 addr_version; /* Address list version */
+
+ /* file service access */
+ rwlock_t fs_lock; /* access lock */
+
+ /* callback promise management */
+ struct list_head cb_interests; /* List of superblocks using this server */
+ unsigned cb_s_break; /* Break-everything counter. */
+ rwlock_t cb_break_lock; /* Volume finding lock */
};
/*
- * AFS fileserver record
+ * Interest by a superblock on a server.
*/
-struct afs_server {
- atomic_t usage;
- time64_t time_of_death; /* time at which put reduced usage to 0 */
- struct in_addr addr; /* server address */
- struct afs_cell *cell; /* cell in which server resides */
- struct list_head link; /* link in cell's server list */
- struct list_head grave; /* link in master graveyard list */
- struct rb_node master_rb; /* link in master by-addr tree */
- struct rw_semaphore sem; /* access lock */
+struct afs_cb_interest {
+ struct list_head cb_link; /* Link in server->cb_interests */
+ struct afs_server *server; /* Server on which this interest resides */
+ struct super_block *sb; /* Superblock on which inodes reside */
+ afs_volid_t vid; /* Volume ID to match */
+ refcount_t usage;
+};
- /* file service access */
- struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */
- unsigned long fs_act_jif; /* time at which last activity occurred */
- unsigned long fs_dead_jif; /* time at which no longer to be considered dead */
- spinlock_t fs_lock; /* access lock */
- int fs_state; /* 0 or reason FS currently marked dead (-errno) */
+/*
+ * Replaceable server list.
+ */
+struct afs_server_entry {
+ struct afs_server *server;
+ struct afs_cb_interest *cb_interest;
+};
- /* callback promise management */
- struct rb_root cb_promises; /* vnode expiration list (ordered earliest first) */
- struct delayed_work cb_updater; /* callback updater */
- struct delayed_work cb_break_work; /* collected break dispatcher */
- wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */
- spinlock_t cb_lock; /* access lock */
- struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */
- atomic_t cb_break_n; /* number of pending breaks */
- u8 cb_break_head; /* head of callback breaking ring */
- u8 cb_break_tail; /* tail of callback breaking ring */
+struct afs_server_list {
+ refcount_t usage;
+ unsigned short nr_servers;
+ unsigned short index; /* Server currently in use */
+ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
+ unsigned int seq; /* Set to ->servers_seq when installed */
+ struct afs_server_entry servers[];
};
/*
- * AFS volume access record
+ * Live AFS volume management.
*/
struct afs_volume {
+ afs_volid_t vid; /* volume ID */
atomic_t usage;
- struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
- struct afs_vlocation *vlocation; /* volume location */
+ time64_t update_at; /* Time at which to next update */
+ struct afs_cell *cell; /* Cell to which belongs (pins ref) */
+ struct list_head proc_link; /* Link in cell->vl_proc */
+ unsigned long flags;
+#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */
+#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
+#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
+#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
+#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
+#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
- afs_volid_t vid; /* volume ID */
+ struct afs_server_list *servers; /* List of servers on which volume resides */
+ rwlock_t servers_lock; /* Lock for ->servers */
+ unsigned int servers_seq; /* Incremented each time ->servers changes */
+
afs_voltype_t type; /* type of volume */
+ short error;
char type_force; /* force volume type (suppress R/O -> R/W) */
- unsigned short nservers; /* number of server slots filled */
- unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
- struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
- struct rw_semaphore server_sem; /* lock for accessing current server */
-};
-
-/*
- * vnode catalogue entry
- */
-struct afs_cache_vnode {
- afs_vnodeid_t vnode_id; /* vnode ID */
- unsigned vnode_unique; /* vnode ID uniquifier */
- afs_dataversion_t data_version; /* data version */
+ u8 name_len;
+ u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
};
/*
@@ -337,24 +437,20 @@ struct afs_vnode {
struct inode vfs_inode; /* the VFS's inode record */
struct afs_volume *volume; /* volume on which vnode resides */
- struct afs_server *server; /* server currently supplying this file */
struct afs_fid fid; /* the file identifier for this inode */
struct afs_file_status status; /* AFS status info for this file */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
- struct afs_permits *permits; /* cache of permits so far obtained */
- struct mutex permits_lock; /* lock for altering permits list */
+ struct afs_permits *permit_cache; /* cache of permits so far obtained */
+ struct mutex io_lock; /* Lock for serialising I/O on this mutex */
struct mutex validate_lock; /* lock for validating this vnode */
- wait_queue_head_t update_waitq; /* status fetch waitqueue */
- int update_cnt; /* number of outstanding ops that will update the
- * status */
- spinlock_t writeback_lock; /* lock for writebacks */
+ spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
-#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
+#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
-#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */
+#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
@@ -365,24 +461,21 @@ struct afs_vnode {
#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
- long acl_order; /* ACL check count (callback break count) */
-
- struct list_head writebacks; /* alterations in pagecache that need writing */
+ struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
struct delayed_work lock_work; /* work to be done in locking */
struct key *unlock_key; /* key to be used in unlocking */
/* outstanding callback notification on this file */
- struct rb_node server_rb; /* link in server->fs_vnodes */
- struct rb_node cb_promise; /* link in server->cb_promises */
- struct work_struct cb_broken_work; /* work to be done on callback break */
- time64_t cb_expires; /* time at which callback expires */
- time64_t cb_expires_at; /* time used to order cb_promise */
+ struct afs_cb_interest *cb_interest; /* Server on which this resides */
+ unsigned int cb_s_break; /* Mass break counter on ->server */
+ unsigned int cb_break; /* Break counter on vnode */
+ seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
+
+ time64_t cb_expires_at; /* time at which callback expires */
unsigned cb_version; /* callback version */
- unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
- bool cb_promised; /* true if promise still holds */
};
/*
@@ -390,16 +483,21 @@ struct afs_vnode {
*/
struct afs_permit {
struct key *key; /* RxRPC ticket holding a security context */
- afs_access_t access_mask; /* access mask for this key */
+ afs_access_t access; /* CallerAccess value for this key */
};
/*
- * cache of security records from attempts to access a vnode
+ * Immutable cache of CallerAccess records from attempts to access vnodes.
+ * These may be shared between multiple vnodes.
*/
struct afs_permits {
- struct rcu_head rcu; /* disposal procedure */
- int count; /* number of records */
- struct afs_permit permits[0]; /* the permits so far examined */
+ struct rcu_head rcu;
+ struct hlist_node hash_node; /* Link in hash */
+ unsigned long h; /* Hash value for this permit list */
+ refcount_t usage;
+ unsigned short nr_permits; /* Number of records */
+ bool invalidated; /* Invalidated due to key change */
+ struct afs_permit permits[]; /* List of permits sorted by key pointer */
};
/*
@@ -411,28 +509,78 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */
};
-struct afs_uuid {
- __be32 time_low; /* low part of timestamp */
- __be16 time_mid; /* mid part of timestamp */
- __be16 time_hi_and_version; /* high part of timestamp and version */
- __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
- __u8 clock_seq_low; /* clock seq low */
- __u8 node[6]; /* spatially unique node ID (MAC addr) */
+/*
+ * Cursor for iterating over a server's address list.
+ */
+struct afs_addr_cursor {
+ struct afs_addr_list *alist; /* Current address list (pins ref) */
+ struct sockaddr_rxrpc *addr;
+ u32 abort_code;
+ unsigned short start; /* Starting point in alist->addrs[] */
+ unsigned short index; /* Wrapping offset from start to current addr */
+ short error;
+ bool begun; /* T if we've begun iteration */
+ bool responded; /* T if the current address responded */
+};
+
+/*
+ * Cursor for iterating over a set of fileservers.
+ */
+struct afs_fs_cursor {
+ struct afs_addr_cursor ac;
+ struct afs_vnode *vnode;
+ struct afs_server_list *server_list; /* Current server list (pins ref) */
+ struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
+ struct key *key; /* Key for the server */
+ unsigned int cb_break; /* cb_break + cb_s_break before the call */
+ unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
+ unsigned char start; /* Initial index in server list */
+ unsigned char index; /* Number of servers tried beyond start */
+ unsigned short flags;
+#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
+#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */
+#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */
+#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */
+#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
+#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
};
+#include <trace/events/afs.h>
+
/*****************************************************************************/
/*
+ * addr_list.c
+ */
+static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
+{
+ if (alist)
+ refcount_inc(&alist->usage);
+ return alist;
+}
+extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
+ unsigned short,
+ unsigned short);
+extern void afs_put_addrlist(struct afs_addr_list *);
+extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
+ unsigned short, unsigned short);
+extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern bool afs_iterate_addresses(struct afs_addr_cursor *);
+extern int afs_end_cursor(struct afs_addr_cursor *);
+extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
+
+extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
+extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
+
+/*
* cache.c
*/
#ifdef CONFIG_AFS_FSCACHE
extern struct fscache_netfs afs_cache_netfs;
extern struct fscache_cookie_def afs_cell_cache_index_def;
-extern struct fscache_cookie_def afs_vlocation_cache_index_def;
extern struct fscache_cookie_def afs_volume_cache_index_def;
extern struct fscache_cookie_def afs_vnode_cache_index_def;
#else
#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL)
-#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL)
#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL)
#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL)
#endif
@@ -441,29 +589,31 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
-extern void afs_broken_callback_work(struct work_struct *);
-extern void afs_break_callbacks(struct afs_server *, size_t,
- struct afs_callback[]);
-extern void afs_discard_callback_on_delete(struct afs_vnode *);
-extern void afs_give_up_callback(struct afs_vnode *);
-extern void afs_dispatch_give_up_callbacks(struct work_struct *);
-extern void afs_flush_callback_breaks(struct afs_server *);
-extern int __init afs_callback_update_init(void);
-extern void afs_callback_update_kill(void);
+extern void afs_break_callback(struct afs_vnode *);
+extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
+
+extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
+extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
+extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *);
+
+static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
+{
+ refcount_inc(&cbi->usage);
+ return cbi;
+}
/*
* cell.c
*/
-extern struct rw_semaphore afs_proc_cells_sem;
-extern struct list_head afs_proc_cells;
-
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
-extern int afs_cell_init(char *);
-extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool);
-extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool);
-extern struct afs_cell *afs_grab_cell(struct afs_cell *);
-extern void afs_put_cell(struct afs_cell *);
-extern void afs_cell_purge(void);
+extern int afs_cell_init(struct afs_net *, const char *);
+extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
+extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
+ const char *, bool);
+extern struct afs_cell *afs_get_cell(struct afs_cell *);
+extern void afs_put_cell(struct afs_net *, struct afs_cell *);
+extern void afs_manage_cells(struct work_struct *);
+extern void afs_cells_timer(struct timer_list *);
+extern void __net_exit afs_cell_purge(struct afs_net *);
/*
* cmservice.c
@@ -473,6 +623,7 @@ extern bool afs_cm_incoming_call(struct afs_call *);
/*
* dir.c
*/
+extern bool afs_dir_check_page(struct inode *, struct page *);
extern const struct inode_operations afs_dir_inode_operations;
extern const struct dentry_operations afs_fs_dentry_operations;
extern const struct file_operations afs_dir_file_operations;
@@ -484,15 +635,19 @@ extern const struct address_space_operations afs_fs_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;
+extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
+extern void afs_put_wb_key(struct afs_wb_key *);
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
+extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *);
extern int afs_page_filler(void *, struct page *);
extern void afs_put_read(struct afs_read *);
/*
* flock.c
*/
-extern void __exit afs_kill_lock_manager(void);
+extern struct workqueue_struct *afs_lock_manager;
+
extern void afs_lock_work(struct work_struct *);
extern void afs_lock_may_be_available(struct afs_vnode *);
extern int afs_lock(struct file *, int, struct file_lock *);
@@ -501,48 +656,40 @@ extern int afs_flock(struct file *, int, struct file_lock *);
/*
* fsclient.c
*/
-extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
- struct afs_vnode *, struct afs_volsync *,
- bool);
-extern int afs_fs_give_up_callbacks(struct afs_server *, bool);
-extern int afs_fs_fetch_data(struct afs_server *, struct key *,
- struct afs_vnode *, struct afs_read *, bool);
-extern int afs_fs_create(struct afs_server *, struct key *,
- struct afs_vnode *, const char *, umode_t,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, bool);
-extern int afs_fs_remove(struct afs_server *, struct key *,
- struct afs_vnode *, const char *, bool, bool);
-extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
- struct afs_vnode *, const char *, bool);
-extern int afs_fs_symlink(struct afs_server *, struct key *,
- struct afs_vnode *, const char *, const char *,
- struct afs_fid *, struct afs_file_status *, bool);
-extern int afs_fs_rename(struct afs_server *, struct key *,
- struct afs_vnode *, const char *,
- struct afs_vnode *, const char *, bool);
-extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
- pgoff_t, pgoff_t, unsigned, unsigned, bool);
-extern int afs_fs_setattr(struct afs_server *, struct key *,
- struct afs_vnode *, struct iattr *, bool);
-extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
- struct afs_vnode *,
- struct afs_volume_status *, bool);
-extern int afs_fs_set_lock(struct afs_server *, struct key *,
- struct afs_vnode *, afs_lock_type_t, bool);
-extern int afs_fs_extend_lock(struct afs_server *, struct key *,
- struct afs_vnode *, bool);
-extern int afs_fs_release_lock(struct afs_server *, struct key *,
- struct afs_vnode *, bool);
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *);
+extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
+extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
+ struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool);
+extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *);
+extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+ struct afs_fid *, struct afs_file_status *);
+extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
+ struct afs_vnode *, const char *);
+extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
+ pgoff_t, pgoff_t, unsigned, unsigned);
+extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int afs_fs_extend_lock(struct afs_fs_cursor *);
+extern int afs_fs_release_lock(struct afs_fs_cursor *);
+extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
+ struct afs_addr_cursor *, struct key *);
+extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
+ struct afs_addr_cursor *, struct key *);
/*
* inode.c
*/
+extern int afs_fetch_status(struct afs_vnode *, struct key *);
+extern int afs_iget5_test(struct inode *, void *);
extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
struct key *);
extern struct inode *afs_iget(struct super_block *, struct key *,
struct afs_fid *, struct afs_file_status *,
- struct afs_callback *);
+ struct afs_callback *,
+ struct afs_cb_interest *);
extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -554,7 +701,35 @@ extern int afs_drop_inode(struct inode *);
* main.c
*/
extern struct workqueue_struct *afs_wq;
-extern struct afs_uuid afs_uuid;
+
+static inline struct afs_net *afs_d2net(struct dentry *dentry)
+{
+ return &__afs_net;
+}
+
+static inline struct afs_net *afs_i2net(struct inode *inode)
+{
+ return &__afs_net;
+}
+
+static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
+{
+ return &__afs_net;
+}
+
+static inline struct afs_net *afs_sock2net(struct sock *sk)
+{
+ return &__afs_net;
+}
+
+static inline struct afs_net *afs_get_net(struct afs_net *net)
+{
+ return net;
+}
+
+static inline void afs_put_net(struct afs_net *net)
+{
+}
/*
* misc.c
@@ -579,23 +754,33 @@ extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
/*
* proc.c
*/
-extern int afs_proc_init(void);
-extern void afs_proc_cleanup(void);
-extern int afs_proc_cell_setup(struct afs_cell *);
-extern void afs_proc_cell_remove(struct afs_cell *);
+extern int __net_init afs_proc_init(struct afs_net *);
+extern void __net_exit afs_proc_cleanup(struct afs_net *);
+extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
+extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
+
+/*
+ * rotate.c
+ */
+extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *,
+ struct key *);
+extern bool afs_select_fileserver(struct afs_fs_cursor *);
+extern bool afs_select_current_fileserver(struct afs_fs_cursor *);
+extern int afs_end_vnode_operation(struct afs_fs_cursor *);
/*
* rxrpc.c
*/
-extern struct socket *afs_socket;
-extern atomic_t afs_outstanding_calls;
+extern struct workqueue_struct *afs_async_calls;
-extern int afs_open_socket(void);
-extern void afs_close_socket(void);
+extern int __net_init afs_open_socket(struct afs_net *);
+extern void __net_exit afs_close_socket(struct afs_net *);
+extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
extern int afs_queue_call_work(struct afs_call *);
-extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, bool);
-extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
+extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
+extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
+ const struct afs_call_type *,
size_t, size_t);
extern void afs_flat_call_destructor(struct afs_call *);
extern void afs_send_empty_reply(struct afs_call *);
@@ -607,117 +792,135 @@ static inline int afs_transfer_reply(struct afs_call *call)
return afs_extract_data(call, call->buffer, call->reply_max, false);
}
+static inline bool afs_check_call_state(struct afs_call *call,
+ enum afs_call_state state)
+{
+ return READ_ONCE(call->state) == state;
+}
+
+static inline bool afs_set_call_state(struct afs_call *call,
+ enum afs_call_state from,
+ enum afs_call_state to)
+{
+ bool ok = false;
+
+ spin_lock_bh(&call->state_lock);
+ if (call->state == from) {
+ call->state = to;
+ trace_afs_call_state(call, from, to, 0, 0);
+ ok = true;
+ }
+ spin_unlock_bh(&call->state_lock);
+ return ok;
+}
+
+static inline void afs_set_call_complete(struct afs_call *call,
+ int error, u32 remote_abort)
+{
+ enum afs_call_state state;
+ bool ok = false;
+
+ spin_lock_bh(&call->state_lock);
+ state = call->state;
+ if (state != AFS_CALL_COMPLETE) {
+ call->abort_code = remote_abort;
+ call->error = error;
+ call->state = AFS_CALL_COMPLETE;
+ trace_afs_call_state(call, state, AFS_CALL_COMPLETE,
+ error, remote_abort);
+ ok = true;
+ }
+ spin_unlock_bh(&call->state_lock);
+ if (ok)
+ trace_afs_call_done(call);
+}
+
/*
* security.c
*/
+extern void afs_put_permits(struct afs_permits *);
extern void afs_clear_permits(struct afs_vnode *);
-extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern int afs_permission(struct inode *, int);
+extern void __exit afs_clean_up_permit_cache(void);
/*
* server.c
*/
extern spinlock_t afs_server_peer_lock;
-#define afs_get_server(S) \
-do { \
- _debug("GET SERVER %d", atomic_read(&(S)->usage)); \
- atomic_inc(&(S)->usage); \
-} while(0)
+static inline struct afs_server *afs_get_server(struct afs_server *server)
+{
+ atomic_inc(&server->usage);
+ return server;
+}
-extern struct afs_server *afs_lookup_server(struct afs_cell *,
- const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
-extern void afs_put_server(struct afs_server *);
-extern void __exit afs_purge_servers(void);
+extern struct afs_server *afs_find_server(struct afs_net *,
+ const struct sockaddr_rxrpc *);
+extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
+extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
+extern void afs_put_server(struct afs_net *, struct afs_server *);
+extern void afs_manage_servers(struct work_struct *);
+extern void afs_servers_timer(struct timer_list *);
+extern void __net_exit afs_purge_servers(struct afs_net *);
+extern bool afs_probe_fileserver(struct afs_fs_cursor *);
+extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
/*
- * super.c
+ * server_list.c
*/
-extern int afs_fs_init(void);
-extern void afs_fs_exit(void);
+static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist)
+{
+ refcount_inc(&slist->usage);
+ return slist;
+}
-/*
- * vlclient.c
- */
-extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
- const char *, struct afs_cache_vlocation *,
- bool);
-extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
- afs_volid_t, afs_voltype_t,
- struct afs_cache_vlocation *, bool);
+extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *);
+extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *,
+ struct afs_vldb_entry *,
+ u8);
+extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *);
/*
- * vlocation.c
+ * super.c
*/
-#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
-
-extern int __init afs_vlocation_update_init(void);
-extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
- struct key *,
- const char *, size_t);
-extern void afs_put_vlocation(struct afs_vlocation *);
-extern void afs_vlocation_purge(void);
+extern int __init afs_fs_init(void);
+extern void __exit afs_fs_exit(void);
/*
- * vnode.c
+ * vlclient.c
*/
-static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
-{
- return container_of(inode, struct afs_vnode, vfs_inode);
-}
-
-static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
-{
- return &vnode->vfs_inode;
-}
-
-extern void afs_vnode_finalise_status_update(struct afs_vnode *,
- struct afs_server *);
-extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
- struct key *);
-extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
- struct afs_read *);
-extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
- umode_t, struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, struct afs_server **);
-extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
- bool);
-extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
- const char *);
-extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
- const char *, struct afs_fid *,
- struct afs_file_status *, struct afs_server **);
-extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
- struct key *, const char *, const char *);
-extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
- unsigned, unsigned);
-extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
-extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *,
- struct afs_volume_status *);
-extern int afs_vnode_set_lock(struct afs_vnode *, struct key *,
- afs_lock_type_t);
-extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *);
-extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
+ struct afs_addr_cursor *,
+ struct key *, const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
+ struct key *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
+ struct key *, const uuid_t *);
/*
* volume.c
*/
-#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
+{
+ if (volume)
+ atomic_inc(&volume->usage);
+ return volume;
+}
-extern void afs_put_volume(struct afs_volume *);
-extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
-extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
-extern int afs_volume_release_fileserver(struct afs_vnode *,
- struct afs_server *, int);
+extern struct afs_volume *afs_create_volume(struct afs_mount_params *);
+extern void afs_activate_volume(struct afs_volume *);
+extern void afs_deactivate_volume(struct afs_volume *);
+extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
+extern int afs_check_volume_status(struct afs_volume *, struct key *);
/*
* write.c
*/
extern int afs_set_page_dirty(struct page *);
-extern void afs_put_writeback(struct afs_writeback *);
extern int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
@@ -728,9 +931,11 @@ extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
-extern int afs_writeback_all(struct afs_vnode *);
extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
+extern int afs_page_mkwrite(struct vm_fault *);
+extern void afs_prune_wb_keys(struct afs_vnode *);
+extern int afs_launder_page(struct page *);
/*
* xattr.c
@@ -738,12 +943,42 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern const struct xattr_handler *afs_xattr_handlers[];
extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
+
+/*
+ * Miscellaneous inline functions.
+ */
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+ return container_of(inode, struct afs_vnode, vfs_inode);
+}
+
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+ return &vnode->vfs_inode;
+}
+
+static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ unsigned int cb_break)
+{
+ if (fc->ac.error == 0)
+ afs_cache_permit(vnode, fc->key, cb_break);
+}
+
+static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode)
+{
+ if (fc->ac.error == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ afs_break_callback(vnode);
+ }
+}
+
+
/*****************************************************************************/
/*
* debug tracing
*/
-#include <trace/events/afs.h>
-
extern unsigned afs_debug;
#define dbgprintk(FMT,...) \
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 9944770849da..15a02a05ff40 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,57 +31,112 @@ static char *rootcell;
module_param(rootcell, charp, 0);
MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
-struct afs_uuid afs_uuid;
struct workqueue_struct *afs_wq;
+struct afs_net __afs_net;
/*
- * initialise the AFS client FS module
+ * Initialise an AFS network namespace record.
*/
-static int __init afs_init(void)
+static int __net_init afs_net_init(struct afs_net *net)
{
int ret;
- printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+ net->live = true;
+ generate_random_uuid((unsigned char *)&net->uuid);
- generate_random_uuid((unsigned char *)&afs_uuid);
+ INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+ mutex_init(&net->socket_mutex);
- /* create workqueue */
- ret = -ENOMEM;
- afs_wq = alloc_workqueue("afs", 0, 0);
- if (!afs_wq)
- return ret;
+ net->cells = RB_ROOT;
+ seqlock_init(&net->cells_lock);
+ INIT_WORK(&net->cells_manager, afs_manage_cells);
+ timer_setup(&net->cells_timer, afs_cells_timer, 0);
- /* register the /proc stuff */
- ret = afs_proc_init();
- if (ret < 0)
- goto error_proc;
+ spin_lock_init(&net->proc_cells_lock);
+ INIT_LIST_HEAD(&net->proc_cells);
-#ifdef CONFIG_AFS_FSCACHE
- /* we want to be able to cache */
- ret = fscache_register_netfs(&afs_cache_netfs);
+ seqlock_init(&net->fs_lock);
+ net->fs_servers = RB_ROOT;
+ INIT_LIST_HEAD(&net->fs_updates);
+ INIT_HLIST_HEAD(&net->fs_proc);
+
+ INIT_HLIST_HEAD(&net->fs_addresses4);
+ INIT_HLIST_HEAD(&net->fs_addresses6);
+ seqlock_init(&net->fs_addr_lock);
+
+ INIT_WORK(&net->fs_manager, afs_manage_servers);
+ timer_setup(&net->fs_timer, afs_servers_timer, 0);
+
+ /* Register the /proc stuff */
+ ret = afs_proc_init(net);
if (ret < 0)
- goto error_cache;
-#endif
+ goto error_proc;
- /* initialise the cell DB */
- ret = afs_cell_init(rootcell);
+ /* Initialise the cell DB */
+ ret = afs_cell_init(net, rootcell);
if (ret < 0)
goto error_cell_init;
- /* initialise the VL update process */
- ret = afs_vlocation_update_init();
+ /* Create the RxRPC transport */
+ ret = afs_open_socket(net);
if (ret < 0)
- goto error_vl_update_init;
+ goto error_open_socket;
- /* initialise the callback update process */
- ret = afs_callback_update_init();
+ return 0;
+
+error_open_socket:
+ net->live = false;
+ afs_cell_purge(net);
+ afs_purge_servers(net);
+error_cell_init:
+ net->live = false;
+ afs_proc_cleanup(net);
+error_proc:
+ net->live = false;
+ return ret;
+}
+
+/*
+ * Clean up and destroy an AFS network namespace record.
+ */
+static void __net_exit afs_net_exit(struct afs_net *net)
+{
+ net->live = false;
+ afs_cell_purge(net);
+ afs_purge_servers(net);
+ afs_close_socket(net);
+ afs_proc_cleanup(net);
+}
+
+/*
+ * initialise the AFS client FS module
+ */
+static int __init afs_init(void)
+{
+ int ret = -ENOMEM;
+
+ printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
+
+ afs_wq = alloc_workqueue("afs", 0, 0);
+ if (!afs_wq)
+ goto error_afs_wq;
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+ if (!afs_async_calls)
+ goto error_async;
+ afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
+ if (!afs_lock_manager)
+ goto error_lockmgr;
+
+#ifdef CONFIG_AFS_FSCACHE
+ /* we want to be able to cache */
+ ret = fscache_register_netfs(&afs_cache_netfs);
if (ret < 0)
- goto error_callback_update_init;
+ goto error_cache;
+#endif
- /* create the RxRPC transport */
- ret = afs_open_socket();
+ ret = afs_net_init(&__afs_net);
if (ret < 0)
- goto error_open_socket;
+ goto error_net;
/* register the filesystems */
ret = afs_fs_init();
@@ -91,21 +146,18 @@ static int __init afs_init(void)
return ret;
error_fs:
- afs_close_socket();
-error_open_socket:
- afs_callback_update_kill();
-error_callback_update_init:
- afs_vlocation_purge();
-error_vl_update_init:
- afs_cell_purge();
-error_cell_init:
+ afs_net_exit(&__afs_net);
+error_net:
#ifdef CONFIG_AFS_FSCACHE
fscache_unregister_netfs(&afs_cache_netfs);
error_cache:
#endif
- afs_proc_cleanup();
-error_proc:
+ destroy_workqueue(afs_lock_manager);
+error_lockmgr:
+ destroy_workqueue(afs_async_calls);
+error_async:
destroy_workqueue(afs_wq);
+error_afs_wq:
rcu_barrier();
printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
return ret;
@@ -124,17 +176,14 @@ static void __exit afs_exit(void)
printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
afs_fs_exit();
- afs_kill_lock_manager();
- afs_close_socket();
- afs_purge_servers();
- afs_callback_update_kill();
- afs_vlocation_purge();
- destroy_workqueue(afs_wq);
- afs_cell_purge();
+ afs_net_exit(&__afs_net);
#ifdef CONFIG_AFS_FSCACHE
fscache_unregister_netfs(&afs_cache_netfs);
#endif
- afs_proc_cleanup();
+ destroy_workqueue(afs_lock_manager);
+ destroy_workqueue(afs_async_calls);
+ destroy_workqueue(afs_wq);
+ afs_clean_up_permit_cache();
rcu_barrier();
}
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index c05f1f1c0d41..700a5fa7f4ec 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -21,12 +21,12 @@
int afs_abort_to_error(u32 abort_code)
{
switch (abort_code) {
- /* low errno codes inserted into abort namespace */
+ /* Low errno codes inserted into abort namespace */
case 13: return -EACCES;
case 27: return -EFBIG;
case 30: return -EROFS;
- /* VICE "special error" codes; 101 - 111 */
+ /* VICE "special error" codes; 101 - 111 */
case VSALVAGE: return -EIO;
case VNOVNODE: return -ENOENT;
case VNOVOL: return -ENOMEDIUM;
@@ -39,7 +39,37 @@ int afs_abort_to_error(u32 abort_code)
case VBUSY: return -EBUSY;
case VMOVED: return -ENXIO;
- /* Unified AFS error table; ET "uae" == 0x2f6df00 */
+ /* Volume Location server errors */
+ case AFSVL_IDEXIST: return -EEXIST;
+ case AFSVL_IO: return -EREMOTEIO;
+ case AFSVL_NAMEEXIST: return -EEXIST;
+ case AFSVL_CREATEFAIL: return -EREMOTEIO;
+ case AFSVL_NOENT: return -ENOMEDIUM;
+ case AFSVL_EMPTY: return -ENOMEDIUM;
+ case AFSVL_ENTDELETED: return -ENOMEDIUM;
+ case AFSVL_BADNAME: return -EINVAL;
+ case AFSVL_BADINDEX: return -EINVAL;
+ case AFSVL_BADVOLTYPE: return -EINVAL;
+ case AFSVL_BADSERVER: return -EINVAL;
+ case AFSVL_BADPARTITION: return -EINVAL;
+ case AFSVL_REPSFULL: return -EFBIG;
+ case AFSVL_NOREPSERVER: return -ENOENT;
+ case AFSVL_DUPREPSERVER: return -EEXIST;
+ case AFSVL_RWNOTFOUND: return -ENOENT;
+ case AFSVL_BADREFCOUNT: return -EINVAL;
+ case AFSVL_SIZEEXCEEDED: return -EINVAL;
+ case AFSVL_BADENTRY: return -EINVAL;
+ case AFSVL_BADVOLIDBUMP: return -EINVAL;
+ case AFSVL_IDALREADYHASHED: return -EINVAL;
+ case AFSVL_ENTRYLOCKED: return -EBUSY;
+ case AFSVL_BADVOLOPER: return -EBADRQC;
+ case AFSVL_BADRELLOCKTYPE: return -EINVAL;
+ case AFSVL_RERELEASE: return -EREMOTEIO;
+ case AFSVL_BADSERVERFLAG: return -EINVAL;
+ case AFSVL_PERM: return -EACCES;
+ case AFSVL_NOMEM: return -EREMOTEIO;
+
+ /* Unified AFS error table; ET "uae" == 0x2f6df00 */
case 0x2f6df00: return -EPERM;
case 0x2f6df01: return -ENOENT;
case 0x2f6df04: return -EIO;
@@ -68,7 +98,7 @@ int afs_abort_to_error(u32 abort_code)
case 0x2f6df6c: return -ETIMEDOUT;
case 0x2f6df78: return -EDQUOT;
- /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
+ /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
case RXKADPACKETSHORT: return -EPROTO;
case RXKADLEVELFAIL: return -EKEYREJECTED;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 35efb9a31dd7..4508dd54f789 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,8 +17,15 @@
#include <linux/uaccess.h>
#include "internal.h"
-static struct proc_dir_entry *proc_afs;
+static inline struct afs_net *afs_proc2net(struct file *f)
+{
+ return &__afs_net;
+}
+static inline struct afs_net *afs_seq2net(struct seq_file *m)
+{
+ return &__afs_net; // TODO: use seq_file_net(m)
+}
static int afs_proc_cells_open(struct inode *inode, struct file *file);
static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos);
@@ -98,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
.release = seq_release,
};
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
-static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
+static int afs_proc_servers_open(struct inode *inode, struct file *file);
+static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_servers_next(struct seq_file *p, void *v,
loff_t *pos);
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v);
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v);
-
-static const struct seq_operations afs_proc_cell_servers_ops = {
- .start = afs_proc_cell_servers_start,
- .next = afs_proc_cell_servers_next,
- .stop = afs_proc_cell_servers_stop,
- .show = afs_proc_cell_servers_show,
+static void afs_proc_servers_stop(struct seq_file *p, void *v);
+static int afs_proc_servers_show(struct seq_file *m, void *v);
+
+static const struct seq_operations afs_proc_servers_ops = {
+ .start = afs_proc_servers_start,
+ .next = afs_proc_servers_next,
+ .stop = afs_proc_servers_stop,
+ .show = afs_proc_servers_show,
};
-static const struct file_operations afs_proc_cell_servers_fops = {
- .open = afs_proc_cell_servers_open,
+static const struct file_operations afs_proc_servers_fops = {
+ .open = afs_proc_servers_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
@@ -122,23 +129,24 @@ static const struct file_operations afs_proc_cell_servers_fops = {
/*
* initialise the /proc/fs/afs/ directory
*/
-int afs_proc_init(void)
+int afs_proc_init(struct afs_net *net)
{
_enter("");
- proc_afs = proc_mkdir("fs/afs", NULL);
- if (!proc_afs)
+ net->proc_afs = proc_mkdir("fs/afs", NULL);
+ if (!net->proc_afs)
goto error_dir;
- if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) ||
- !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops))
+ if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
+ !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
+ !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops))
goto error_tree;
_leave(" = 0");
return 0;
error_tree:
- remove_proc_subtree("fs/afs", NULL);
+ proc_remove(net->proc_afs);
error_dir:
_leave(" = -ENOMEM");
return -ENOMEM;
@@ -147,9 +155,10 @@ error_dir:
/*
* clean up the /proc/fs/afs/ directory
*/
-void afs_proc_cleanup(void)
+void afs_proc_cleanup(struct afs_net *net)
{
- remove_proc_subtree("fs/afs", NULL);
+ proc_remove(net->proc_afs);
+ net->proc_afs = NULL;
}
/*
@@ -166,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
m = file->private_data;
m->private = PDE_DATA(inode);
-
return 0;
}
@@ -176,25 +184,28 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
*/
static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
{
- /* lock the list against modification */
- down_read(&afs_proc_cells_sem);
- return seq_list_start_head(&afs_proc_cells, *_pos);
+ struct afs_net *net = afs_seq2net(m);
+
+ rcu_read_lock();
+ return seq_list_start_head(&net->proc_cells, *_pos);
}
/*
* move to next cell in cells list
*/
-static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
+static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
{
- return seq_list_next(v, &afs_proc_cells, pos);
+ struct afs_net *net = afs_seq2net(m);
+
+ return seq_list_next(v, &net->proc_cells, pos);
}
/*
* clean up after reading from the cells list
*/
-static void afs_proc_cells_stop(struct seq_file *p, void *v)
+static void afs_proc_cells_stop(struct seq_file *m, void *v)
{
- up_read(&afs_proc_cells_sem);
+ rcu_read_unlock();
}
/*
@@ -203,16 +214,16 @@ static void afs_proc_cells_stop(struct seq_file *p, void *v)
static int afs_proc_cells_show(struct seq_file *m, void *v)
{
struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+ struct afs_net *net = afs_seq2net(m);
- if (v == &afs_proc_cells) {
+ if (v == &net->proc_cells) {
/* display header on line 1 */
seq_puts(m, "USE NAME\n");
return 0;
}
/* display one cell per line on subsequent lines */
- seq_printf(m, "%3d %s\n",
- atomic_read(&cell->usage), cell->name);
+ seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
return 0;
}
@@ -223,6 +234,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
size_t size, loff_t *_pos)
{
+ struct afs_net *net = afs_proc2net(file);
char *kbuf, *name, *args;
int ret;
@@ -264,13 +276,13 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
if (strcmp(kbuf, "add") == 0) {
struct afs_cell *cell;
- cell = afs_cell_create(name, strlen(name), args, false);
+ cell = afs_lookup_cell(net, name, strlen(name), args, true);
if (IS_ERR(cell)) {
ret = PTR_ERR(cell);
goto done;
}
- afs_put_cell(cell);
+ set_bit(AFS_CELL_FL_NO_GC, &cell->flags);
printk("kAFS: Added new cell '%s'\n", name);
} else {
goto inval;
@@ -303,6 +315,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
const char __user *buf,
size_t size, loff_t *_pos)
{
+ struct afs_net *net = afs_proc2net(file);
char *kbuf, *s;
int ret;
@@ -322,7 +335,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
/* determine command to perform */
_debug("rootcell=%s", kbuf);
- ret = afs_cell_init(kbuf);
+ ret = afs_cell_init(net, kbuf);
if (ret >= 0)
ret = size; /* consume everything, always */
@@ -334,29 +347,27 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
/*
* initialise /proc/fs/afs/<cell>/
*/
-int afs_proc_cell_setup(struct afs_cell *cell)
+int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell)
{
struct proc_dir_entry *dir;
- _enter("%p{%s}", cell, cell->name);
+ _enter("%p{%s},%p", cell, cell->name, net->proc_afs);
- dir = proc_mkdir(cell->name, proc_afs);
+ dir = proc_mkdir(cell->name, net->proc_afs);
if (!dir)
goto error_dir;
- if (!proc_create_data("servers", 0, dir,
- &afs_proc_cell_servers_fops, cell) ||
- !proc_create_data("vlservers", 0, dir,
- &afs_proc_cell_vlservers_fops, cell) ||
+ if (!proc_create_data("vlservers", 0, dir,
+ &afs_proc_cell_vlservers_fops, cell) ||
!proc_create_data("volumes", 0, dir,
- &afs_proc_cell_volumes_fops, cell))
+ &afs_proc_cell_volumes_fops, cell))
goto error_tree;
_leave(" = 0");
return 0;
error_tree:
- remove_proc_subtree(cell->name, proc_afs);
+ remove_proc_subtree(cell->name, net->proc_afs);
error_dir:
_leave(" = -ENOMEM");
return -ENOMEM;
@@ -365,11 +376,11 @@ error_dir:
/*
* remove /proc/fs/afs/<cell>/
*/
-void afs_proc_cell_remove(struct afs_cell *cell)
+void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell)
{
_enter("");
- remove_proc_subtree(cell->name, proc_afs);
+ remove_proc_subtree(cell->name, net->proc_afs);
_leave("");
}
@@ -407,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
_enter("cell=%p pos=%Ld", cell, *_pos);
- /* lock the list against modification */
- down_read(&cell->vl_sem);
- return seq_list_start_head(&cell->vl_list, *_pos);
+ read_lock(&cell->proc_lock);
+ return seq_list_start_head(&cell->proc_volumes, *_pos);
}
/*
@@ -421,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
struct afs_cell *cell = p->private;
_enter("cell=%p pos=%Ld", cell, *_pos);
- return seq_list_next(v, &cell->vl_list, _pos);
+ return seq_list_next(v, &cell->proc_volumes, _pos);
}
/*
@@ -431,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
{
struct afs_cell *cell = p->private;
- up_read(&cell->vl_sem);
+ read_unlock(&cell->proc_lock);
}
-static const char afs_vlocation_states[][4] = {
- [AFS_VL_NEW] = "New",
- [AFS_VL_CREATING] = "Crt",
- [AFS_VL_VALID] = "Val",
- [AFS_VL_NO_VOLUME] = "NoV",
- [AFS_VL_UPDATING] = "Upd",
- [AFS_VL_VOLUME_DELETED] = "Del",
- [AFS_VL_UNCERTAIN] = "Unc",
+static const char afs_vol_types[3][3] = {
+ [AFSVL_RWVOL] = "RW",
+ [AFSVL_ROVOL] = "RO",
+ [AFSVL_BACKVOL] = "BK",
};
/*
@@ -450,23 +456,17 @@ static const char afs_vlocation_states[][4] = {
static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
{
struct afs_cell *cell = m->private;
- struct afs_vlocation *vlocation =
- list_entry(v, struct afs_vlocation, link);
+ struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
- /* display header on line 1 */
- if (v == &cell->vl_list) {
- seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n");
+ /* Display header on line 1 */
+ if (v == &cell->proc_volumes) {
+ seq_puts(m, "USE VID TY\n");
return 0;
}
- /* display one cell per line on subsequent lines */
- seq_printf(m, "%3d %s %08x %08x %08x %s\n",
- atomic_read(&vlocation->usage),
- afs_vlocation_states[vlocation->state],
- vlocation->vldb.vid[0],
- vlocation->vldb.vid[1],
- vlocation->vldb.vid[2],
- vlocation->vldb.name);
+ seq_printf(m, "%3d %08x %s\n",
+ atomic_read(&vol->usage), vol->vid,
+ afs_vol_types[vol->type]);
return 0;
}
@@ -501,23 +501,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
*/
static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
{
+ struct afs_addr_list *alist;
struct afs_cell *cell = m->private;
loff_t pos = *_pos;
- _enter("cell=%p pos=%Ld", cell, *_pos);
+ rcu_read_lock();
- /* lock the list against modification */
- down_read(&cell->vl_sem);
+ alist = rcu_dereference(cell->vl_addrs);
/* allow for the header line */
if (!pos)
return (void *) 1;
pos--;
- if (pos >= cell->vl_naddrs)
+ if (!alist || pos >= alist->nr_addrs)
return NULL;
- return &cell->vl_addrs[pos];
+ return alist->addrs + pos;
}
/*
@@ -526,17 +526,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *_pos)
{
+ struct afs_addr_list *alist;
struct afs_cell *cell = p->private;
loff_t pos;
- _enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos);
+ alist = rcu_dereference(cell->vl_addrs);
pos = *_pos;
(*_pos)++;
- if (pos >= cell->vl_naddrs)
+ if (!alist || pos >= alist->nr_addrs)
return NULL;
- return &cell->vl_addrs[pos];
+ return alist->addrs + pos;
}
/*
@@ -544,9 +545,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
*/
static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
{
- struct afs_cell *cell = p->private;
-
- up_read(&cell->vl_sem);
+ rcu_read_unlock();
}
/*
@@ -554,100 +553,76 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
*/
static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
{
- struct in_addr *addr = v;
+ struct sockaddr_rxrpc *addr = v;
/* display header on line 1 */
- if (v == (struct in_addr *) 1) {
+ if (v == (void *)1) {
seq_puts(m, "ADDRESS\n");
return 0;
}
/* display one cell per line on subsequent lines */
- seq_printf(m, "%pI4\n", &addr->s_addr);
+ seq_printf(m, "%pISp\n", &addr->transport);
return 0;
}
/*
- * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
+ * open "/proc/fs/afs/servers" which provides a summary of active
* servers
*/
-static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
+static int afs_proc_servers_open(struct inode *inode, struct file *file)
{
- struct afs_cell *cell;
- struct seq_file *m;
- int ret;
-
- cell = PDE_DATA(inode);
- if (!cell)
- return -ENOENT;
-
- ret = seq_open(file, &afs_proc_cell_servers_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = cell;
- return 0;
+ return seq_open(file, &afs_proc_servers_ops);
}
/*
- * set up the iterator to start reading from the cells list and return the
- * first item
+ * Set up the iterator to start reading from the server list and return the
+ * first item.
*/
-static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
- __acquires(m->private->servers_lock)
+static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
{
- struct afs_cell *cell = m->private;
-
- _enter("cell=%p pos=%Ld", cell, *_pos);
+ struct afs_net *net = afs_seq2net(m);
- /* lock the list against modification */
- read_lock(&cell->servers_lock);
- return seq_list_start_head(&cell->servers, *_pos);
+ rcu_read_lock();
+ return seq_hlist_start_head_rcu(&net->fs_proc, *_pos);
}
/*
* move to next cell in cells list
*/
-static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
- loff_t *_pos)
+static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
{
- struct afs_cell *cell = p->private;
+ struct afs_net *net = afs_seq2net(m);
- _enter("cell=%p pos=%Ld", cell, *_pos);
- return seq_list_next(v, &cell->servers, _pos);
+ return seq_hlist_next_rcu(v, &net->fs_proc, _pos);
}
/*
* clean up after reading from the cells list
*/
-static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
- __releases(p->private->servers_lock)
+static void afs_proc_servers_stop(struct seq_file *p, void *v)
{
- struct afs_cell *cell = p->private;
-
- read_unlock(&cell->servers_lock);
+ rcu_read_unlock();
}
/*
* display a header line followed by a load of volume lines
*/
-static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
+static int afs_proc_servers_show(struct seq_file *m, void *v)
{
- struct afs_cell *cell = m->private;
- struct afs_server *server = list_entry(v, struct afs_server, link);
- char ipaddr[20];
+ struct afs_server *server;
+ struct afs_addr_list *alist;
- /* display header on line 1 */
- if (v == &cell->servers) {
- seq_puts(m, "USE ADDR STATE\n");
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(m, "UUID USE ADDR\n");
return 0;
}
- /* display one cell per line on subsequent lines */
- sprintf(ipaddr, "%pI4", &server->addr);
- seq_printf(m, "%3d %-15.15s %5d\n",
- atomic_read(&server->usage), ipaddr, server->fs_state);
-
+ server = list_entry(v, struct afs_server, proc_link);
+ alist = rcu_dereference(server->addresses);
+ seq_printf(m, "%pU %3d %pISp\n",
+ &server->uuid,
+ atomic_read(&server->usage),
+ &alist->addrs[alist->index].transport);
return 0;
}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
new file mode 100644
index 000000000000..e728ca1776c9
--- /dev/null
+++ b/fs/afs/rotate.c
@@ -0,0 +1,715 @@
+/* Handle fileserver selection and rotation.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_fs.h"
+
+/*
+ * Initialise a filesystem server cursor for iterating over FS servers.
+ */
+void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+ memset(fc, 0, sizeof(*fc));
+}
+
+/*
+ * Begin an operation on the fileserver.
+ *
+ * Fileserver operations are serialised on the server by vnode, so we serialise
+ * them here also using the io_lock.
+ */
+bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+ struct key *key)
+{
+ afs_init_fs_cursor(fc, vnode);
+ fc->vnode = vnode;
+ fc->key = key;
+ fc->ac.error = SHRT_MAX;
+
+ if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ fc->ac.error = -EINTR;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+
+ if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
+ test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
+ fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
+ return true;
+}
+
+/*
+ * Begin iteration through a server list, starting with the vnode's last used
+ * server if possible, or the last recorded good server if not.
+ */
+static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode)
+{
+ struct afs_cb_interest *cbi;
+ int i;
+
+ read_lock(&vnode->volume->servers_lock);
+ fc->server_list = afs_get_serverlist(vnode->volume->servers);
+ read_unlock(&vnode->volume->servers_lock);
+
+ cbi = vnode->cb_interest;
+ if (cbi) {
+ /* See if the vnode's preferred record is still available */
+ for (i = 0; i < fc->server_list->nr_servers; i++) {
+ if (fc->server_list->servers[i].cb_interest == cbi) {
+ fc->start = i;
+ goto found_interest;
+ }
+ }
+
+ /* If we have a lock outstanding on a server that's no longer
+ * serving this vnode, then we can't switch to another server
+ * and have to return an error.
+ */
+ if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+ fc->ac.error = -ESTALE;
+ return false;
+ }
+
+ /* Note that the callback promise is effectively broken */
+ write_seqlock(&vnode->cb_lock);
+ ASSERTCMP(cbi, ==, vnode->cb_interest);
+ vnode->cb_interest = NULL;
+ if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ vnode->cb_break++;
+ write_sequnlock(&vnode->cb_lock);
+
+ afs_put_cb_interest(afs_v2net(vnode), cbi);
+ cbi = NULL;
+ } else {
+ fc->start = READ_ONCE(fc->server_list->index);
+ }
+
+found_interest:
+ fc->index = fc->start;
+ return true;
+}
+
+/*
+ * Post volume busy note.
+ */
+static void afs_busy(struct afs_volume *volume, u32 abort_code)
+{
+ const char *m;
+
+ switch (abort_code) {
+ case VOFFLINE: m = "offline"; break;
+ case VRESTARTING: m = "restarting"; break;
+ case VSALVAGING: m = "being salvaged"; break;
+ default: m = "busy"; break;
+ }
+
+ pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+}
+
+/*
+ * Sleep and retry the operation to the same fileserver.
+ */
+static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
+{
+ msleep_interruptible(1000);
+ if (signal_pending(current)) {
+ fc->ac.error = -ERESTARTSYS;
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Select the fileserver to use. May be called multiple times to rotate
+ * through the fileservers.
+ */
+bool afs_select_fileserver(struct afs_fs_cursor *fc)
+{
+ struct afs_addr_list *alist;
+ struct afs_server *server;
+ struct afs_vnode *vnode = fc->vnode;
+
+ _enter("%u/%u,%u/%u,%d,%d",
+ fc->index, fc->start,
+ fc->ac.index, fc->ac.start,
+ fc->ac.error, fc->ac.abort_code);
+
+ if (fc->flags & AFS_FS_CURSOR_STOP) {
+ _leave(" = f [stopped]");
+ return false;
+ }
+
+ /* Evaluate the result of the previous operation, if there was one. */
+ switch (fc->ac.error) {
+ case SHRT_MAX:
+ goto start;
+
+ case 0:
+ default:
+ /* Success or local failure. Stop. */
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ _leave(" = f [okay/local %d]", fc->ac.error);
+ return false;
+
+ case -ECONNABORTED:
+ /* The far side rejected the operation on some grounds. This
+ * might involve the server being busy or the volume having been moved.
+ */
+ switch (fc->ac.abort_code) {
+ case VNOVOL:
+ /* This fileserver doesn't know about the volume.
+ * - May indicate that the VL is wrong - retry once and compare
+ * the results.
+ * - May indicate that the fileserver couldn't attach to the vol.
+ */
+ if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
+ fc->ac.error = -EREMOTEIO;
+ goto failed;
+ }
+
+ write_lock(&vnode->volume->servers_lock);
+ fc->server_list->vnovol_mask |= 1 << fc->index;
+ write_unlock(&vnode->volume->servers_lock);
+
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
+ fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+ if (fc->ac.error < 0)
+ goto failed;
+
+ if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
+ fc->ac.error = -ENOMEDIUM;
+ goto failed;
+ }
+
+ /* If the server list didn't change, then assume that
+ * it's the fileserver having trouble.
+ */
+ if (vnode->volume->servers == fc->server_list) {
+ fc->ac.error = -EREMOTEIO;
+ goto failed;
+ }
+
+ /* Try again */
+ fc->flags |= AFS_FS_CURSOR_VNOVOL;
+ _leave(" = t [vnovol]");
+ return true;
+
+ case VSALVAGE: /* TODO: Should this return an error or iterate? */
+ case VVOLEXISTS:
+ case VNOSERVICE:
+ case VONLINE:
+ case VDISKFULL:
+ case VOVERQUOTA:
+ fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+ goto next_server;
+
+ case VOFFLINE:
+ if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
+ afs_busy(vnode->volume, fc->ac.abort_code);
+ clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
+ }
+ if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
+ fc->ac.error = -EADV;
+ goto failed;
+ }
+ if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+ fc->ac.error = -ESTALE;
+ goto failed;
+ }
+ goto busy;
+
+ case VSALVAGING:
+ case VRESTARTING:
+ case VBUSY:
+ /* Retry after going round all the servers unless we
+ * have a file lock we need to maintain.
+ */
+ if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
+ fc->ac.error = -EBUSY;
+ goto failed;
+ }
+ if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
+ afs_busy(vnode->volume, fc->ac.abort_code);
+ clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
+ }
+ busy:
+ if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
+ if (!afs_sleep_and_retry(fc))
+ goto failed;
+
+ /* Retry with same server & address */
+ _leave(" = t [vbusy]");
+ return true;
+ }
+
+ fc->flags |= AFS_FS_CURSOR_VBUSY;
+ goto next_server;
+
+ case VMOVED:
+ /* The volume migrated to another server. We consider
+ * consider all locks and callbacks broken and request
+ * an update from the VLDB.
+ *
+ * We also limit the number of VMOVED hops we will
+ * honour, just in case someone sets up a loop.
+ */
+ if (fc->flags & AFS_FS_CURSOR_VMOVED) {
+ fc->ac.error = -EREMOTEIO;
+ goto failed;
+ }
+ fc->flags |= AFS_FS_CURSOR_VMOVED;
+
+ set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
+ fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+ if (fc->ac.error < 0)
+ goto failed;
+
+ /* If the server list didn't change, then the VLDB is
+ * out of sync with the fileservers. This is hopefully
+ * a temporary condition, however, so we don't want to
+ * permanently block access to the file.
+ *
+ * TODO: Try other fileservers if we can.
+ *
+ * TODO: Retry a few times with sleeps.
+ */
+ if (vnode->volume->servers == fc->server_list) {
+ fc->ac.error = -ENOMEDIUM;
+ goto failed;
+ }
+
+ goto restart_from_beginning;
+
+ default:
+ clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
+ clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
+ fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+ goto failed;
+ }
+
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ _debug("no conn");
+ goto iterate_address;
+ }
+
+restart_from_beginning:
+ _debug("restart");
+ afs_end_cursor(&fc->ac);
+ afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+ fc->cbi = NULL;
+ afs_put_serverlist(afs_v2net(vnode), fc->server_list);
+ fc->server_list = NULL;
+start:
+ _debug("start");
+ /* See if we need to do an update of the volume record. Note that the
+ * volume may have moved or even have been deleted.
+ */
+ fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
+ if (fc->ac.error < 0)
+ goto failed;
+
+ if (!afs_start_fs_iteration(fc, vnode))
+ goto failed;
+ goto use_server;
+
+next_server:
+ _debug("next");
+ afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+ fc->cbi = NULL;
+ fc->index++;
+ if (fc->index >= fc->server_list->nr_servers)
+ fc->index = 0;
+ if (fc->index != fc->start)
+ goto use_server;
+
+ /* That's all the servers poked to no good effect. Try again if some
+ * of them were busy.
+ */
+ if (fc->flags & AFS_FS_CURSOR_VBUSY)
+ goto restart_from_beginning;
+
+ fc->ac.error = -EDESTADDRREQ;
+ goto failed;
+
+use_server:
+ _debug("use");
+ /* We're starting on a different fileserver from the list. We need to
+ * check it, create a callback intercept, find its address list and
+ * probe its capabilities before we use it.
+ */
+ ASSERTCMP(fc->ac.alist, ==, NULL);
+ server = fc->server_list->servers[fc->index].server;
+
+ if (!afs_check_server_record(fc, server))
+ goto failed;
+
+ _debug("USING SERVER: %pU", &server->uuid);
+
+ /* Make sure we've got a callback interest record for this server. We
+ * have to link it in before we send the request as we can be sent a
+ * break request before we've finished decoding the reply and
+ * installing the vnode.
+ */
+ fc->ac.error = afs_register_server_cb_interest(
+ vnode, &fc->server_list->servers[fc->index]);
+ if (fc->ac.error < 0)
+ goto failed;
+
+ fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+
+ read_lock(&server->fs_lock);
+ alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->fs_lock));
+ afs_get_addrlist(alist);
+ read_unlock(&server->fs_lock);
+
+
+ /* Probe the current fileserver if we haven't done so yet. */
+ if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
+ fc->ac.alist = afs_get_addrlist(alist);
+
+ if (!afs_probe_fileserver(fc))
+ goto failed;
+ }
+
+ if (!fc->ac.alist)
+ fc->ac.alist = alist;
+ else
+ afs_put_addrlist(alist);
+
+ fc->ac.addr = NULL;
+ fc->ac.start = READ_ONCE(alist->index);
+ fc->ac.index = fc->ac.start;
+ fc->ac.error = 0;
+ fc->ac.begun = false;
+ goto iterate_address;
+
+iterate_address:
+ ASSERT(fc->ac.alist);
+ _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
+ /* Iterate over the current server's address list to try and find an
+ * address on which it will respond to us.
+ */
+ if (afs_iterate_addresses(&fc->ac)) {
+ _leave(" = t");
+ return true;
+ }
+
+ afs_end_cursor(&fc->ac);
+ goto next_server;
+
+failed:
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ _leave(" = f [failed %d]", fc->ac.error);
+ return false;
+}
+
+/*
+ * Select the same fileserver we used for a vnode before and only that
+ * fileserver. We use this when we have a lock on that file, which is backed
+ * only by the fileserver we obtained it from.
+ */
+bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_cb_interest *cbi = vnode->cb_interest;
+ struct afs_addr_list *alist;
+
+ _enter("");
+
+ if (!cbi) {
+ fc->ac.error = -ESTALE;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+
+ read_lock(&cbi->server->fs_lock);
+ alist = afs_get_addrlist(cbi->server->addresses);
+ read_unlock(&cbi->server->fs_lock);
+ if (!alist) {
+ fc->ac.error = -ESTALE;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+
+ fc->ac.alist = alist;
+ fc->ac.error = 0;
+ return true;
+}
+
+/*
+ * Tidy up a filesystem cursor and unlock the vnode.
+ */
+int afs_end_vnode_operation(struct afs_fs_cursor *fc)
+{
+ struct afs_net *net = afs_v2net(fc->vnode);
+ int ret;
+
+ mutex_unlock(&fc->vnode->io_lock);
+
+ afs_end_cursor(&fc->ac);
+ afs_put_cb_interest(net, fc->cbi);
+ afs_put_serverlist(net, fc->server_list);
+
+ ret = fc->ac.error;
+ if (ret == -ECONNABORTED)
+ afs_abort_to_error(fc->ac.abort_code);
+
+ return fc->ac.error;
+}
+
+#if 0
+/*
+ * Set a filesystem server cursor for using a specific FS server.
+ */
+int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+ afs_init_fs_cursor(fc, vnode);
+
+ read_seqlock_excl(&vnode->cb_lock);
+ if (vnode->cb_interest) {
+ if (vnode->cb_interest->server->fs_state == 0)
+ fc->server = afs_get_server(vnode->cb_interest->server);
+ else
+ fc->ac.error = vnode->cb_interest->server->fs_state;
+ } else {
+ fc->ac.error = -ESTALE;
+ }
+ read_sequnlock_excl(&vnode->cb_lock);
+
+ return fc->ac.error;
+}
+
+/*
+ * pick a server to use to try accessing this volume
+ * - returns with an elevated usage count on the server chosen
+ */
+bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+{
+ struct afs_volume *volume = vnode->volume;
+ struct afs_server *server;
+ int ret, state, loop;
+
+ _enter("%s", volume->vlocation->vldb.name);
+
+ /* stick with the server we're already using if we can */
+ if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
+ fc->server = afs_get_server(vnode->cb_interest->server);
+ goto set_server;
+ }
+
+ down_read(&volume->server_sem);
+
+ /* handle the no-server case */
+ if (volume->nservers == 0) {
+ fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
+ up_read(&volume->server_sem);
+ _leave(" = f [no servers %d]", fc->ac.error);
+ return false;
+ }
+
+ /* basically, just search the list for the first live server and use
+ * that */
+ ret = 0;
+ for (loop = 0; loop < volume->nservers; loop++) {
+ server = volume->servers[loop];
+ state = server->fs_state;
+
+ _debug("consider %d [%d]", loop, state);
+
+ switch (state) {
+ case 0:
+ goto picked_server;
+
+ case -ENETUNREACH:
+ if (ret == 0)
+ ret = state;
+ break;
+
+ case -EHOSTUNREACH:
+ if (ret == 0 ||
+ ret == -ENETUNREACH)
+ ret = state;
+ break;
+
+ case -ECONNREFUSED:
+ if (ret == 0 ||
+ ret == -ENETUNREACH ||
+ ret == -EHOSTUNREACH)
+ ret = state;
+ break;
+
+ default:
+ case -EREMOTEIO:
+ if (ret == 0 ||
+ ret == -ENETUNREACH ||
+ ret == -EHOSTUNREACH ||
+ ret == -ECONNREFUSED)
+ ret = state;
+ break;
+ }
+ }
+
+error:
+ fc->ac.error = ret;
+
+ /* no available servers
+ * - TODO: handle the no active servers case better
+ */
+ up_read(&volume->server_sem);
+ _leave(" = f [%d]", fc->ac.error);
+ return false;
+
+picked_server:
+ /* Found an apparently healthy server. We need to register an interest
+ * in receiving callbacks before we talk to it.
+ */
+ ret = afs_register_server_cb_interest(vnode,
+ &volume->cb_interests[loop], server);
+ if (ret < 0)
+ goto error;
+
+ fc->server = afs_get_server(server);
+ up_read(&volume->server_sem);
+set_server:
+ fc->ac.alist = afs_get_addrlist(fc->server->addrs);
+ fc->ac.addr = &fc->ac.alist->addrs[0];
+ _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
+ _leave(" = t (picked %pIS)", &fc->ac.addr->transport);
+ return true;
+}
+
+/*
+ * release a server after use
+ * - releases the ref on the server struct that was acquired by picking
+ * - records result of using a particular server to access a volume
+ * - return true to try again, false if okay or to issue error
+ * - the caller must release the server struct if result was false
+ */
+bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode)
+{
+ struct afs_volume *volume = vnode->volume;
+ struct afs_server *server = fc->server;
+ unsigned loop;
+
+ _enter("%s,%pIS,%d",
+ volume->vlocation->vldb.name, &fc->ac.addr->transport,
+ fc->ac.error);
+
+ switch (fc->ac.error) {
+ /* success */
+ case 0:
+ server->fs_state = 0;
+ _leave(" = f");
+ return false;
+
+ /* the fileserver denied all knowledge of the volume */
+ case -ENOMEDIUM:
+ down_write(&volume->server_sem);
+
+ /* firstly, find where the server is in the active list (if it
+ * is) */
+ for (loop = 0; loop < volume->nservers; loop++)
+ if (volume->servers[loop] == server)
+ goto present;
+
+ /* no longer there - may have been discarded by another op */
+ goto try_next_server_upw;
+
+ present:
+ volume->nservers--;
+ memmove(&volume->servers[loop],
+ &volume->servers[loop + 1],
+ sizeof(volume->servers[loop]) *
+ (volume->nservers - loop));
+ volume->servers[volume->nservers] = NULL;
+ afs_put_server(afs_v2net(vnode), server);
+ volume->rjservers++;
+
+ if (volume->nservers > 0)
+ /* another server might acknowledge its existence */
+ goto try_next_server_upw;
+
+ /* handle the case where all the fileservers have rejected the
+ * volume
+ * - TODO: try asking the fileservers for volume information
+ * - TODO: contact the VL server again to see if the volume is
+ * no longer registered
+ */
+ up_write(&volume->server_sem);
+ afs_put_server(afs_v2net(vnode), server);
+ fc->server = NULL;
+ _leave(" = f [completely rejected]");
+ return false;
+
+ /* problem reaching the server */
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIME:
+ case -ETIMEDOUT:
+ case -EREMOTEIO:
+ /* mark the server as dead
+ * TODO: vary dead timeout depending on error
+ */
+ spin_lock(&server->fs_lock);
+ if (!server->fs_state) {
+ server->fs_state = fc->ac.error;
+ printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
+ }
+ spin_unlock(&server->fs_lock);
+ goto try_next_server;
+
+ /* miscellaneous error */
+ default:
+ case -ENOMEM:
+ case -ENONET:
+ /* tell the caller to accept the result */
+ afs_put_server(afs_v2net(vnode), server);
+ fc->server = NULL;
+ _leave(" = f [local failure]");
+ return false;
+ }
+
+ /* tell the caller to loop around and try the next server */
+try_next_server_upw:
+ up_write(&volume->server_sem);
+try_next_server:
+ afs_put_server(afs_v2net(vnode), server);
+ _leave(" = t [try next server]");
+ return true;
+}
+
+/*
+ * Clean up a fileserver cursor.
+ */
+int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
+{
+ afs_end_cursor(&fc->ac);
+ afs_put_server(net, fc->server);
+ return fc->ac.error;
+}
+
+#endif
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bb1e2caa1720..ea1460b9b71a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -17,13 +17,10 @@
#include "internal.h"
#include "afs_cm.h"
-struct socket *afs_socket; /* my RxRPC socket */
-static struct workqueue_struct *afs_async_calls;
-static struct afs_call *afs_spare_incoming_call;
-atomic_t afs_outstanding_calls;
+struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
-static int afs_wait_for_call_to_complete(struct afs_call *);
+static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
@@ -34,24 +31,13 @@ static int afs_deliver_cm_op_id(struct afs_call *);
static const struct afs_call_type afs_RXCMxxxx = {
.name = "CB.xxxx",
.deliver = afs_deliver_cm_op_id,
- .abort_to_error = afs_abort_to_error,
};
-static void afs_charge_preallocation(struct work_struct *);
-
-static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
-
-static int afs_wait_atomic_t(atomic_t *p)
-{
- schedule();
- return 0;
-}
-
/*
* open an RxRPC socket and bind it to be a server for callback notifications
* - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
*/
-int afs_open_socket(void)
+int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
@@ -59,28 +45,26 @@ int afs_open_socket(void)
_enter("");
- ret = -ENOMEM;
- afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
- if (!afs_async_calls)
- goto error_0;
-
- ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
+ ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket);
if (ret < 0)
goto error_1;
socket->sk->sk_allocation = GFP_NOFS;
/* bind the callback manager's address to make this a server socket */
+ memset(&srx, 0, sizeof(srx));
srx.srx_family = AF_RXRPC;
srx.srx_service = CM_SERVICE;
srx.transport_type = SOCK_DGRAM;
- srx.transport_len = sizeof(srx.transport.sin);
- srx.transport.sin.sin_family = AF_INET;
- srx.transport.sin.sin_port = htons(AFS_CM_PORT);
- memset(&srx.transport.sin.sin_addr, 0,
- sizeof(srx.transport.sin.sin_addr));
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ if (ret == -EADDRINUSE) {
+ srx.transport.sin6.sin6_port = 0;
+ ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ }
if (ret < 0)
goto error_2;
@@ -91,16 +75,14 @@ int afs_open_socket(void)
if (ret < 0)
goto error_2;
- afs_socket = socket;
- afs_charge_preallocation(NULL);
+ net->socket = socket;
+ afs_charge_preallocation(&net->charge_preallocation_work);
_leave(" = 0");
return 0;
error_2:
sock_release(socket);
error_1:
- destroy_workqueue(afs_async_calls);
-error_0:
_leave(" = %d", ret);
return ret;
}
@@ -108,36 +90,36 @@ error_0:
/*
* close the RxRPC socket AFS was using
*/
-void afs_close_socket(void)
+void afs_close_socket(struct afs_net *net)
{
_enter("");
- kernel_listen(afs_socket, 0);
+ kernel_listen(net->socket, 0);
flush_workqueue(afs_async_calls);
- if (afs_spare_incoming_call) {
- afs_put_call(afs_spare_incoming_call);
- afs_spare_incoming_call = NULL;
+ if (net->spare_incoming_call) {
+ afs_put_call(net->spare_incoming_call);
+ net->spare_incoming_call = NULL;
}
- _debug("outstanding %u", atomic_read(&afs_outstanding_calls));
- wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
+ _debug("outstanding %u", atomic_read(&net->nr_outstanding_calls));
+ wait_on_atomic_t(&net->nr_outstanding_calls, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
_debug("no outstanding calls");
- kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+ kernel_sock_shutdown(net->socket, SHUT_RDWR);
flush_workqueue(afs_async_calls);
- sock_release(afs_socket);
+ sock_release(net->socket);
_debug("dework");
- destroy_workqueue(afs_async_calls);
_leave("");
}
/*
* Allocate a call.
*/
-static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
+static struct afs_call *afs_alloc_call(struct afs_net *net,
+ const struct afs_call_type *type,
gfp_t gfp)
{
struct afs_call *call;
@@ -148,11 +130,13 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
return NULL;
call->type = type;
+ call->net = net;
atomic_set(&call->usage, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
init_waitqueue_head(&call->waitq);
+ spin_lock_init(&call->state_lock);
- o = atomic_inc_return(&afs_outstanding_calls);
+ o = atomic_inc_return(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_alloc, 1, o,
__builtin_return_address(0));
return call;
@@ -163,8 +147,9 @@ static struct afs_call *afs_alloc_call(const struct afs_call_type *type,
*/
void afs_put_call(struct afs_call *call)
{
+ struct afs_net *net = call->net;
int n = atomic_dec_return(&call->usage);
- int o = atomic_read(&afs_outstanding_calls);
+ int o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_put, n + 1, o,
__builtin_return_address(0));
@@ -175,20 +160,22 @@ void afs_put_call(struct afs_call *call)
ASSERT(call->type->name != NULL);
if (call->rxcall) {
- rxrpc_kernel_end_call(afs_socket, call->rxcall);
+ rxrpc_kernel_end_call(net->socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
call->type->destructor(call);
+ afs_put_server(call->net, call->cm_server);
+ afs_put_cb_interest(call->net, call->cbi);
kfree(call->request);
kfree(call);
- o = atomic_dec_return(&afs_outstanding_calls);
+ o = atomic_dec_return(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_free, 0, o,
__builtin_return_address(0));
if (o == 0)
- wake_up_atomic_t(&afs_outstanding_calls);
+ wake_up_atomic_t(&net->nr_outstanding_calls);
}
}
@@ -200,7 +187,7 @@ int afs_queue_call_work(struct afs_call *call)
int u = atomic_inc_return(&call->usage);
trace_afs_call(call, afs_call_trace_work, u,
- atomic_read(&afs_outstanding_calls),
+ atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
INIT_WORK(&call->work, call->type->work);
@@ -213,12 +200,13 @@ int afs_queue_call_work(struct afs_call *call)
/*
* allocate a call with flat request and reply buffers
*/
-struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
+struct afs_call *afs_alloc_flat_call(struct afs_net *net,
+ const struct afs_call_type *type,
size_t request_size, size_t reply_max)
{
struct afs_call *call;
- call = afs_alloc_call(type, GFP_NOFS);
+ call = afs_alloc_call(net, type, GFP_NOFS);
if (!call)
goto nomem_call;
@@ -236,6 +224,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
goto nomem_free;
}
+ call->operation_ID = type->op;
init_waitqueue_head(&call->waitq);
return call;
@@ -300,8 +289,7 @@ static void afs_notify_end_request_tx(struct sock *sock,
{
struct afs_call *call = (struct afs_call *)call_user_ID;
- if (call->state == AFS_CALL_REQUESTING)
- call->state = AFS_CALL_AWAIT_REPLY;
+ afs_set_call_state(call, AFS_CALL_CL_REQUESTING, AFS_CALL_CL_AWAIT_REPLY);
}
/*
@@ -319,11 +307,13 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
do {
afs_load_bvec(call, msg, bv, first, last, offset);
+ trace_afs_send_pages(call, msg, first, last, offset);
+
offset = 0;
bytes = msg->msg_iter.count;
nr = msg->msg_iter.nr_segs;
- ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg,
+ ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg,
bytes, afs_notify_end_request_tx);
for (loop = 0; loop < nr; loop++)
put_page(bv[loop].bv_page);
@@ -333,63 +323,62 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
first += nr;
} while (first <= last);
+ trace_afs_sent_pages(call, call->first, last, first, ret);
return ret;
}
/*
* initiate a call
*/
-int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
- bool async)
+long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
+ gfp_t gfp, bool async)
{
- struct sockaddr_rxrpc srx;
+ struct sockaddr_rxrpc *srx = ac->addr;
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
size_t offset;
s64 tx_total_len;
- u32 abort_code;
int ret;
- _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
+ _enter(",{%pISp},", &srx->transport);
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
_debug("____MAKE %p{%s,%x} [%d]____",
call, call->type->name, key_serial(call->key),
- atomic_read(&afs_outstanding_calls));
+ atomic_read(&call->net->nr_outstanding_calls));
call->async = async;
- memset(&srx, 0, sizeof(srx));
- srx.srx_family = AF_RXRPC;
- srx.srx_service = call->service_id;
- srx.transport_type = SOCK_DGRAM;
- srx.transport_len = sizeof(srx.transport.sin);
- srx.transport.sin.sin_family = AF_INET;
- srx.transport.sin.sin_port = call->port;
- memcpy(&srx.transport.sin.sin_addr, addr, 4);
-
/* Work out the length we're going to transmit. This is awkward for
* calls such as FS.StoreData where there's an extra injection of data
* after the initial fixed part.
*/
tx_total_len = call->request_size;
if (call->send_pages) {
- tx_total_len += call->last_to - call->first_offset;
- tx_total_len += (call->last - call->first) * PAGE_SIZE;
+ if (call->last == call->first) {
+ tx_total_len += call->last_to - call->first_offset;
+ } else {
+ /* It looks mathematically like you should be able to
+ * combine the following lines with the ones above, but
+ * unsigned arithmetic is fun when it wraps...
+ */
+ tx_total_len += PAGE_SIZE - call->first_offset;
+ tx_total_len += call->last_to;
+ tx_total_len += (call->last - call->first - 1) * PAGE_SIZE;
+ }
}
/* create a call */
- rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
+ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
(unsigned long)call,
tx_total_len, gfp,
(async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade);
- call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
goto error_kill_call;
@@ -409,14 +398,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_controllen = 0;
msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
- /* We have to change the state *before* sending the last packet as
- * rxrpc might give us the reply before it returns from sending the
- * request. Further, if the send fails, we may already have been given
- * a notification and may have collected it.
- */
- if (!call->send_pages)
- call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+ ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
&msg, call->request_size,
afs_notify_end_request_tx);
if (ret < 0)
@@ -433,22 +415,26 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
if (call->async)
return -EINPROGRESS;
- return afs_wait_for_call_to_complete(call);
+ return afs_wait_for_call_to_complete(call, ac);
error_do_abort:
call->state = AFS_CALL_COMPLETE;
if (ret != -ECONNABORTED) {
- rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
- ret, "KSD");
+ rxrpc_kernel_abort_call(call->net->socket, rxcall,
+ RX_USER_ABORT, ret, "KSD");
} else {
- abort_code = 0;
offset = 0;
- rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
- false, &abort_code, &call->service_id);
- ret = call->type->abort_to_error(abort_code);
+ rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
+ 0, &offset, false, &call->abort_code,
+ &call->service_id);
+ ac->abort_code = call->abort_code;
+ ac->responded = true;
}
+ call->error = ret;
+ trace_afs_call_done(call);
error_kill_call:
afs_put_call(call);
+ ac->error = ret;
_leave(" = %d", ret);
return ret;
}
@@ -458,88 +444,98 @@ error_kill_call:
*/
static void afs_deliver_to_call(struct afs_call *call)
{
- u32 abort_code;
+ enum afs_call_state state;
+ u32 abort_code, remote_abort = 0;
int ret;
_enter("%s", call->type->name);
- while (call->state == AFS_CALL_AWAIT_REPLY ||
- call->state == AFS_CALL_AWAIT_OP_ID ||
- call->state == AFS_CALL_AWAIT_REQUEST ||
- call->state == AFS_CALL_AWAIT_ACK
+ while (state = READ_ONCE(call->state),
+ state == AFS_CALL_CL_AWAIT_REPLY ||
+ state == AFS_CALL_SV_AWAIT_OP_ID ||
+ state == AFS_CALL_SV_AWAIT_REQUEST ||
+ state == AFS_CALL_SV_AWAIT_ACK
) {
- if (call->state == AFS_CALL_AWAIT_ACK) {
+ if (state == AFS_CALL_SV_AWAIT_ACK) {
size_t offset = 0;
- ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ ret = rxrpc_kernel_recv_data(call->net->socket,
+ call->rxcall,
NULL, 0, &offset, false,
- &call->abort_code,
+ &remote_abort,
&call->service_id);
trace_afs_recv_data(call, 0, offset, false, ret);
if (ret == -EINPROGRESS || ret == -EAGAIN)
return;
- if (ret == 1 || ret < 0) {
- call->state = AFS_CALL_COMPLETE;
- goto done;
+ if (ret < 0 || ret == 1) {
+ if (ret == 1)
+ ret = 0;
+ goto call_complete;
}
return;
}
ret = call->type->deliver(call);
+ state = READ_ONCE(call->state);
switch (ret) {
case 0:
- if (call->state == AFS_CALL_AWAIT_REPLY)
- call->state = AFS_CALL_COMPLETE;
+ if (state == AFS_CALL_CL_PROC_REPLY)
+ goto call_complete;
+ ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY);
goto done;
case -EINPROGRESS:
case -EAGAIN:
goto out;
+ case -EIO:
case -ECONNABORTED:
- goto call_complete;
+ ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+ goto done;
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KNC");
- goto save_error;
+ goto local_abort;
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KIV");
- goto save_error;
+ goto local_abort;
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
default:
abort_code = RXGEN_CC_UNMARSHAL;
- if (call->state != AFS_CALL_AWAIT_REPLY)
+ if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, -EBADMSG, "KUM");
- goto save_error;
+ goto local_abort;
}
}
done:
- if (call->state == AFS_CALL_COMPLETE && call->incoming)
+ if (state == AFS_CALL_COMPLETE && call->incoming)
afs_put_call(call);
out:
_leave("");
return;
-save_error:
- call->error = ret;
+local_abort:
+ abort_code = 0;
call_complete:
- call->state = AFS_CALL_COMPLETE;
+ afs_set_call_complete(call, ret, remote_abort);
+ state = AFS_CALL_COMPLETE;
goto done;
}
/*
* wait synchronously for a call to complete
*/
-static int afs_wait_for_call_to_complete(struct afs_call *call)
+static long afs_wait_for_call_to_complete(struct afs_call *call,
+ struct afs_addr_cursor *ac)
{
signed long rtt2, timeout;
- int ret;
+ long ret;
u64 rtt;
u32 life, last_life;
@@ -547,30 +543,31 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
_enter("");
- rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall);
+ rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
rtt2 = nsecs_to_jiffies64(rtt) * 2;
if (rtt2 < 2)
rtt2 = 2;
timeout = rtt2;
- last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+ last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
/* deliver any messages that are in the queue */
- if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE) &&
+ call->need_attention) {
call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
continue;
}
- if (call->state == AFS_CALL_COMPLETE)
+ if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+ life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
if (timeout == 0 &&
life == last_life && signal_pending(current))
break;
@@ -587,16 +584,34 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
__set_current_state(TASK_RUNNING);
/* Kill off the call if it's still live. */
- if (call->state < AFS_CALL_COMPLETE) {
+ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
_debug("call interrupted");
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI");
+ if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ RX_USER_ABORT, -EINTR, "KWI"))
+ afs_set_call_complete(call, -EINTR, 0);
+ }
+
+ spin_lock_bh(&call->state_lock);
+ ac->abort_code = call->abort_code;
+ ac->error = call->error;
+ spin_unlock_bh(&call->state_lock);
+
+ ret = ac->error;
+ switch (ret) {
+ case 0:
+ if (call->ret_reply0) {
+ ret = (long)call->reply[0];
+ call->reply[0] = NULL;
+ }
+ /* Fall through */
+ case -ECONNABORTED:
+ ac->responded = true;
+ break;
}
- ret = call->error;
_debug("call complete");
afs_put_call(call);
- _leave(" = %d", ret);
+ _leave(" = %p", (void *)ret);
return ret;
}
@@ -627,7 +642,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
u = __atomic_add_unless(&call->usage, 1, 0);
if (u != 0) {
trace_afs_call(call, afs_call_trace_wake, u,
- atomic_read(&afs_outstanding_calls),
+ atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work))
@@ -666,7 +681,7 @@ static void afs_process_async_call(struct work_struct *work)
}
if (call->state == AFS_CALL_COMPLETE) {
- call->reply = NULL;
+ call->reply[0] = NULL;
/* We have two refs to release - one from the alloc and one
* queued with the work item - and we can't just deallocate the
@@ -691,22 +706,24 @@ static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
/*
* Charge the incoming call preallocation.
*/
-static void afs_charge_preallocation(struct work_struct *work)
+void afs_charge_preallocation(struct work_struct *work)
{
- struct afs_call *call = afs_spare_incoming_call;
+ struct afs_net *net =
+ container_of(work, struct afs_net, charge_preallocation_work);
+ struct afs_call *call = net->spare_incoming_call;
for (;;) {
if (!call) {
- call = afs_alloc_call(&afs_RXCMxxxx, GFP_KERNEL);
+ call = afs_alloc_call(net, &afs_RXCMxxxx, GFP_KERNEL);
if (!call)
break;
call->async = true;
- call->state = AFS_CALL_AWAIT_OP_ID;
+ call->state = AFS_CALL_SV_AWAIT_OP_ID;
init_waitqueue_head(&call->waitq);
}
- if (rxrpc_kernel_charge_accept(afs_socket,
+ if (rxrpc_kernel_charge_accept(net->socket,
afs_wake_up_async_call,
afs_rx_attach,
(unsigned long)call,
@@ -714,7 +731,7 @@ static void afs_charge_preallocation(struct work_struct *work)
break;
call = NULL;
}
- afs_spare_incoming_call = call;
+ net->spare_incoming_call = call;
}
/*
@@ -735,7 +752,9 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long user_call_ID)
{
- queue_work(afs_wq, &afs_charge_preallocation_work);
+ struct afs_net *net = afs_sock2net(sk);
+
+ queue_work(afs_wq, &net->charge_preallocation_work);
}
/*
@@ -756,7 +775,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
return ret;
call->operation_ID = ntohl(call->tmp);
- call->state = AFS_CALL_AWAIT_REQUEST;
+ afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
call->offset = 0;
/* ask the cache manager to route the call (it'll change the call type
@@ -781,8 +800,7 @@ static void afs_notify_end_reply_tx(struct sock *sock,
{
struct afs_call *call = (struct afs_call *)call_user_ID;
- if (call->state == AFS_CALL_REPLYING)
- call->state = AFS_CALL_AWAIT_ACK;
+ afs_set_call_state(call, AFS_CALL_SV_REPLYING, AFS_CALL_SV_AWAIT_ACK);
}
/*
@@ -790,11 +808,12 @@ static void afs_notify_end_reply_tx(struct sock *sock,
*/
void afs_send_empty_reply(struct afs_call *call)
{
+ struct afs_net *net = call->net;
struct msghdr msg;
_enter("");
- rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0);
+ rxrpc_kernel_set_tx_length(net->socket, call->rxcall, 0);
msg.msg_name = NULL;
msg.msg_namelen = 0;
@@ -803,8 +822,7 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_controllen = 0;
msg.msg_flags = 0;
- call->state = AFS_CALL_AWAIT_ACK;
- switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0,
+ switch (rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, 0,
afs_notify_end_reply_tx)) {
case 0:
_leave(" [replied]");
@@ -812,7 +830,7 @@ void afs_send_empty_reply(struct afs_call *call)
case -ENOMEM:
_debug("oom");
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ rxrpc_kernel_abort_call(net->socket, call->rxcall,
RX_USER_ABORT, -ENOMEM, "KOO");
default:
_leave(" [error]");
@@ -825,13 +843,14 @@ void afs_send_empty_reply(struct afs_call *call)
*/
void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
{
+ struct afs_net *net = call->net;
struct msghdr msg;
struct kvec iov[1];
int n;
_enter("");
- rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len);
+ rxrpc_kernel_set_tx_length(net->socket, call->rxcall, len);
iov[0].iov_base = (void *) buf;
iov[0].iov_len = len;
@@ -842,8 +861,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
msg.msg_controllen = 0;
msg.msg_flags = 0;
- call->state = AFS_CALL_AWAIT_ACK;
- n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len,
+ n = rxrpc_kernel_send_data(net->socket, call->rxcall, &msg, len,
afs_notify_end_reply_tx);
if (n >= 0) {
/* Success */
@@ -853,7 +871,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
- rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ rxrpc_kernel_abort_call(net->socket, call->rxcall,
RX_USER_ABORT, -ENOMEM, "KOO");
}
_leave(" [error]");
@@ -865,6 +883,9 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
int afs_extract_data(struct afs_call *call, void *buf, size_t count,
bool want_more)
{
+ struct afs_net *net = call->net;
+ enum afs_call_state state;
+ u32 remote_abort;
int ret;
_enter("{%s,%zu},,%zu,%d",
@@ -872,32 +893,32 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
ASSERTCMP(call->offset, <=, count);
- ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
buf, count, &call->offset,
- want_more, &call->abort_code,
+ want_more, &remote_abort,
&call->service_id);
trace_afs_recv_data(call, count, call->offset, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
+ state = READ_ONCE(call->state);
if (ret == 1) {
- switch (call->state) {
- case AFS_CALL_AWAIT_REPLY:
- call->state = AFS_CALL_COMPLETE;
+ switch (state) {
+ case AFS_CALL_CL_AWAIT_REPLY:
+ afs_set_call_state(call, state, AFS_CALL_CL_PROC_REPLY);
break;
- case AFS_CALL_AWAIT_REQUEST:
- call->state = AFS_CALL_REPLYING;
+ case AFS_CALL_SV_AWAIT_REQUEST:
+ afs_set_call_state(call, state, AFS_CALL_SV_REPLYING);
break;
+ case AFS_CALL_COMPLETE:
+ kdebug("prem complete %d", call->error);
+ return -EIO;
default:
break;
}
return 0;
}
- if (ret == -ECONNABORTED)
- call->error = call->type->abort_to_error(call->abort_code);
- else
- call->error = ret;
- call->state = AFS_CALL_COMPLETE;
+ afs_set_call_complete(call, ret, remote_abort);
return ret;
}
diff --git a/fs/afs/security.c b/fs/afs/security.c
index faca66227ecf..46a881a4d08f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -1,6 +1,6 @@
/* AFS security handling
*
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -14,9 +14,13 @@
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/sched.h>
+#include <linux/hashtable.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
+static DEFINE_HASHTABLE(afs_permits_cache, 10);
+static DEFINE_SPINLOCK(afs_permits_lock);
+
/*
* get a key
*/
@@ -46,167 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell)
}
/*
- * dispose of a permits list
+ * Dispose of a list of permits.
*/
-void afs_zap_permits(struct rcu_head *rcu)
+static void afs_permits_rcu(struct rcu_head *rcu)
{
struct afs_permits *permits =
container_of(rcu, struct afs_permits, rcu);
- int loop;
-
- _enter("{%d}", permits->count);
+ int i;
- for (loop = permits->count - 1; loop >= 0; loop--)
- key_put(permits->permits[loop].key);
+ for (i = 0; i < permits->nr_permits; i++)
+ key_put(permits->permits[i].key);
kfree(permits);
}
/*
- * dispose of a permits list in which all the key pointers have been copied
+ * Discard a permission cache.
*/
-static void afs_dispose_of_permits(struct rcu_head *rcu)
+void afs_put_permits(struct afs_permits *permits)
{
- struct afs_permits *permits =
- container_of(rcu, struct afs_permits, rcu);
-
- _enter("{%d}", permits->count);
-
- kfree(permits);
+ if (permits && refcount_dec_and_test(&permits->usage)) {
+ spin_lock(&afs_permits_lock);
+ hash_del_rcu(&permits->hash_node);
+ spin_unlock(&afs_permits_lock);
+ call_rcu(&permits->rcu, afs_permits_rcu);
+ }
}
/*
- * get the authorising vnode - this is the specified inode itself if it's a
- * directory or it's the parent directory if the specified inode is a file or
- * symlink
- * - the caller must release the ref on the inode
+ * Clear a permit cache on callback break.
*/
-static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
- struct key *key)
+void afs_clear_permits(struct afs_vnode *vnode)
{
- struct afs_vnode *auth_vnode;
- struct inode *auth_inode;
+ struct afs_permits *permits;
- _enter("");
+ spin_lock(&vnode->lock);
+ permits = rcu_dereference_protected(vnode->permit_cache,
+ lockdep_is_held(&vnode->lock));
+ RCU_INIT_POINTER(vnode->permit_cache, NULL);
+ vnode->cb_break++;
+ spin_unlock(&vnode->lock);
- if (S_ISDIR(vnode->vfs_inode.i_mode)) {
- auth_inode = igrab(&vnode->vfs_inode);
- ASSERT(auth_inode != NULL);
- } else {
- auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
- &vnode->status.parent, NULL, NULL);
- if (IS_ERR(auth_inode))
- return ERR_CAST(auth_inode);
- }
-
- auth_vnode = AFS_FS_I(auth_inode);
- _leave(" = {%x}", auth_vnode->fid.vnode);
- return auth_vnode;
+ if (permits)
+ afs_put_permits(permits);
}
/*
- * clear the permit cache on a directory vnode
+ * Hash a list of permits. Use simple addition to make it easy to add an extra
+ * one at an as-yet indeterminate position in the list.
*/
-void afs_clear_permits(struct afs_vnode *vnode)
+static void afs_hash_permits(struct afs_permits *permits)
{
- struct afs_permits *permits;
-
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ unsigned long h = permits->nr_permits;
+ int i;
- mutex_lock(&vnode->permits_lock);
- permits = vnode->permits;
- RCU_INIT_POINTER(vnode->permits, NULL);
- mutex_unlock(&vnode->permits_lock);
+ for (i = 0; i < permits->nr_permits; i++) {
+ h += (unsigned long)permits->permits[i].key / sizeof(void *);
+ h += permits->permits[i].access;
+ }
- if (permits)
- call_rcu(&permits->rcu, afs_zap_permits);
- _leave("");
+ permits->h = h;
}
/*
- * add the result obtained for a vnode to its or its parent directory's cache
- * for the key used to access it
+ * Cache the CallerAccess result obtained from doing a fileserver operation
+ * that returned a vnode status for a particular key. If a callback break
+ * occurs whilst the operation was in progress then we have to ditch the cache
+ * as the ACL *may* have changed.
*/
-void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
+ unsigned int cb_break)
{
- struct afs_permits *permits, *xpermits;
- struct afs_permit *permit;
- struct afs_vnode *auth_vnode;
- int count, loop;
+ struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
+ afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
+ size_t size = 0;
+ bool changed = false;
+ int i, j;
+
+ _enter("{%x:%u},%x,%x",
+ vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
+
+ rcu_read_lock();
+
+ /* Check for the common case first: We got back the same access as last
+ * time we tried and already have it recorded.
+ */
+ permits = rcu_dereference(vnode->permit_cache);
+ if (permits) {
+ if (!permits->invalidated) {
+ for (i = 0; i < permits->nr_permits; i++) {
+ if (permits->permits[i].key < key)
+ continue;
+ if (permits->permits[i].key > key)
+ break;
+ if (permits->permits[i].access != caller_access) {
+ changed = true;
+ break;
+ }
- _enter("{%x:%u},%x,%lx",
- vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
+ if (cb_break != (vnode->cb_break +
+ vnode->cb_interest->server->cb_s_break)) {
+ changed = true;
+ break;
+ }
- auth_vnode = afs_get_auth_inode(vnode, key);
- if (IS_ERR(auth_vnode)) {
- _leave(" [get error %ld]", PTR_ERR(auth_vnode));
- return;
- }
+ /* The cache is still good. */
+ rcu_read_unlock();
+ return;
+ }
+ }
+
+ changed |= permits->invalidated;
+ size = permits->nr_permits;
- mutex_lock(&auth_vnode->permits_lock);
+ /* If this set of permits is now wrong, clear the permits
+ * pointer so that no one tries to use the stale information.
+ */
+ if (changed) {
+ spin_lock(&vnode->lock);
+ if (permits != rcu_access_pointer(vnode->permit_cache))
+ goto someone_else_changed_it_unlock;
+ RCU_INIT_POINTER(vnode->permit_cache, NULL);
+ spin_unlock(&vnode->lock);
+
+ afs_put_permits(permits);
+ permits = NULL;
+ size = 0;
+ }
+ }
- /* guard against a rename being detected whilst we waited for the
- * lock */
- if (memcmp(&auth_vnode->fid, &vnode->status.parent,
- sizeof(struct afs_fid)) != 0) {
- _debug("renamed");
- goto out_unlock;
+ if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) {
+ rcu_read_unlock();
+ goto someone_else_changed_it;
}
- /* have to be careful as the directory's callback may be broken between
- * us receiving the status we're trying to cache and us getting the
- * lock to update the cache for the status */
- if (auth_vnode->acl_order - acl_order > 0) {
- _debug("ACL changed?");
- goto out_unlock;
+ /* We need a ref on any permits list we want to copy as we'll have to
+ * drop the lock to do memory allocation.
+ */
+ if (permits && !refcount_inc_not_zero(&permits->usage)) {
+ rcu_read_unlock();
+ goto someone_else_changed_it;
}
- /* always update the anonymous mask */
- _debug("anon access %x", vnode->status.anon_access);
- auth_vnode->status.anon_access = vnode->status.anon_access;
- if (key == vnode->volume->cell->anonymous_key)
- goto out_unlock;
-
- xpermits = auth_vnode->permits;
- count = 0;
- if (xpermits) {
- /* see if the permit is already in the list
- * - if it is then we just amend the list
- */
- count = xpermits->count;
- permit = xpermits->permits;
- for (loop = count; loop > 0; loop--) {
- if (permit->key == key) {
- permit->access_mask =
- vnode->status.caller_access;
- goto out_unlock;
+ rcu_read_unlock();
+
+ /* Speculatively create a new list with the revised permission set. We
+ * discard this if we find an extant match already in the hash, but
+ * it's easier to compare with memcmp this way.
+ *
+ * We fill in the key pointers at this time, but we don't get the refs
+ * yet.
+ */
+ size++;
+ new = kzalloc(sizeof(struct afs_permits) +
+ sizeof(struct afs_permit) * size, GFP_NOFS);
+ if (!new)
+ return;
+
+ refcount_set(&new->usage, 1);
+ new->nr_permits = size;
+ i = j = 0;
+ if (permits) {
+ for (i = 0; i < permits->nr_permits; i++) {
+ if (j == i && permits->permits[i].key > key) {
+ new->permits[j].key = key;
+ new->permits[j].access = caller_access;
+ j++;
}
- permit++;
+ new->permits[j].key = permits->permits[i].key;
+ new->permits[j].access = permits->permits[i].access;
+ j++;
+ }
+ }
+
+ if (j == i) {
+ new->permits[j].key = key;
+ new->permits[j].access = caller_access;
+ }
+
+ afs_hash_permits(new);
+
+ afs_put_permits(permits);
+
+ /* Now see if the permit list we want is actually already available */
+ spin_lock(&afs_permits_lock);
+
+ hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
+ if (xpermits->h != new->h ||
+ xpermits->invalidated ||
+ xpermits->nr_permits != new->nr_permits ||
+ memcmp(xpermits->permits, new->permits,
+ new->nr_permits * sizeof(struct afs_permit)) != 0)
+ continue;
+
+ if (refcount_inc_not_zero(&xpermits->usage)) {
+ replacement = xpermits;
+ goto found;
}
+
+ break;
}
- permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
- GFP_NOFS);
- if (!permits)
- goto out_unlock;
-
- if (xpermits)
- memcpy(permits->permits, xpermits->permits,
- count * sizeof(struct afs_permit));
-
- _debug("key %x access %x",
- key_serial(key), vnode->status.caller_access);
- permits->permits[count].access_mask = vnode->status.caller_access;
- permits->permits[count].key = key_get(key);
- permits->count = count + 1;
-
- rcu_assign_pointer(auth_vnode->permits, permits);
- if (xpermits)
- call_rcu(&xpermits->rcu, afs_dispose_of_permits);
-
-out_unlock:
- mutex_unlock(&auth_vnode->permits_lock);
- iput(&auth_vnode->vfs_inode);
- _leave("");
+ for (i = 0; i < new->nr_permits; i++)
+ key_get(new->permits[i].key);
+ hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
+ replacement = new;
+ new = NULL;
+
+found:
+ spin_unlock(&afs_permits_lock);
+
+ kfree(new);
+
+ spin_lock(&vnode->lock);
+ if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
+ permits != rcu_access_pointer(vnode->permit_cache))
+ goto someone_else_changed_it_unlock;
+ rcu_assign_pointer(vnode->permit_cache, replacement);
+ spin_unlock(&vnode->lock);
+ afs_put_permits(permits);
+ return;
+
+someone_else_changed_it_unlock:
+ spin_unlock(&vnode->lock);
+someone_else_changed_it:
+ /* Someone else changed the cache under us - don't recheck at this
+ * time.
+ */
+ return;
}
/*
@@ -218,56 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
afs_access_t *_access)
{
struct afs_permits *permits;
- struct afs_permit *permit;
- struct afs_vnode *auth_vnode;
- bool valid;
- int loop, ret;
+ bool valid = false;
+ int i, ret;
_enter("{%x:%u},%x",
vnode->fid.vid, vnode->fid.vnode, key_serial(key));
- auth_vnode = afs_get_auth_inode(vnode, key);
- if (IS_ERR(auth_vnode)) {
- *_access = 0;
- _leave(" = %ld", PTR_ERR(auth_vnode));
- return PTR_ERR(auth_vnode);
- }
-
- ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
+ permits = vnode->permit_cache;
/* check the permits to see if we've got one yet */
- if (key == auth_vnode->volume->cell->anonymous_key) {
+ if (key == vnode->volume->cell->anonymous_key) {
_debug("anon");
- *_access = auth_vnode->status.anon_access;
+ *_access = vnode->status.anon_access;
valid = true;
} else {
- valid = false;
rcu_read_lock();
- permits = rcu_dereference(auth_vnode->permits);
+ permits = rcu_dereference(vnode->permit_cache);
if (permits) {
- permit = permits->permits;
- for (loop = permits->count; loop > 0; loop--) {
- if (permit->key == key) {
- _debug("found in cache");
- *_access = permit->access_mask;
- valid = true;
+ for (i = 0; i < permits->nr_permits; i++) {
+ if (permits->permits[i].key < key)
+ continue;
+ if (permits->permits[i].key > key)
break;
- }
- permit++;
+
+ *_access = permits->permits[i].access;
+ valid = !permits->invalidated;
+ break;
}
}
rcu_read_unlock();
}
if (!valid) {
- /* check the status on the file we're actually interested in
- * (the post-processing will cache the result on auth_vnode) */
+ /* Check the status on the file we're actually interested in
+ * (the post-processing will cache the result).
+ */
_debug("no valid permit");
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
+ ret = afs_fetch_status(vnode, key);
if (ret < 0) {
- iput(&auth_vnode->vfs_inode);
*_access = 0;
_leave(" = %d", ret);
return ret;
@@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
*_access = vnode->status.caller_access;
}
- iput(&auth_vnode->vfs_inode);
_leave(" = 0 [access %x]", *_access);
return 0;
}
@@ -304,14 +362,9 @@ int afs_permission(struct inode *inode, int mask)
return PTR_ERR(key);
}
- /* if the promise has expired, we need to check the server again */
- if (!vnode->cb_promised) {
- _debug("not promised");
- ret = afs_vnode_fetch_status(vnode, NULL, key);
- if (ret < 0)
- goto error;
- _debug("new promise [fl=%lx]", vnode->flags);
- }
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error;
/* check the permits to see if we've got one yet */
ret = afs_check_permit(vnode, key, &access);
@@ -365,3 +418,12 @@ error:
_leave(" = %d", ret);
return ret;
}
+
+void __exit afs_clean_up_permit_cache(void)
+{
+ int i;
+
+ for (i = 0; i < HASH_SIZE(afs_permits_cache); i++)
+ WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i]));
+
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index c001b1f2455f..1880f1b6a9f1 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -11,317 +11,689 @@
#include <linux/sched.h>
#include <linux/slab.h>
+#include "afs_fs.h"
#include "internal.h"
-static unsigned afs_server_timeout = 10; /* server timeout in seconds */
+static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
+static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
-static void afs_reap_server(struct work_struct *);
+static void afs_inc_servers_outstanding(struct afs_net *net)
+{
+ atomic_inc(&net->servers_outstanding);
+}
+
+static void afs_dec_servers_outstanding(struct afs_net *net)
+{
+ if (atomic_dec_and_test(&net->servers_outstanding))
+ wake_up_atomic_t(&net->servers_outstanding);
+}
+
+/*
+ * Find a server by one of its addresses.
+ */
+struct afs_server *afs_find_server(struct afs_net *net,
+ const struct sockaddr_rxrpc *srx)
+{
+ const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
+ const struct afs_addr_list *alist;
+ struct afs_server *server = NULL;
+ unsigned int i;
+ bool ipv6 = true;
+ int seq = 0, diff;
+
+ if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
+ srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
+ srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
+ ipv6 = false;
+
+ rcu_read_lock();
+
+ do {
+ if (server)
+ afs_put_server(net, server);
+ server = NULL;
+ read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
+
+ if (ipv6) {
+ hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+ alist = rcu_dereference(server->addresses);
+ for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
+ b = &alist->addrs[i].transport.sin6;
+ diff = (u16)a->sin6_port - (u16)b->sin6_port;
+ if (diff == 0)
+ diff = memcmp(&a->sin6_addr,
+ &b->sin6_addr,
+ sizeof(struct in6_addr));
+ if (diff == 0)
+ goto found;
+ if (diff < 0) {
+ // TODO: Sort the list
+ //if (i == alist->nr_ipv4)
+ // goto not_found;
+ break;
+ }
+ }
+ }
+ } else {
+ hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
+ alist = rcu_dereference(server->addresses);
+ for (i = 0; i < alist->nr_ipv4; i++) {
+ b = &alist->addrs[i].transport.sin6;
+ diff = (u16)a->sin6_port - (u16)b->sin6_port;
+ if (diff == 0)
+ diff = ((u32)a->sin6_addr.s6_addr32[3] -
+ (u32)b->sin6_addr.s6_addr32[3]);
+ if (diff == 0)
+ goto found;
+ if (diff < 0) {
+ // TODO: Sort the list
+ //if (i == 0)
+ // goto not_found;
+ break;
+ }
+ }
+ }
+ }
+
+ //not_found:
+ server = NULL;
+ found:
+ if (server && !atomic_inc_not_zero(&server->usage))
+ server = NULL;
+
+ } while (need_seqretry(&net->fs_addr_lock, seq));
-/* tree of all the servers, indexed by IP address */
-static struct rb_root afs_servers = RB_ROOT;
-static DEFINE_RWLOCK(afs_servers_lock);
+ done_seqretry(&net->fs_addr_lock, seq);
-/* LRU list of all the servers not currently in use */
-static LIST_HEAD(afs_server_graveyard);
-static DEFINE_SPINLOCK(afs_server_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
+ rcu_read_unlock();
+ return server;
+}
/*
- * install a server record in the master tree
+ * Look up a server by its UUID
*/
-static int afs_install_server(struct afs_server *server)
+struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
- struct afs_server *xserver;
+ struct afs_server *server = NULL;
+ struct rb_node *p;
+ int diff, seq = 0;
+
+ _enter("%pU", uuid);
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ if (server)
+ afs_put_server(net, server);
+ server = NULL;
+
+ read_seqbegin_or_lock(&net->fs_lock, &seq);
+
+ p = net->fs_servers.rb_node;
+ while (p) {
+ server = rb_entry(p, struct afs_server, uuid_rb);
+
+ diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
+ if (diff < 0) {
+ p = p->rb_left;
+ } else if (diff > 0) {
+ p = p->rb_right;
+ } else {
+ afs_get_server(server);
+ break;
+ }
+
+ server = NULL;
+ }
+ } while (need_seqretry(&net->fs_lock, seq));
+
+ done_seqretry(&net->fs_lock, seq);
+
+ _leave(" = %p", server);
+ return server;
+}
+
+/*
+ * Install a server record in the namespace tree
+ */
+static struct afs_server *afs_install_server(struct afs_net *net,
+ struct afs_server *candidate)
+{
+ const struct afs_addr_list *alist;
+ struct afs_server *server;
struct rb_node **pp, *p;
- int ret;
+ int ret = -EEXIST, diff;
- _enter("%p", server);
+ _enter("%p", candidate);
- write_lock(&afs_servers_lock);
+ write_seqlock(&net->fs_lock);
- ret = -EEXIST;
- pp = &afs_servers.rb_node;
+ /* Firstly install the server in the UUID lookup tree */
+ pp = &net->fs_servers.rb_node;
p = NULL;
while (*pp) {
p = *pp;
_debug("- consider %p", p);
- xserver = rb_entry(p, struct afs_server, master_rb);
- if (server->addr.s_addr < xserver->addr.s_addr)
+ server = rb_entry(p, struct afs_server, uuid_rb);
+ diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
+ if (diff < 0)
pp = &(*pp)->rb_left;
- else if (server->addr.s_addr > xserver->addr.s_addr)
+ else if (diff > 0)
pp = &(*pp)->rb_right;
else
- goto error;
+ goto exists;
}
- rb_link_node(&server->master_rb, p, pp);
- rb_insert_color(&server->master_rb, &afs_servers);
+ server = candidate;
+ rb_link_node(&server->uuid_rb, p, pp);
+ rb_insert_color(&server->uuid_rb, &net->fs_servers);
+ hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
+
+ write_seqlock(&net->fs_addr_lock);
+ alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&net->fs_addr_lock.lock));
+
+ /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
+ * it in the IPv4 and/or IPv6 reverse-map lists.
+ *
+ * TODO: For speed we want to use something other than a flat list
+ * here; even sorting the list in terms of lowest address would help a
+ * bit, but anything we might want to do gets messy and memory
+ * intensive.
+ */
+ if (alist->nr_ipv4 > 0)
+ hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
+ if (alist->nr_addrs > alist->nr_ipv4)
+ hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+
+ write_sequnlock(&net->fs_addr_lock);
ret = 0;
-error:
- write_unlock(&afs_servers_lock);
- return ret;
+exists:
+ afs_get_server(server);
+ write_sequnlock(&net->fs_lock);
+ return server;
}
/*
* allocate a new server record
*/
-static struct afs_server *afs_alloc_server(struct afs_cell *cell,
- const struct in_addr *addr)
+static struct afs_server *afs_alloc_server(struct afs_net *net,
+ const uuid_t *uuid,
+ struct afs_addr_list *alist)
{
struct afs_server *server;
_enter("");
server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
- if (server) {
- atomic_set(&server->usage, 1);
- server->cell = cell;
-
- INIT_LIST_HEAD(&server->link);
- INIT_LIST_HEAD(&server->grave);
- init_rwsem(&server->sem);
- spin_lock_init(&server->fs_lock);
- server->fs_vnodes = RB_ROOT;
- server->cb_promises = RB_ROOT;
- spin_lock_init(&server->cb_lock);
- init_waitqueue_head(&server->cb_break_waitq);
- INIT_DELAYED_WORK(&server->cb_break_work,
- afs_dispatch_give_up_callbacks);
-
- memcpy(&server->addr, addr, sizeof(struct in_addr));
- server->addr.s_addr = addr->s_addr;
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
- } else {
- _leave(" = NULL [nomem]");
- }
+ if (!server)
+ goto enomem;
+
+ atomic_set(&server->usage, 1);
+ RCU_INIT_POINTER(server->addresses, alist);
+ server->addr_version = alist->version;
+ server->uuid = *uuid;
+ server->flags = (1UL << AFS_SERVER_FL_NEW);
+ server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
+ rwlock_init(&server->fs_lock);
+ INIT_LIST_HEAD(&server->cb_interests);
+ rwlock_init(&server->cb_break_lock);
+
+ afs_inc_servers_outstanding(net);
+ _leave(" = %p", server);
return server;
+
+enomem:
+ _leave(" = NULL [nomem]");
+ return NULL;
+}
+
+/*
+ * Look up an address record for a server
+ */
+static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
+ struct key *key, const uuid_t *uuid)
+{
+ struct afs_addr_cursor ac;
+ struct afs_addr_list *alist;
+ int ret;
+
+ ret = afs_set_vl_cursor(&ac, cell);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ while (afs_iterate_addresses(&ac)) {
+ if (test_bit(ac.index, &ac.alist->yfs))
+ alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
+ else
+ alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
+ switch (ac.error) {
+ case 0:
+ afs_end_cursor(&ac);
+ return alist;
+ case -ECONNABORTED:
+ ac.error = afs_abort_to_error(ac.abort_code);
+ goto error;
+ case -ENOMEM:
+ case -ENONET:
+ goto error;
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ break;
+ default:
+ ac.error = -EIO;
+ goto error;
+ }
+ }
+
+error:
+ return ERR_PTR(afs_end_cursor(&ac));
}
/*
- * get an FS-server record for a cell
+ * Get or create a fileserver record.
*/
-struct afs_server *afs_lookup_server(struct afs_cell *cell,
- const struct in_addr *addr)
+struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
+ const uuid_t *uuid)
{
+ struct afs_addr_list *alist;
struct afs_server *server, *candidate;
- _enter("%p,%pI4", cell, &addr->s_addr);
+ _enter("%p,%pU", cell->net, uuid);
- /* quick scan of the list to see if we already have the server */
- read_lock(&cell->servers_lock);
+ server = afs_find_server_by_uuid(cell->net, uuid);
+ if (server)
+ return server;
- list_for_each_entry(server, &cell->servers, link) {
- if (server->addr.s_addr == addr->s_addr)
- goto found_server_quickly;
- }
- read_unlock(&cell->servers_lock);
+ alist = afs_vl_lookup_addrs(cell, key, uuid);
+ if (IS_ERR(alist))
+ return ERR_CAST(alist);
- candidate = afs_alloc_server(cell, addr);
+ candidate = afs_alloc_server(cell->net, uuid, alist);
if (!candidate) {
- _leave(" = -ENOMEM");
+ afs_put_addrlist(alist);
return ERR_PTR(-ENOMEM);
}
- write_lock(&cell->servers_lock);
-
- /* check the cell's server list again */
- list_for_each_entry(server, &cell->servers, link) {
- if (server->addr.s_addr == addr->s_addr)
- goto found_server;
+ server = afs_install_server(cell->net, candidate);
+ if (server != candidate) {
+ afs_put_addrlist(alist);
+ kfree(candidate);
}
- _debug("new");
- server = candidate;
- if (afs_install_server(server) < 0)
- goto server_in_two_cells;
-
- afs_get_cell(cell);
- list_add_tail(&server->link, &cell->servers);
-
- write_unlock(&cell->servers_lock);
_leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
+}
- /* found a matching server quickly */
-found_server_quickly:
- _debug("found quickly");
- afs_get_server(server);
- read_unlock(&cell->servers_lock);
-no_longer_unused:
- if (!list_empty(&server->grave)) {
- spin_lock(&afs_server_graveyard_lock);
- list_del_init(&server->grave);
- spin_unlock(&afs_server_graveyard_lock);
+/*
+ * Set the server timer to fire after a given delay, assuming it's not already
+ * set for an earlier time.
+ */
+static void afs_set_server_timer(struct afs_net *net, time64_t delay)
+{
+ if (net->live) {
+ afs_inc_servers_outstanding(net);
+ if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
+ afs_dec_servers_outstanding(net);
}
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
- return server;
+}
- /* found a matching server on the second pass */
-found_server:
- _debug("found");
- afs_get_server(server);
- write_unlock(&cell->servers_lock);
- kfree(candidate);
- goto no_longer_unused;
-
- /* found a server that seems to be in two cells */
-server_in_two_cells:
- write_unlock(&cell->servers_lock);
- kfree(candidate);
- printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
- addr);
- _leave(" = -EEXIST");
- return ERR_PTR(-EEXIST);
+/*
+ * Server management timer. We have an increment on fs_outstanding that we
+ * need to pass along to the work item.
+ */
+void afs_servers_timer(struct timer_list *timer)
+{
+ struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
+
+ _enter("");
+ if (!queue_work(afs_wq, &net->fs_manager))
+ afs_dec_servers_outstanding(net);
}
/*
- * look up a server by its IP address
+ * Release a reference on a server record.
*/
-struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
+void afs_put_server(struct afs_net *net, struct afs_server *server)
{
- struct afs_server *server = NULL;
- struct rb_node *p;
- struct in_addr addr = srx->transport.sin.sin_addr;
+ unsigned int usage;
- _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+ if (!server)
+ return;
- if (srx->transport.family != AF_INET) {
- WARN(true, "AFS does not yes support non-IPv4 addresses\n");
- return NULL;
- }
+ server->put_time = ktime_get_real_seconds();
- read_lock(&afs_servers_lock);
+ usage = atomic_dec_return(&server->usage);
- p = afs_servers.rb_node;
- while (p) {
- server = rb_entry(p, struct afs_server, master_rb);
+ _enter("{%u}", usage);
- _debug("- consider %p", p);
+ if (likely(usage > 0))
+ return;
- if (addr.s_addr < server->addr.s_addr) {
- p = p->rb_left;
- } else if (addr.s_addr > server->addr.s_addr) {
- p = p->rb_right;
- } else {
- afs_get_server(server);
- goto found;
- }
- }
+ afs_set_server_timer(net, afs_server_gc_delay);
+}
- server = NULL;
-found:
- read_unlock(&afs_servers_lock);
- ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
- _leave(" = %p", server);
- return server;
+static void afs_server_rcu(struct rcu_head *rcu)
+{
+ struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+
+ afs_put_addrlist(server->addresses);
+ kfree(server);
}
/*
- * destroy a server record
- * - removes from the cell list
+ * destroy a dead server
*/
-void afs_put_server(struct afs_server *server)
+static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
- if (!server)
- return;
+ struct afs_addr_list *alist = server->addresses;
+ struct afs_addr_cursor ac = {
+ .alist = alist,
+ .addr = &alist->addrs[0],
+ .start = alist->index,
+ .index = alist->index,
+ .error = 0,
+ };
+ _enter("%p", server);
- _enter("%p{%d}", server, atomic_read(&server->usage));
+ afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ call_rcu(&server->rcu, afs_server_rcu);
+ afs_dec_servers_outstanding(net);
+}
- _debug("PUT SERVER %d", atomic_read(&server->usage));
+/*
+ * Garbage collect any expired servers.
+ */
+static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
+{
+ struct afs_server *server;
+ bool deleted;
+ int usage;
+
+ while ((server = gc_list)) {
+ gc_list = server->gc_next;
+
+ write_seqlock(&net->fs_lock);
+ usage = 1;
+ deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
+ if (deleted) {
+ rb_erase(&server->uuid_rb, &net->fs_servers);
+ hlist_del_rcu(&server->proc_link);
+ }
+ write_sequnlock(&net->fs_lock);
- ASSERTCMP(atomic_read(&server->usage), >, 0);
+ if (deleted)
+ afs_destroy_server(net, server);
+ }
+}
- if (likely(!atomic_dec_and_test(&server->usage))) {
- _leave("");
- return;
+/*
+ * Manage the records of servers known to be within a network namespace. This
+ * includes garbage collecting unused servers.
+ *
+ * Note also that we were given an increment on net->servers_outstanding by
+ * whoever queued us that we need to deal with before returning.
+ */
+void afs_manage_servers(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, fs_manager);
+ struct afs_server *gc_list = NULL;
+ struct rb_node *cursor;
+ time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
+ bool purging = !net->live;
+
+ _enter("");
+
+ /* Trawl the server list looking for servers that have expired from
+ * lack of use.
+ */
+ read_seqlock_excl(&net->fs_lock);
+
+ for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
+ struct afs_server *server =
+ rb_entry(cursor, struct afs_server, uuid_rb);
+ int usage = atomic_read(&server->usage);
+
+ _debug("manage %pU %u", &server->uuid, usage);
+
+ ASSERTCMP(usage, >=, 1);
+ ASSERTIFCMP(purging, usage, ==, 1);
+
+ if (usage == 1) {
+ time64_t expire_at = server->put_time;
+
+ if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
+ expire_at += afs_server_gc_delay;
+ if (purging || expire_at <= now) {
+ server->gc_next = gc_list;
+ gc_list = server;
+ } else if (expire_at < next_manage) {
+ next_manage = expire_at;
+ }
+ }
}
- afs_flush_callback_breaks(server);
+ read_sequnlock_excl(&net->fs_lock);
+
+ /* Update the timer on the way out. We have to pass an increment on
+ * servers_outstanding in the namespace that we are in to the timer or
+ * the work scheduler.
+ */
+ if (!purging && next_manage < TIME64_MAX) {
+ now = ktime_get_real_seconds();
- spin_lock(&afs_server_graveyard_lock);
- if (atomic_read(&server->usage) == 0) {
- list_move_tail(&server->grave, &afs_server_graveyard);
- server->time_of_death = ktime_get_real_seconds();
- queue_delayed_work(afs_wq, &afs_server_reaper,
- afs_server_timeout * HZ);
+ if (next_manage - now <= 0) {
+ if (queue_work(afs_wq, &net->fs_manager))
+ afs_inc_servers_outstanding(net);
+ } else {
+ afs_set_server_timer(net, next_manage - now);
+ }
}
- spin_unlock(&afs_server_graveyard_lock);
- _leave(" [dead]");
+
+ afs_gc_servers(net, gc_list);
+
+ afs_dec_servers_outstanding(net);
+ _leave(" [%d]", atomic_read(&net->servers_outstanding));
+}
+
+static void afs_queue_server_manager(struct afs_net *net)
+{
+ afs_inc_servers_outstanding(net);
+ if (!queue_work(afs_wq, &net->fs_manager))
+ afs_dec_servers_outstanding(net);
}
/*
- * destroy a dead server
+ * Purge list of servers.
*/
-static void afs_destroy_server(struct afs_server *server)
+void afs_purge_servers(struct afs_net *net)
{
- _enter("%p", server);
+ _enter("");
- ASSERTIF(server->cb_break_head != server->cb_break_tail,
- delayed_work_pending(&server->cb_break_work));
+ if (del_timer_sync(&net->fs_timer))
+ atomic_dec(&net->servers_outstanding);
- ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
- ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
- ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
- ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
+ afs_queue_server_manager(net);
- afs_put_cell(server->cell);
- kfree(server);
+ _debug("wait");
+ wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
+ TASK_UNINTERRUPTIBLE);
+ _leave("");
}
/*
- * reap dead server records
+ * Probe a fileserver to find its capabilities.
+ *
+ * TODO: Try service upgrade.
*/
-static void afs_reap_server(struct work_struct *work)
+static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
- LIST_HEAD(corpses);
- struct afs_server *server;
- unsigned long delay, expiry;
- time64_t now;
-
- now = ktime_get_real_seconds();
- spin_lock(&afs_server_graveyard_lock);
-
- while (!list_empty(&afs_server_graveyard)) {
- server = list_entry(afs_server_graveyard.next,
- struct afs_server, grave);
+ _enter("");
- /* the queue is ordered most dead first */
- expiry = server->time_of_death + afs_server_timeout;
- if (expiry > now) {
- delay = (expiry - now) * HZ;
- mod_delayed_work(afs_wq, &afs_server_reaper, delay);
+ fc->ac.addr = NULL;
+ fc->ac.start = READ_ONCE(fc->ac.alist->index);
+ fc->ac.index = fc->ac.start;
+ fc->ac.error = 0;
+ fc->ac.begun = false;
+
+ while (afs_iterate_addresses(&fc->ac)) {
+ afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
+ &fc->ac, fc->key);
+ switch (fc->ac.error) {
+ case 0:
+ afs_end_cursor(&fc->ac);
+ set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
+ return true;
+ case -ECONNABORTED:
+ fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+ goto error;
+ case -ENOMEM:
+ case -ENONET:
+ goto error;
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
break;
+ default:
+ fc->ac.error = -EIO;
+ goto error;
}
+ }
- write_lock(&server->cell->servers_lock);
- write_lock(&afs_servers_lock);
- if (atomic_read(&server->usage) > 0) {
- list_del_init(&server->grave);
- } else {
- list_move_tail(&server->grave, &corpses);
- list_del_init(&server->link);
- rb_erase(&server->master_rb, &afs_servers);
- }
- write_unlock(&afs_servers_lock);
- write_unlock(&server->cell->servers_lock);
+error:
+ afs_end_cursor(&fc->ac);
+ return false;
+}
+
+/*
+ * If we haven't already, try probing the fileserver to get its capabilities.
+ * We try not to instigate parallel probes, but it's possible that the parallel
+ * probes will fail due to authentication failure when ours would succeed.
+ *
+ * TODO: Try sending an anonymous probe if an authenticated probe fails.
+ */
+bool afs_probe_fileserver(struct afs_fs_cursor *fc)
+{
+ bool success;
+ int ret, retries = 0;
+
+ _enter("");
+
+retry:
+ if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
+ _leave(" = t");
+ return true;
}
- spin_unlock(&afs_server_graveyard_lock);
+ if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
+ success = afs_do_probe_fileserver(fc);
+ clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
+ wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
+ _leave(" = t");
+ return success;
+ }
+
+ _debug("wait");
+ ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
+ TASK_INTERRUPTIBLE);
+ if (ret == -ERESTARTSYS) {
+ fc->ac.error = ret;
+ _leave(" = f [%d]", ret);
+ return false;
+ }
- /* now reap the corpses we've extracted */
- while (!list_empty(&corpses)) {
- server = list_entry(corpses.next, struct afs_server, grave);
- list_del(&server->grave);
- afs_destroy_server(server);
+ retries++;
+ if (retries == 4) {
+ fc->ac.error = -ESTALE;
+ _leave(" = f [stale]");
+ return false;
}
+ _debug("retry");
+ goto retry;
}
/*
- * discard all the server records for rmmod
+ * Get an update for a server's address list.
*/
-void __exit afs_purge_servers(void)
+static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
- afs_server_timeout = 0;
- mod_delayed_work(afs_wq, &afs_server_reaper, 0);
+ struct afs_addr_list *alist, *discard;
+
+ _enter("");
+
+ alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
+ &server->uuid);
+ if (IS_ERR(alist)) {
+ fc->ac.error = PTR_ERR(alist);
+ _leave(" = f [%d]", fc->ac.error);
+ return false;
+ }
+
+ discard = alist;
+ if (server->addr_version != alist->version) {
+ write_lock(&server->fs_lock);
+ discard = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->fs_lock));
+ rcu_assign_pointer(server->addresses, alist);
+ server->addr_version = alist->version;
+ write_unlock(&server->fs_lock);
+ }
+
+ server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
+ afs_put_addrlist(discard);
+ _leave(" = t");
+ return true;
+}
+
+/*
+ * See if a server's address list needs updating.
+ */
+bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
+{
+ time64_t now = ktime_get_real_seconds();
+ long diff;
+ bool success;
+ int ret, retries = 0;
+
+ _enter("");
+
+ ASSERT(server);
+
+retry:
+ diff = READ_ONCE(server->update_at) - now;
+ if (diff > 0) {
+ _leave(" = t [not now %ld]", diff);
+ return true;
+ }
+
+ if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
+ success = afs_update_server_record(fc, server);
+ clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
+ wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
+ _leave(" = %d", success);
+ return success;
+ }
+
+ ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
+ TASK_INTERRUPTIBLE);
+ if (ret == -ERESTARTSYS) {
+ fc->ac.error = ret;
+ _leave(" = f [intr]");
+ return false;
+ }
+
+ retries++;
+ if (retries == 4) {
+ _leave(" = f [stale]");
+ ret = -ESTALE;
+ return false;
+ }
+ goto retry;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
new file mode 100644
index 000000000000..26bad7032bba
--- /dev/null
+++ b/fs/afs/server_list.c
@@ -0,0 +1,153 @@
+/* AFS fileserver list management.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
+{
+ int i;
+
+ if (refcount_dec_and_test(&slist->usage)) {
+ for (i = 0; i < slist->nr_servers; i++) {
+ afs_put_cb_interest(net, slist->servers[i].cb_interest);
+ afs_put_server(net, slist->servers[i].server);
+ }
+ kfree(slist);
+ }
+}
+
+/*
+ * Build a server list from a VLDB record.
+ */
+struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
+ struct key *key,
+ struct afs_vldb_entry *vldb,
+ u8 type_mask)
+{
+ struct afs_server_list *slist;
+ struct afs_server *server;
+ int ret = -ENOMEM, nr_servers = 0, i, j;
+
+ for (i = 0; i < vldb->nr_servers; i++)
+ if (vldb->fs_mask[i] & type_mask)
+ nr_servers++;
+
+ slist = kzalloc(sizeof(struct afs_server_list) +
+ sizeof(struct afs_server_entry) * nr_servers,
+ GFP_KERNEL);
+ if (!slist)
+ goto error;
+
+ refcount_set(&slist->usage, 1);
+
+ /* Make sure a records exists for each server in the list. */
+ for (i = 0; i < vldb->nr_servers; i++) {
+ if (!(vldb->fs_mask[i] & type_mask))
+ continue;
+
+ server = afs_lookup_server(cell, key, &vldb->fs_server[i]);
+ if (IS_ERR(server)) {
+ ret = PTR_ERR(server);
+ if (ret == -ENOENT)
+ continue;
+ goto error_2;
+ }
+
+ /* Insertion-sort by server pointer */
+ for (j = 0; j < slist->nr_servers; j++)
+ if (slist->servers[j].server >= server)
+ break;
+ if (j < slist->nr_servers) {
+ if (slist->servers[j].server == server) {
+ afs_put_server(cell->net, server);
+ continue;
+ }
+
+ memmove(slist->servers + j + 1,
+ slist->servers + j,
+ (slist->nr_servers - j) * sizeof(struct afs_server_entry));
+ }
+
+ slist->servers[j].server = server;
+ slist->nr_servers++;
+ }
+
+ if (slist->nr_servers == 0) {
+ ret = -EDESTADDRREQ;
+ goto error_2;
+ }
+
+ return slist;
+
+error_2:
+ afs_put_serverlist(cell->net, slist);
+error:
+ return ERR_PTR(ret);
+}
+
+/*
+ * Copy the annotations from an old server list to its potential replacement.
+ */
+bool afs_annotate_server_list(struct afs_server_list *new,
+ struct afs_server_list *old)
+{
+ struct afs_server *cur;
+ int i, j;
+
+ if (old->nr_servers != new->nr_servers)
+ goto changed;
+
+ for (i = 0; i < old->nr_servers; i++)
+ if (old->servers[i].server != new->servers[i].server)
+ goto changed;
+
+ return false;
+
+changed:
+ /* Maintain the same current server as before if possible. */
+ cur = old->servers[old->index].server;
+ for (j = 0; j < new->nr_servers; j++) {
+ if (new->servers[j].server == cur) {
+ new->index = j;
+ break;
+ }
+ }
+
+ /* Keep the old callback interest records where possible so that we
+ * maintain callback interception.
+ */
+ i = 0;
+ j = 0;
+ while (i < old->nr_servers && j < new->nr_servers) {
+ if (new->servers[j].server == old->servers[i].server) {
+ struct afs_cb_interest *cbi = old->servers[i].cb_interest;
+ if (cbi) {
+ new->servers[j].cb_interest = cbi;
+ refcount_inc(&cbi->usage);
+ }
+ i++;
+ j++;
+ continue;
+ }
+
+ if (new->servers[j].server < old->servers[i].server) {
+ j++;
+ continue;
+ }
+
+ i++;
+ continue;
+ }
+
+ return true;
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 689173c0a682..875b5eb02242 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -25,11 +25,10 @@
#include <linux/statfs.h>
#include <linux/sched.h>
#include <linux/nsproxy.h>
+#include <linux/magic.h>
#include <net/net_namespace.h>
#include "internal.h"
-#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
-
static void afs_i_init_once(void *foo);
static struct dentry *afs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data);
@@ -143,9 +142,9 @@ void __exit afs_fs_exit(void)
*/
static int afs_show_devname(struct seq_file *m, struct dentry *root)
{
- struct afs_super_info *as = root->d_sb->s_fs_info;
+ struct afs_super_info *as = AFS_FS_S(root->d_sb);
struct afs_volume *volume = as->volume;
- struct afs_cell *cell = volume->cell;
+ struct afs_cell *cell = as->cell;
const char *suf = "";
char pref = '%';
@@ -163,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
break;
}
- seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf);
+ seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf);
return 0;
}
@@ -201,12 +200,14 @@ static int afs_parse_options(struct afs_mount_params *params,
token = match_token(p, afs_options_list, args);
switch (token) {
case afs_opt_cell:
- cell = afs_cell_lookup(args[0].from,
- args[0].to - args[0].from,
- false);
+ rcu_read_lock();
+ cell = afs_lookup_cell_rcu(params->net,
+ args[0].from,
+ args[0].to - args[0].from);
+ rcu_read_unlock();
if (IS_ERR(cell))
return PTR_ERR(cell);
- afs_put_cell(params->cell);
+ afs_put_cell(params->net, params->cell);
params->cell = cell;
break;
@@ -308,13 +309,14 @@ static int afs_parse_device_name(struct afs_mount_params *params,
/* lookup the cell record */
if (cellname || !params->cell) {
- cell = afs_cell_lookup(cellname, cellnamesz, true);
+ cell = afs_lookup_cell(params->net, cellname, cellnamesz,
+ NULL, false);
if (IS_ERR(cell)) {
printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
- afs_put_cell(params->cell);
+ afs_put_cell(params->net, params->cell);
params->cell = cell;
}
@@ -332,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params,
static int afs_test_super(struct super_block *sb, void *data)
{
struct afs_super_info *as1 = data;
- struct afs_super_info *as = sb->s_fs_info;
+ struct afs_super_info *as = AFS_FS_S(sb);
- return as->volume == as1->volume;
+ return as->net == as1->net && as->volume->vid == as1->volume->vid;
}
static int afs_set_super(struct super_block *sb, void *data)
{
- sb->s_fs_info = data;
+ struct afs_super_info *as = data;
+
+ sb->s_fs_info = as;
return set_anon_super(sb, NULL);
}
@@ -349,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data)
static int afs_fill_super(struct super_block *sb,
struct afs_mount_params *params)
{
- struct afs_super_info *as = sb->s_fs_info;
+ struct afs_super_info *as = AFS_FS_S(sb);
struct afs_fid fid;
struct inode *inode = NULL;
int ret;
@@ -366,13 +370,15 @@ static int afs_fill_super(struct super_block *sb,
if (ret)
return ret;
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
- strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
+ sprintf(sb->s_id, "%u", as->volume->vid);
+
+ afs_activate_volume(as->volume);
/* allocate the root inode and dentry */
fid.vid = as->volume->vid;
fid.vnode = 1;
fid.unique = 1;
- inode = afs_iget(sb, params->key, &fid, NULL, NULL);
+ inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -394,23 +400,45 @@ error:
return ret;
}
+static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
+{
+ struct afs_super_info *as;
+
+ as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+ if (as) {
+ as->net = afs_get_net(params->net);
+ as->cell = afs_get_cell(params->cell);
+ }
+ return as;
+}
+
+static void afs_destroy_sbi(struct afs_super_info *as)
+{
+ if (as) {
+ afs_put_volume(as->cell, as->volume);
+ afs_put_cell(as->net, as->cell);
+ afs_put_net(as->net);
+ kfree(as);
+ }
+}
+
/*
* get an AFS superblock
*/
static struct dentry *afs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *options)
+ int flags, const char *dev_name, void *options)
{
struct afs_mount_params params;
struct super_block *sb;
- struct afs_volume *vol;
+ struct afs_volume *candidate;
struct key *key;
- char *new_opts = kstrdup(options, GFP_KERNEL);
struct afs_super_info *as;
int ret;
_enter(",,%s,%p", dev_name, options);
memset(&params, 0, sizeof(params));
+ params.net = &__afs_net;
ret = -EINVAL;
if (current->nsproxy->net_ns != &init_net)
@@ -436,66 +464,75 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
}
params.key = key;
- /* parse the device name */
- vol = afs_volume_lookup(&params);
- if (IS_ERR(vol)) {
- ret = PTR_ERR(vol);
- goto error;
- }
-
/* allocate a superblock info record */
- as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
- if (!as) {
- ret = -ENOMEM;
- afs_put_volume(vol);
- goto error;
+ ret = -ENOMEM;
+ as = afs_alloc_sbi(&params);
+ if (!as)
+ goto error_key;
+
+ /* Assume we're going to need a volume record; at the very least we can
+ * use it to update the volume record if we have one already. This
+ * checks that the volume exists within the cell.
+ */
+ candidate = afs_create_volume(&params);
+ if (IS_ERR(candidate)) {
+ ret = PTR_ERR(candidate);
+ goto error_as;
}
- as->volume = vol;
+
+ as->volume = candidate;
/* allocate a deviceless superblock */
sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
- afs_put_volume(vol);
- kfree(as);
- goto error;
+ goto error_as;
}
if (!sb->s_root) {
/* initial superblock/root creation */
_debug("create");
ret = afs_fill_super(sb, &params);
- if (ret < 0) {
- deactivate_locked_super(sb);
- goto error;
- }
+ if (ret < 0)
+ goto error_sb;
+ as = NULL;
sb->s_flags |= MS_ACTIVE;
} else {
_debug("reuse");
ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
- afs_put_volume(vol);
- kfree(as);
+ afs_destroy_sbi(as);
+ as = NULL;
}
- afs_put_cell(params.cell);
- kfree(new_opts);
+ afs_put_cell(params.net, params.cell);
+ key_put(params.key);
_leave(" = 0 [%p]", sb);
return dget(sb->s_root);
-error:
- afs_put_cell(params.cell);
+error_sb:
+ deactivate_locked_super(sb);
+ goto error_key;
+error_as:
+ afs_destroy_sbi(as);
+error_key:
key_put(params.key);
- kfree(new_opts);
+error:
+ afs_put_cell(params.net, params.cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
}
static void afs_kill_super(struct super_block *sb)
{
- struct afs_super_info *as = sb->s_fs_info;
+ struct afs_super_info *as = AFS_FS_S(sb);
+
+ /* Clear the callback interests (which will do ilookup5) before
+ * deactivating the superblock.
+ */
+ afs_clear_callback_interests(as->net, as->volume->servers);
kill_anon_super(sb);
- afs_put_volume(as->volume);
- kfree(as);
+ afs_deactivate_volume(as->volume);
+ afs_destroy_sbi(as);
}
/*
@@ -507,16 +544,15 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
- init_waitqueue_head(&vnode->update_waitq);
- mutex_init(&vnode->permits_lock);
+ mutex_init(&vnode->io_lock);
mutex_init(&vnode->validate_lock);
- spin_lock_init(&vnode->writeback_lock);
+ spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
- INIT_LIST_HEAD(&vnode->writebacks);
+ INIT_LIST_HEAD(&vnode->wb_keys);
INIT_LIST_HEAD(&vnode->pending_locks);
INIT_LIST_HEAD(&vnode->granted_locks);
INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
- INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
+ seqlock_init(&vnode->cb_lock);
}
/*
@@ -536,9 +572,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
memset(&vnode->status, 0, sizeof(vnode->status));
vnode->volume = NULL;
- vnode->update_cnt = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
- vnode->cb_promised = false;
_leave(" = %p", &vnode->vfs_inode);
return &vnode->vfs_inode;
@@ -562,7 +596,7 @@ static void afs_destroy_inode(struct inode *inode)
_debug("DESTROY INODE %p", inode);
- ASSERTCMP(vnode->server, ==, NULL);
+ ASSERTCMP(vnode->cb_interest, ==, NULL);
call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
@@ -573,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode)
*/
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
+ struct afs_fs_cursor fc;
struct afs_volume_status vs;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
@@ -582,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (IS_ERR(key))
return PTR_ERR(key);
- ret = afs_vnode_get_volume_status(vnode, key, &vs);
- key_put(key);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_get_volume_status(&fc, &vs);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
}
- buf->f_type = dentry->d_sb->s_magic;
- buf->f_bsize = AFS_BLOCK_SIZE;
- buf->f_namelen = AFSNAMEMAX - 1;
+ key_put(key);
- if (vs.max_quota == 0)
- buf->f_blocks = vs.part_max_blocks;
- else
- buf->f_blocks = vs.max_quota;
- buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
- return 0;
+ if (ret == 0) {
+ buf->f_type = dentry->d_sb->s_magic;
+ buf->f_bsize = AFS_BLOCK_SIZE;
+ buf->f_namelen = AFSNAMEMAX - 1;
+
+ if (vs.max_quota == 0)
+ buf->f_blocks = vs.part_max_blocks;
+ else
+ buf->f_blocks = vs.max_quota;
+ buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use;
+ }
+
+ return ret;
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index a5e4cc561b6c..e372f89fd36a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -12,58 +12,19 @@
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include "afs_fs.h"
#include "internal.h"
/*
- * map volume locator abort codes to error codes
+ * Deliver reply data to a VL.GetEntryByNameU call.
*/
-static int afs_vl_abort_to_error(u32 abort_code)
+static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
{
- _enter("%u", abort_code);
-
- switch (abort_code) {
- case AFSVL_IDEXIST: return -EEXIST;
- case AFSVL_IO: return -EREMOTEIO;
- case AFSVL_NAMEEXIST: return -EEXIST;
- case AFSVL_CREATEFAIL: return -EREMOTEIO;
- case AFSVL_NOENT: return -ENOMEDIUM;
- case AFSVL_EMPTY: return -ENOMEDIUM;
- case AFSVL_ENTDELETED: return -ENOMEDIUM;
- case AFSVL_BADNAME: return -EINVAL;
- case AFSVL_BADINDEX: return -EINVAL;
- case AFSVL_BADVOLTYPE: return -EINVAL;
- case AFSVL_BADSERVER: return -EINVAL;
- case AFSVL_BADPARTITION: return -EINVAL;
- case AFSVL_REPSFULL: return -EFBIG;
- case AFSVL_NOREPSERVER: return -ENOENT;
- case AFSVL_DUPREPSERVER: return -EEXIST;
- case AFSVL_RWNOTFOUND: return -ENOENT;
- case AFSVL_BADREFCOUNT: return -EINVAL;
- case AFSVL_SIZEEXCEEDED: return -EINVAL;
- case AFSVL_BADENTRY: return -EINVAL;
- case AFSVL_BADVOLIDBUMP: return -EINVAL;
- case AFSVL_IDALREADYHASHED: return -EINVAL;
- case AFSVL_ENTRYLOCKED: return -EBUSY;
- case AFSVL_BADVOLOPER: return -EBADRQC;
- case AFSVL_BADRELLOCKTYPE: return -EINVAL;
- case AFSVL_RERELEASE: return -EREMOTEIO;
- case AFSVL_BADSERVERFLAG: return -EINVAL;
- case AFSVL_PERM: return -EACCES;
- case AFSVL_NOMEM: return -EREMOTEIO;
- default:
- return afs_abort_to_error(abort_code);
- }
-}
-
-/*
- * deliver reply data to a VL.GetEntryByXXX call
- */
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
-{
- struct afs_cache_vlocation *entry;
- __be32 *bp;
+ struct afs_uvldbentry__xdr *uvldb;
+ struct afs_vldb_entry *entry;
+ bool new_only = false;
u32 tmp;
- int loop, ret;
+ int i, ret;
_enter("");
@@ -72,144 +33,613 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
return ret;
/* unmarshall the reply once we've received all of it */
- entry = call->reply;
- bp = call->buffer;
-
- for (loop = 0; loop < 64; loop++)
- entry->name[loop] = ntohl(*bp++);
- entry->name[loop] = 0;
- bp++; /* final NUL */
+ uvldb = call->buffer;
+ entry = call->reply[0];
- bp++; /* type */
- entry->nservers = ntohl(*bp++);
+ for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++)
+ entry->name[i] = (u8)ntohl(uvldb->name[i]);
+ entry->name[i] = 0;
+ entry->name_len = strlen(entry->name);
- for (loop = 0; loop < 8; loop++)
- entry->servers[loop].s_addr = *bp++;
+ /* If there is a new replication site that we can use, ignore all the
+ * sites that aren't marked as new.
+ */
+ for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+ tmp = ntohl(uvldb->serverFlags[i]);
+ if (!(tmp & AFS_VLSF_DONTUSE) &&
+ (tmp & AFS_VLSF_NEWREPSITE))
+ new_only = true;
+ }
- bp += 8; /* partition IDs */
+ for (i = 0; i < AFS_NMAXNSERVERS; i++) {
+ struct afs_uuid__xdr *xdr;
+ struct afs_uuid *uuid;
+ int j;
- for (loop = 0; loop < 8; loop++) {
- tmp = ntohl(*bp++);
- entry->srvtmask[loop] = 0;
+ tmp = ntohl(uvldb->serverFlags[i]);
+ if (tmp & AFS_VLSF_DONTUSE ||
+ (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
+ continue;
if (tmp & AFS_VLSF_RWVOL)
- entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
+ entry->fs_mask[i] |= AFS_VOL_VTM_RW;
if (tmp & AFS_VLSF_ROVOL)
- entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
+ entry->fs_mask[i] |= AFS_VOL_VTM_RO;
if (tmp & AFS_VLSF_BACKVOL)
- entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
- }
+ entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+ if (!entry->fs_mask[i])
+ continue;
- entry->vid[0] = ntohl(*bp++);
- entry->vid[1] = ntohl(*bp++);
- entry->vid[2] = ntohl(*bp++);
+ xdr = &uvldb->serverNumber[i];
+ uuid = (struct afs_uuid *)&entry->fs_server[i];
+ uuid->time_low = xdr->time_low;
+ uuid->time_mid = htons(ntohl(xdr->time_mid));
+ uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version));
+ uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved);
+ uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low);
+ for (j = 0; j < 6; j++)
+ uuid->node[j] = (u8)ntohl(xdr->node[j]);
- bp++; /* clone ID */
+ entry->nr_servers++;
+ }
+
+ for (i = 0; i < AFS_MAXTYPES; i++)
+ entry->vid[i] = ntohl(uvldb->volumeId[i]);
- tmp = ntohl(*bp++); /* flags */
- entry->vidmask = 0;
+ tmp = ntohl(uvldb->flags);
if (tmp & AFS_VLF_RWEXISTS)
- entry->vidmask |= AFS_VOL_VTM_RW;
+ __set_bit(AFS_VLDB_HAS_RW, &entry->flags);
if (tmp & AFS_VLF_ROEXISTS)
- entry->vidmask |= AFS_VOL_VTM_RO;
+ __set_bit(AFS_VLDB_HAS_RO, &entry->flags);
if (tmp & AFS_VLF_BACKEXISTS)
- entry->vidmask |= AFS_VOL_VTM_BAK;
- if (!entry->vidmask)
- return -EBADMSG;
+ __set_bit(AFS_VLDB_HAS_BAK, &entry->flags);
+ if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) {
+ entry->error = -ENOMEDIUM;
+ __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags);
+ }
+
+ __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags);
_leave(" = 0 [done]");
return 0;
}
-/*
- * VL.GetEntryByName operation type
- */
-static const struct afs_call_type afs_RXVLGetEntryByName = {
- .name = "VL.GetEntryByName",
- .deliver = afs_deliver_vl_get_entry_by_xxx,
- .abort_to_error = afs_vl_abort_to_error,
- .destructor = afs_flat_call_destructor,
-};
+static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
+{
+ kfree(call->reply[0]);
+ afs_flat_call_destructor(call);
+}
/*
- * VL.GetEntryById operation type
+ * VL.GetEntryByNameU operation type.
*/
-static const struct afs_call_type afs_RXVLGetEntryById = {
- .name = "VL.GetEntryById",
- .deliver = afs_deliver_vl_get_entry_by_xxx,
- .abort_to_error = afs_vl_abort_to_error,
- .destructor = afs_flat_call_destructor,
+static const struct afs_call_type afs_RXVLGetEntryByNameU = {
+ .name = "VL.GetEntryByNameU",
+ .op = afs_VL_GetEntryByNameU,
+ .deliver = afs_deliver_vl_get_entry_by_name_u,
+ .destructor = afs_destroy_vl_get_entry_by_name_u,
};
/*
- * dispatch a get volume entry by name operation
+ * Dispatch a get volume entry by name or ID operation (uuid variant). If the
+ * volname is a decimal number then it's a volume ID not a volume name.
*/
-int afs_vl_get_entry_by_name(struct in_addr *addr,
- struct key *key,
- const char *volname,
- struct afs_cache_vlocation *entry,
- bool async)
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key,
+ const char *volname,
+ int volnamesz)
{
+ struct afs_vldb_entry *entry;
struct afs_call *call;
- size_t volnamesz, reqsz, padsz;
+ size_t reqsz, padsz;
__be32 *bp;
_enter("");
- volnamesz = strlen(volname);
padsz = (4 - (volnamesz & 3)) & 3;
reqsz = 8 + volnamesz + padsz;
- call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
- if (!call)
- return -ENOMEM;
+ entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz,
+ sizeof(struct afs_uvldbentry__xdr));
+ if (!call) {
+ kfree(entry);
+ return ERR_PTR(-ENOMEM);
+ }
call->key = key;
- call->reply = entry;
- call->service_id = VL_SERVICE;
- call->port = htons(AFS_VL_PORT);
+ call->reply[0] = entry;
+ call->ret_reply0 = true;
- /* marshall the parameters */
+ /* Marshall the parameters */
bp = call->request;
- *bp++ = htonl(VLGETENTRYBYNAME);
+ *bp++ = htonl(VLGETENTRYBYNAMEU);
*bp++ = htonl(volnamesz);
memcpy(bp, volname, volnamesz);
if (padsz > 0)
- memset((void *) bp + volnamesz, 0, padsz);
+ memset((void *)bp + volnamesz, 0, padsz);
- /* initiate the call */
- return afs_make_call(addr, call, GFP_KERNEL, async);
+ trace_afs_make_vl_call(call);
+ return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
}
/*
- * dispatch a get volume entry by ID operation
+ * Deliver reply data to a VL.GetAddrsU call.
+ *
+ * GetAddrsU(IN ListAddrByAttributes *inaddr,
+ * OUT afsUUID *uuidp1,
+ * OUT uint32_t *uniquifier,
+ * OUT uint32_t *nentries,
+ * OUT bulkaddrs *blkaddrs);
*/
-int afs_vl_get_entry_by_id(struct in_addr *addr,
- struct key *key,
- afs_volid_t volid,
- afs_voltype_t voltype,
- struct afs_cache_vlocation *entry,
- bool async)
+static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
{
+ struct afs_addr_list *alist;
+ __be32 *bp;
+ u32 uniquifier, nentries, count;
+ int i, ret;
+
+ _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
+ case 1:
+ ret = afs_extract_data(call, call->buffer,
+ sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer + sizeof(struct afs_uuid__xdr);
+ uniquifier = ntohl(*bp++);
+ nentries = ntohl(*bp++);
+ count = ntohl(*bp);
+
+ nentries = min(nentries, count);
+ alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT);
+ if (!alist)
+ return -ENOMEM;
+ alist->version = uniquifier;
+ call->reply[0] = alist;
+ call->count = count;
+ call->count2 = nentries;
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract entries */
+ case 2:
+ count = min(call->count, 4U);
+ ret = afs_extract_data(call, call->buffer,
+ count * sizeof(__be32),
+ call->count > 4);
+ if (ret < 0)
+ return ret;
+
+ alist = call->reply[0];
+ bp = call->buffer;
+ for (i = 0; i < count; i++)
+ if (alist->nr_addrs < call->count2)
+ afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
+
+ call->count -= count;
+ if (call->count > 0)
+ goto again;
+ call->offset = 0;
+ call->unmarshall++;
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
+{
+ afs_put_server(call->net, (struct afs_server *)call->reply[0]);
+ kfree(call->reply[1]);
+ return afs_flat_call_destructor(call);
+}
+
+/*
+ * VL.GetAddrsU operation type.
+ */
+static const struct afs_call_type afs_RXVLGetAddrsU = {
+ .name = "VL.GetAddrsU",
+ .op = afs_VL_GetAddrsU,
+ .deliver = afs_deliver_vl_get_addrs_u,
+ .destructor = afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key,
+ const uuid_t *uuid)
+{
+ struct afs_ListAddrByAttributes__xdr *r;
+ const struct afs_uuid *u = (const struct afs_uuid *)uuid;
struct afs_call *call;
__be32 *bp;
+ int i;
_enter("");
- call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
+ call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU,
+ sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr),
+ sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = key;
+ call->reply[0] = NULL;
+ call->ret_reply0 = true;
+
+ /* Marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(VLGETADDRSU);
+ r = (struct afs_ListAddrByAttributes__xdr *)bp;
+ r->Mask = htonl(AFS_VLADDR_UUID);
+ r->ipaddr = 0;
+ r->index = 0;
+ r->spare = 0;
+ r->uuid.time_low = u->time_low;
+ r->uuid.time_mid = htonl(ntohs(u->time_mid));
+ r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version));
+ r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved);
+ r->uuid.clock_seq_low = htonl(u->clock_seq_low);
+ for (i = 0; i < 6; i++)
+ r->uuid.node[i] = ntohl(u->node[i]);
+
+ trace_afs_make_vl_call(call);
+ return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+}
+
+/*
+ * Deliver reply data to an VL.GetCapabilities operation.
+ */
+static int afs_deliver_vl_get_capabilities(struct afs_call *call)
+{
+ u32 count;
+ int ret;
+
+ _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract the capabilities word count */
+ case 1:
+ ret = afs_extract_data(call, &call->tmp,
+ 1 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ count = ntohl(call->tmp);
+
+ call->count = count;
+ call->count2 = count;
+ call->offset = 0;
+ call->unmarshall++;
+
+ /* Extract capabilities words */
+ case 2:
+ count = min(call->count, 16U);
+ ret = afs_extract_data(call, call->buffer,
+ count * sizeof(__be32),
+ call->count > 16);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: Examine capabilities */
+
+ call->count -= count;
+ if (call->count > 0)
+ goto again;
+ call->offset = 0;
+ call->unmarshall++;
+ break;
+ }
+
+ call->reply[0] = (void *)(unsigned long)call->service_id;
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * VL.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_RXVLGetCapabilities = {
+ .name = "VL.GetCapabilities",
+ .op = afs_VL_GetCapabilities,
+ .deliver = afs_deliver_vl_get_capabilities,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Probe a fileserver for the capabilities that it supports. This can
+ * return up to 196 words.
+ *
+ * We use this to probe for service upgrade to determine what the server at the
+ * other end supports.
+ */
+int afs_vl_get_capabilities(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
if (!call)
return -ENOMEM;
call->key = key;
- call->reply = entry;
- call->service_id = VL_SERVICE;
- call->port = htons(AFS_VL_PORT);
+ call->upgrade = true; /* Let's see if this is a YFS server */
+ call->reply[0] = (void *)VLGETCAPABILITIES;
+ call->ret_reply0 = true;
/* marshall the parameters */
bp = call->request;
- *bp++ = htonl(VLGETENTRYBYID);
- *bp++ = htonl(volid);
- *bp = htonl(voltype);
+ *bp++ = htonl(VLGETCAPABILITIES);
+
+ /* Can't take a ref on server */
+ trace_afs_make_vl_call(call);
+ return afs_make_call(ac, call, GFP_KERNEL, false);
+}
+
+/*
+ * Deliver reply data to a YFSVL.GetEndpoints call.
+ *
+ * GetEndpoints(IN yfsServerAttributes *attr,
+ * OUT opr_uuid *uuid,
+ * OUT afs_int32 *uniquifier,
+ * OUT endpoints *fsEndpoints,
+ * OUT endpoints *volEndpoints)
+ */
+static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
+{
+ struct afs_addr_list *alist;
+ __be32 *bp;
+ u32 uniquifier, size;
+ int ret;
+
+ _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+
+again:
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->unmarshall = 1;
+
+ /* Extract the returned uuid, uniquifier, fsEndpoints count and
+ * either the first fsEndpoint type or the volEndpoints
+ * count if there are no fsEndpoints. */
+ case 1:
+ ret = afs_extract_data(call, call->buffer,
+ sizeof(uuid_t) +
+ 3 * sizeof(__be32),
+ true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer + sizeof(uuid_t);
+ uniquifier = ntohl(*bp++);
+ call->count = ntohl(*bp++);
+ call->count2 = ntohl(*bp); /* Type or next count */
+
+ if (call->count > YFS_MAXENDPOINTS)
+ return -EBADMSG;
+
+ alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
+ if (!alist)
+ return -ENOMEM;
+ alist->version = uniquifier;
+ call->reply[0] = alist;
+ call->offset = 0;
+
+ if (call->count == 0)
+ goto extract_volendpoints;
+
+ call->unmarshall = 2;
+
+ /* Extract fsEndpoints[] entries */
+ case 2:
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ size += sizeof(__be32);
+ ret = afs_extract_data(call, call->buffer, size, true);
+ if (ret < 0)
+ return ret;
+
+ alist = call->reply[0];
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return -EBADMSG;
+ afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return -EBADMSG;
+ afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
+ bp += 6;
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->count2 = htonl(*bp++);
+
+ call->offset = 0;
+ call->count--;
+ if (call->count > 0)
+ goto again;
+
+ extract_volendpoints:
+ /* Extract the list of volEndpoints. */
+ call->count = call->count2;
+ if (!call->count)
+ goto end;
+ if (call->count > YFS_MAXENDPOINTS)
+ return -EBADMSG;
+
+ call->unmarshall = 3;
+
+ /* Extract the type of volEndpoints[0]. Normally we would
+ * extract the type of the next endpoint when we extract the
+ * data of the current one, but this is the first...
+ */
+ case 3:
+ ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ call->count2 = htonl(*bp++);
+ call->offset = 0;
+ call->unmarshall = 4;
+
+ /* Extract volEndpoints[] entries */
+ case 4:
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ size = sizeof(__be32) * (1 + 1 + 1);
+ break;
+ case YFS_ENDPOINT_IPV6:
+ size = sizeof(__be32) * (1 + 4 + 1);
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ if (call->count > 1)
+ size += sizeof(__be32);
+ ret = afs_extract_data(call, call->buffer, size, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ switch (call->count2) {
+ case YFS_ENDPOINT_IPV4:
+ if (ntohl(bp[0]) != sizeof(__be32) * 2)
+ return -EBADMSG;
+ bp += 3;
+ break;
+ case YFS_ENDPOINT_IPV6:
+ if (ntohl(bp[0]) != sizeof(__be32) * 5)
+ return -EBADMSG;
+ bp += 6;
+ break;
+ default:
+ return -EBADMSG;
+ }
+
+ /* Got either the type of the next entry or the count of
+ * volEndpoints if no more fsEndpoints.
+ */
+ call->offset = 0;
+ call->count--;
+ if (call->count > 0) {
+ call->count2 = htonl(*bp++);
+ goto again;
+ }
+
+ end:
+ call->unmarshall = 5;
+
+ /* Done */
+ case 5:
+ ret = afs_extract_data(call, call->buffer, 0, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshall = 6;
+
+ case 6:
+ break;
+ }
+
+ alist = call->reply[0];
+
+ /* Start with IPv6 if available. */
+ if (alist->nr_ipv4 < alist->nr_addrs)
+ alist->index = alist->nr_ipv4;
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFSVL.GetEndpoints operation type.
+ */
+static const struct afs_call_type afs_YFSVLGetEndpoints = {
+ .name = "YFSVL.GetEndpoints",
+ .op = afs_YFSVL_GetEndpoints,
+ .deliver = afs_deliver_yfsvl_get_endpoints,
+ .destructor = afs_vl_get_addrs_u_destructor,
+};
+
+/*
+ * Dispatch an operation to get the addresses for a server, where the server is
+ * nominated by UUID.
+ */
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
+ struct afs_addr_cursor *ac,
+ struct key *key,
+ const uuid_t *uuid)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
+ sizeof(__be32) * 2 + sizeof(*uuid),
+ sizeof(struct in6_addr) + sizeof(__be32) * 3);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ call->key = key;
+ call->reply[0] = NULL;
+ call->ret_reply0 = true;
+
+ /* Marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(YVLGETENDPOINTS);
+ *bp++ = htonl(YFS_SERVER_UUID);
+ memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
- /* initiate the call */
- return afs_make_call(addr, call, GFP_KERNEL, async);
+ trace_afs_make_vl_call(call);
+ return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
deleted file mode 100644
index 37b7c3b342a6..000000000000
--- a/fs/afs/vlocation.c
+++ /dev/null
@@ -1,720 +0,0 @@
-/* AFS volume location management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */
-static unsigned afs_vlocation_update_timeout = 10 * 60;
-
-static void afs_vlocation_reaper(struct work_struct *);
-static void afs_vlocation_updater(struct work_struct *);
-
-static LIST_HEAD(afs_vlocation_updates);
-static LIST_HEAD(afs_vlocation_graveyard);
-static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
-static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
-static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
-static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
-static struct workqueue_struct *afs_vlocation_update_worker;
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
- struct key *key,
- struct afs_cache_vlocation *vldb)
-{
- struct afs_cell *cell = vl->cell;
- struct in_addr addr;
- int count, ret;
-
- _enter("%s,%s", cell->name, vl->vldb.name);
-
- down_write(&vl->cell->vl_sem);
- ret = -ENOMEDIUM;
- for (count = cell->vl_naddrs; count > 0; count--) {
- addr = cell->vl_addrs[cell->vl_curr_svix];
-
- _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-
- /* attempt to access the VL server */
- ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
- false);
- switch (ret) {
- case 0:
- goto out;
- case -ENOMEM:
- case -ENONET:
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- if (ret == -ENOMEM || ret == -ENONET)
- goto out;
- goto rotate;
- case -ENOMEDIUM:
- case -EKEYREJECTED:
- case -EKEYEXPIRED:
- goto out;
- default:
- ret = -EIO;
- goto rotate;
- }
-
- /* rotate the server records upon lookup failure */
- rotate:
- cell->vl_curr_svix++;
- cell->vl_curr_svix %= cell->vl_naddrs;
- }
-
-out:
- up_write(&vl->cell->vl_sem);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
- struct key *key,
- afs_volid_t volid,
- afs_voltype_t voltype,
- struct afs_cache_vlocation *vldb)
-{
- struct afs_cell *cell = vl->cell;
- struct in_addr addr;
- int count, ret;
-
- _enter("%s,%x,%d,", cell->name, volid, voltype);
-
- down_write(&vl->cell->vl_sem);
- ret = -ENOMEDIUM;
- for (count = cell->vl_naddrs; count > 0; count--) {
- addr = cell->vl_addrs[cell->vl_curr_svix];
-
- _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
-
- /* attempt to access the VL server */
- ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
- false);
- switch (ret) {
- case 0:
- goto out;
- case -ENOMEM:
- case -ENONET:
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- if (ret == -ENOMEM || ret == -ENONET)
- goto out;
- goto rotate;
- case -EBUSY:
- vl->upd_busy_cnt++;
- if (vl->upd_busy_cnt <= 3) {
- if (vl->upd_busy_cnt > 1) {
- /* second+ BUSY - sleep a little bit */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
- }
- continue;
- }
- break;
- case -ENOMEDIUM:
- vl->upd_rej_cnt++;
- goto rotate;
- default:
- ret = -EIO;
- goto rotate;
- }
-
- /* rotate the server records upon lookup failure */
- rotate:
- cell->vl_curr_svix++;
- cell->vl_curr_svix %= cell->vl_naddrs;
- vl->upd_busy_cnt = 0;
- }
-
-out:
- if (ret < 0 && vl->upd_rej_cnt > 0) {
- printk(KERN_NOTICE "kAFS:"
- " Active volume no longer valid '%s'\n",
- vl->vldb.name);
- vl->valid = 0;
- ret = -ENOMEDIUM;
- }
-
- up_write(&vl->cell->vl_sem);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * allocate a volume location record
- */
-static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
- const char *name,
- size_t namesz)
-{
- struct afs_vlocation *vl;
-
- vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
- if (vl) {
- vl->cell = cell;
- vl->state = AFS_VL_NEW;
- atomic_set(&vl->usage, 1);
- INIT_LIST_HEAD(&vl->link);
- INIT_LIST_HEAD(&vl->grave);
- INIT_LIST_HEAD(&vl->update);
- init_waitqueue_head(&vl->waitq);
- spin_lock_init(&vl->lock);
- memcpy(vl->vldb.name, name, namesz);
- }
-
- _leave(" = %p", vl);
- return vl;
-}
-
-/*
- * update record if we found it in the cache
- */
-static int afs_vlocation_update_record(struct afs_vlocation *vl,
- struct key *key,
- struct afs_cache_vlocation *vldb)
-{
- afs_voltype_t voltype;
- afs_volid_t vid;
- int ret;
-
- /* try to look up a cached volume in the cell VL databases by ID */
- _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
- vl->vldb.name,
- vl->vldb.vidmask,
- ntohl(vl->vldb.servers[0].s_addr),
- vl->vldb.srvtmask[0],
- ntohl(vl->vldb.servers[1].s_addr),
- vl->vldb.srvtmask[1],
- ntohl(vl->vldb.servers[2].s_addr),
- vl->vldb.srvtmask[2]);
-
- _debug("Vids: %08x %08x %08x",
- vl->vldb.vid[0],
- vl->vldb.vid[1],
- vl->vldb.vid[2]);
-
- if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
- vid = vl->vldb.vid[0];
- voltype = AFSVL_RWVOL;
- } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
- vid = vl->vldb.vid[1];
- voltype = AFSVL_ROVOL;
- } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
- vid = vl->vldb.vid[2];
- voltype = AFSVL_BACKVOL;
- } else {
- BUG();
- vid = 0;
- voltype = 0;
- }
-
- /* contact the server to make sure the volume is still available
- * - TODO: need to handle disconnected operation here
- */
- ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
- switch (ret) {
- /* net error */
- default:
- printk(KERN_WARNING "kAFS:"
- " failed to update volume '%s' (%x) up in '%s': %d\n",
- vl->vldb.name, vid, vl->cell->name, ret);
- _leave(" = %d", ret);
- return ret;
-
- /* pulled from local cache into memory */
- case 0:
- _leave(" = 0");
- return 0;
-
- /* uh oh... looks like the volume got deleted */
- case -ENOMEDIUM:
- printk(KERN_ERR "kAFS:"
- " volume '%s' (%x) does not exist '%s'\n",
- vl->vldb.name, vid, vl->cell->name);
-
- /* TODO: make existing record unavailable */
- _leave(" = %d", ret);
- return ret;
- }
-}
-
-/*
- * apply the update to a VL record
- */
-static void afs_vlocation_apply_update(struct afs_vlocation *vl,
- struct afs_cache_vlocation *vldb)
-{
- _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
- vldb->name, vldb->vidmask,
- ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
- ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
- ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
-
- _debug("Vids: %08x %08x %08x",
- vldb->vid[0], vldb->vid[1], vldb->vid[2]);
-
- if (strcmp(vldb->name, vl->vldb.name) != 0)
- printk(KERN_NOTICE "kAFS:"
- " name of volume '%s' changed to '%s' on server\n",
- vl->vldb.name, vldb->name);
-
- vl->vldb = *vldb;
-
-#ifdef CONFIG_AFS_FSCACHE
- fscache_update_cookie(vl->cache);
-#endif
-}
-
-/*
- * fill in a volume location record, consulting the cache and the VL server
- * both
- */
-static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
- struct key *key)
-{
- struct afs_cache_vlocation vldb;
- int ret;
-
- _enter("");
-
- ASSERTCMP(vl->valid, ==, 0);
-
- memset(&vldb, 0, sizeof(vldb));
-
- /* see if we have an in-cache copy (will set vl->valid if there is) */
-#ifdef CONFIG_AFS_FSCACHE
- vl->cache = fscache_acquire_cookie(vl->cell->cache,
- &afs_vlocation_cache_index_def, vl,
- true);
-#endif
-
- if (vl->valid) {
- /* try to update a known volume in the cell VL databases by
- * ID as the name may have changed */
- _debug("found in cache");
- ret = afs_vlocation_update_record(vl, key, &vldb);
- } else {
- /* try to look up an unknown volume in the cell VL databases by
- * name */
- ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
- if (ret < 0) {
- printk("kAFS: failed to locate '%s' in cell '%s'\n",
- vl->vldb.name, vl->cell->name);
- return ret;
- }
- }
-
- afs_vlocation_apply_update(vl, &vldb);
- _leave(" = 0");
- return 0;
-}
-
-/*
- * queue a vlocation record for updates
- */
-static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
-{
- struct afs_vlocation *xvl;
-
- /* wait at least 10 minutes before updating... */
- vl->update_at = ktime_get_real_seconds() +
- afs_vlocation_update_timeout;
-
- spin_lock(&afs_vlocation_updates_lock);
-
- if (!list_empty(&afs_vlocation_updates)) {
- /* ... but wait at least 1 second more than the newest record
- * already queued so that we don't spam the VL server suddenly
- * with lots of requests
- */
- xvl = list_entry(afs_vlocation_updates.prev,
- struct afs_vlocation, update);
- if (vl->update_at <= xvl->update_at)
- vl->update_at = xvl->update_at + 1;
- } else {
- queue_delayed_work(afs_vlocation_update_worker,
- &afs_vlocation_update,
- afs_vlocation_update_timeout * HZ);
- }
-
- list_add_tail(&vl->update, &afs_vlocation_updates);
- spin_unlock(&afs_vlocation_updates_lock);
-}
-
-/*
- * lookup volume location
- * - iterate through the VL servers in a cell until one of them admits knowing
- * about the volume in question
- * - lookup in the local cache if not able to find on the VL server
- * - insert/update in the local cache if did get a VL response
- */
-struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
- struct key *key,
- const char *name,
- size_t namesz)
-{
- struct afs_vlocation *vl;
- int ret;
-
- _enter("{%s},{%x},%*.*s,%zu",
- cell->name, key_serial(key),
- (int) namesz, (int) namesz, name, namesz);
-
- if (namesz >= sizeof(vl->vldb.name)) {
- _leave(" = -ENAMETOOLONG");
- return ERR_PTR(-ENAMETOOLONG);
- }
-
- /* see if we have an in-memory copy first */
- down_write(&cell->vl_sem);
- spin_lock(&cell->vl_lock);
- list_for_each_entry(vl, &cell->vl_list, link) {
- if (vl->vldb.name[namesz] != '\0')
- continue;
- if (memcmp(vl->vldb.name, name, namesz) == 0)
- goto found_in_memory;
- }
- spin_unlock(&cell->vl_lock);
-
- /* not in the cell's in-memory lists - create a new record */
- vl = afs_vlocation_alloc(cell, name, namesz);
- if (!vl) {
- up_write(&cell->vl_sem);
- return ERR_PTR(-ENOMEM);
- }
-
- afs_get_cell(cell);
-
- list_add_tail(&vl->link, &cell->vl_list);
- vl->state = AFS_VL_CREATING;
- up_write(&cell->vl_sem);
-
-fill_in_record:
- ret = afs_vlocation_fill_in_record(vl, key);
- if (ret < 0)
- goto error_abandon;
- spin_lock(&vl->lock);
- vl->state = AFS_VL_VALID;
- spin_unlock(&vl->lock);
- wake_up(&vl->waitq);
-
- /* update volume entry in local cache */
-#ifdef CONFIG_AFS_FSCACHE
- fscache_update_cookie(vl->cache);
-#endif
-
- /* schedule for regular updates */
- afs_vlocation_queue_for_updates(vl);
- goto success;
-
-found_in_memory:
- /* found in memory */
- _debug("found in memory");
- atomic_inc(&vl->usage);
- spin_unlock(&cell->vl_lock);
- if (!list_empty(&vl->grave)) {
- spin_lock(&afs_vlocation_graveyard_lock);
- list_del_init(&vl->grave);
- spin_unlock(&afs_vlocation_graveyard_lock);
- }
- up_write(&cell->vl_sem);
-
- /* see if it was an abandoned record that we might try filling in */
- spin_lock(&vl->lock);
- while (vl->state != AFS_VL_VALID) {
- afs_vlocation_state_t state = vl->state;
-
- _debug("invalid [state %d]", state);
-
- if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
- vl->state = AFS_VL_CREATING;
- spin_unlock(&vl->lock);
- goto fill_in_record;
- }
-
- /* must now wait for creation or update by someone else to
- * complete */
- _debug("wait");
-
- spin_unlock(&vl->lock);
- ret = wait_event_interruptible(vl->waitq,
- vl->state == AFS_VL_NEW ||
- vl->state == AFS_VL_VALID ||
- vl->state == AFS_VL_NO_VOLUME);
- if (ret < 0)
- goto error;
- spin_lock(&vl->lock);
- }
- spin_unlock(&vl->lock);
-
-success:
- _leave(" = %p", vl);
- return vl;
-
-error_abandon:
- spin_lock(&vl->lock);
- vl->state = AFS_VL_NEW;
- spin_unlock(&vl->lock);
- wake_up(&vl->waitq);
-error:
- ASSERT(vl != NULL);
- afs_put_vlocation(vl);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-}
-
-/*
- * finish using a volume location record
- */
-void afs_put_vlocation(struct afs_vlocation *vl)
-{
- if (!vl)
- return;
-
- _enter("%s", vl->vldb.name);
-
- ASSERTCMP(atomic_read(&vl->usage), >, 0);
-
- if (likely(!atomic_dec_and_test(&vl->usage))) {
- _leave("");
- return;
- }
-
- spin_lock(&afs_vlocation_graveyard_lock);
- if (atomic_read(&vl->usage) == 0) {
- _debug("buried");
- list_move_tail(&vl->grave, &afs_vlocation_graveyard);
- vl->time_of_death = ktime_get_real_seconds();
- queue_delayed_work(afs_wq, &afs_vlocation_reap,
- afs_vlocation_timeout * HZ);
-
- /* suspend updates on this record */
- if (!list_empty(&vl->update)) {
- spin_lock(&afs_vlocation_updates_lock);
- list_del_init(&vl->update);
- spin_unlock(&afs_vlocation_updates_lock);
- }
- }
- spin_unlock(&afs_vlocation_graveyard_lock);
- _leave(" [killed?]");
-}
-
-/*
- * destroy a dead volume location record
- */
-static void afs_vlocation_destroy(struct afs_vlocation *vl)
-{
- _enter("%p", vl);
-
-#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(vl->cache, 0);
-#endif
- afs_put_cell(vl->cell);
- kfree(vl);
-}
-
-/*
- * reap dead volume location records
- */
-static void afs_vlocation_reaper(struct work_struct *work)
-{
- LIST_HEAD(corpses);
- struct afs_vlocation *vl;
- unsigned long delay, expiry;
- time64_t now;
-
- _enter("");
-
- now = ktime_get_real_seconds();
- spin_lock(&afs_vlocation_graveyard_lock);
-
- while (!list_empty(&afs_vlocation_graveyard)) {
- vl = list_entry(afs_vlocation_graveyard.next,
- struct afs_vlocation, grave);
-
- _debug("check %p", vl);
-
- /* the queue is ordered most dead first */
- expiry = vl->time_of_death + afs_vlocation_timeout;
- if (expiry > now) {
- delay = (expiry - now) * HZ;
- _debug("delay %lu", delay);
- mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
- break;
- }
-
- spin_lock(&vl->cell->vl_lock);
- if (atomic_read(&vl->usage) > 0) {
- _debug("no reap");
- list_del_init(&vl->grave);
- } else {
- _debug("reap");
- list_move_tail(&vl->grave, &corpses);
- list_del_init(&vl->link);
- }
- spin_unlock(&vl->cell->vl_lock);
- }
-
- spin_unlock(&afs_vlocation_graveyard_lock);
-
- /* now reap the corpses we've extracted */
- while (!list_empty(&corpses)) {
- vl = list_entry(corpses.next, struct afs_vlocation, grave);
- list_del(&vl->grave);
- afs_vlocation_destroy(vl);
- }
-
- _leave("");
-}
-
-/*
- * initialise the VL update process
- */
-int __init afs_vlocation_update_init(void)
-{
- afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
- WQ_MEM_RECLAIM, 0);
- return afs_vlocation_update_worker ? 0 : -ENOMEM;
-}
-
-/*
- * discard all the volume location records for rmmod
- */
-void afs_vlocation_purge(void)
-{
- afs_vlocation_timeout = 0;
-
- spin_lock(&afs_vlocation_updates_lock);
- list_del_init(&afs_vlocation_updates);
- spin_unlock(&afs_vlocation_updates_lock);
- mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
- destroy_workqueue(afs_vlocation_update_worker);
-
- mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
-}
-
-/*
- * update a volume location
- */
-static void afs_vlocation_updater(struct work_struct *work)
-{
- struct afs_cache_vlocation vldb;
- struct afs_vlocation *vl, *xvl;
- time64_t now;
- long timeout;
- int ret;
-
- _enter("");
-
- now = ktime_get_real_seconds();
-
- /* find a record to update */
- spin_lock(&afs_vlocation_updates_lock);
- for (;;) {
- if (list_empty(&afs_vlocation_updates)) {
- spin_unlock(&afs_vlocation_updates_lock);
- _leave(" [nothing]");
- return;
- }
-
- vl = list_entry(afs_vlocation_updates.next,
- struct afs_vlocation, update);
- if (atomic_read(&vl->usage) > 0)
- break;
- list_del_init(&vl->update);
- }
-
- timeout = vl->update_at - now;
- if (timeout > 0) {
- queue_delayed_work(afs_vlocation_update_worker,
- &afs_vlocation_update, timeout * HZ);
- spin_unlock(&afs_vlocation_updates_lock);
- _leave(" [nothing]");
- return;
- }
-
- list_del_init(&vl->update);
- atomic_inc(&vl->usage);
- spin_unlock(&afs_vlocation_updates_lock);
-
- /* we can now perform the update */
- _debug("update %s", vl->vldb.name);
- vl->state = AFS_VL_UPDATING;
- vl->upd_rej_cnt = 0;
- vl->upd_busy_cnt = 0;
-
- ret = afs_vlocation_update_record(vl, NULL, &vldb);
- spin_lock(&vl->lock);
- switch (ret) {
- case 0:
- afs_vlocation_apply_update(vl, &vldb);
- vl->state = AFS_VL_VALID;
- break;
- case -ENOMEDIUM:
- vl->state = AFS_VL_VOLUME_DELETED;
- break;
- default:
- vl->state = AFS_VL_UNCERTAIN;
- break;
- }
- spin_unlock(&vl->lock);
- wake_up(&vl->waitq);
-
- /* and then reschedule */
- _debug("reschedule");
- vl->update_at = ktime_get_real_seconds() +
- afs_vlocation_update_timeout;
-
- spin_lock(&afs_vlocation_updates_lock);
-
- if (!list_empty(&afs_vlocation_updates)) {
- /* next update in 10 minutes, but wait at least 1 second more
- * than the newest record already queued so that we don't spam
- * the VL server suddenly with lots of requests
- */
- xvl = list_entry(afs_vlocation_updates.prev,
- struct afs_vlocation, update);
- if (vl->update_at <= xvl->update_at)
- vl->update_at = xvl->update_at + 1;
- xvl = list_entry(afs_vlocation_updates.next,
- struct afs_vlocation, update);
- timeout = xvl->update_at - now;
- if (timeout < 0)
- timeout = 0;
- } else {
- timeout = afs_vlocation_update_timeout;
- }
-
- ASSERT(list_empty(&vl->update));
-
- list_add_tail(&vl->update, &afs_vlocation_updates);
-
- _debug("timeout %ld", timeout);
- queue_delayed_work(afs_vlocation_update_worker,
- &afs_vlocation_update, timeout * HZ);
- spin_unlock(&afs_vlocation_updates_lock);
- afs_put_vlocation(vl);
-}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
deleted file mode 100644
index dcb956143c86..000000000000
--- a/fs/afs/vnode.c
+++ /dev/null
@@ -1,1025 +0,0 @@
-/* AFS vnode management
- *
- * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include "internal.h"
-
-#if 0
-static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
- int depth, char lr)
-{
- struct afs_vnode *vnode;
- bool bad = false;
-
- if (!node)
- return false;
-
- if (node->rb_left)
- bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
-
- vnode = rb_entry(node, struct afs_vnode, cb_promise);
- _debug("%c %*.*s%c%p {%d}",
- rb_is_red(node) ? 'R' : 'B',
- depth, depth, "", lr,
- vnode, vnode->cb_expires_at);
- if (rb_parent(node) != parent) {
- printk("BAD: %p != %p\n", rb_parent(node), parent);
- bad = true;
- }
-
- if (node->rb_right)
- bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
-
- return bad;
-}
-
-static noinline void dump_tree(const char *name, struct afs_server *server)
-{
- _enter("%s", name);
- if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
- BUG();
-}
-#endif
-
-/*
- * insert a vnode into the backing server's vnode tree
- */
-static void afs_install_vnode(struct afs_vnode *vnode,
- struct afs_server *server)
-{
- struct afs_server *old_server = vnode->server;
- struct afs_vnode *xvnode;
- struct rb_node *parent, **p;
-
- _enter("%p,%p", vnode, server);
-
- if (old_server) {
- spin_lock(&old_server->fs_lock);
- rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
- spin_unlock(&old_server->fs_lock);
- }
-
- afs_get_server(server);
- vnode->server = server;
- afs_put_server(old_server);
-
- /* insert into the server's vnode tree in FID order */
- spin_lock(&server->fs_lock);
-
- parent = NULL;
- p = &server->fs_vnodes.rb_node;
- while (*p) {
- parent = *p;
- xvnode = rb_entry(parent, struct afs_vnode, server_rb);
- if (vnode->fid.vid < xvnode->fid.vid)
- p = &(*p)->rb_left;
- else if (vnode->fid.vid > xvnode->fid.vid)
- p = &(*p)->rb_right;
- else if (vnode->fid.vnode < xvnode->fid.vnode)
- p = &(*p)->rb_left;
- else if (vnode->fid.vnode > xvnode->fid.vnode)
- p = &(*p)->rb_right;
- else if (vnode->fid.unique < xvnode->fid.unique)
- p = &(*p)->rb_left;
- else if (vnode->fid.unique > xvnode->fid.unique)
- p = &(*p)->rb_right;
- else
- BUG(); /* can't happen unless afs_iget() malfunctions */
- }
-
- rb_link_node(&vnode->server_rb, parent, p);
- rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
-
- spin_unlock(&server->fs_lock);
- _leave("");
-}
-
-/*
- * insert a vnode into the promising server's update/expiration tree
- * - caller must hold vnode->lock
- */
-static void afs_vnode_note_promise(struct afs_vnode *vnode,
- struct afs_server *server)
-{
- struct afs_server *old_server;
- struct afs_vnode *xvnode;
- struct rb_node *parent, **p;
-
- _enter("%p,%p", vnode, server);
-
- ASSERT(server != NULL);
-
- old_server = vnode->server;
- if (vnode->cb_promised) {
- if (server == old_server &&
- vnode->cb_expires == vnode->cb_expires_at) {
- _leave(" [no change]");
- return;
- }
-
- spin_lock(&old_server->cb_lock);
- if (vnode->cb_promised) {
- _debug("delete");
- rb_erase(&vnode->cb_promise, &old_server->cb_promises);
- vnode->cb_promised = false;
- }
- spin_unlock(&old_server->cb_lock);
- }
-
- if (vnode->server != server)
- afs_install_vnode(vnode, server);
-
- vnode->cb_expires_at = vnode->cb_expires;
- _debug("PROMISE on %p {%lu}",
- vnode, (unsigned long) vnode->cb_expires_at);
-
- /* abuse an RB-tree to hold the expiration order (we may have multiple
- * items with the same expiration time) */
- spin_lock(&server->cb_lock);
-
- parent = NULL;
- p = &server->cb_promises.rb_node;
- while (*p) {
- parent = *p;
- xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
- if (vnode->cb_expires_at < xvnode->cb_expires_at)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&vnode->cb_promise, parent, p);
- rb_insert_color(&vnode->cb_promise, &server->cb_promises);
- vnode->cb_promised = true;
-
- spin_unlock(&server->cb_lock);
- _leave("");
-}
-
-/*
- * handle remote file deletion by discarding the callback promise
- */
-static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
-{
- struct afs_server *server;
-
- _enter("{%p}", vnode->server);
-
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
-
- server = vnode->server;
- if (server) {
- if (vnode->cb_promised) {
- spin_lock(&server->cb_lock);
- if (vnode->cb_promised) {
- rb_erase(&vnode->cb_promise,
- &server->cb_promises);
- vnode->cb_promised = false;
- }
- spin_unlock(&server->cb_lock);
- }
-
- spin_lock(&server->fs_lock);
- rb_erase(&vnode->server_rb, &server->fs_vnodes);
- spin_unlock(&server->fs_lock);
-
- vnode->server = NULL;
- afs_put_server(server);
- } else {
- ASSERT(!vnode->cb_promised);
- }
-
- _leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after a successful
- * operation completion
- * - starts callback expiry timer
- * - adds to server's callback list
- */
-void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
- struct afs_server *server)
-{
- struct afs_server *oldserver = NULL;
-
- _enter("%p,%p", vnode, server);
-
- spin_lock(&vnode->lock);
- clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- afs_vnode_note_promise(vnode, server);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
-
- wake_up_all(&vnode->update_waitq);
- afs_put_server(oldserver);
- _leave("");
-}
-
-/*
- * finish off updating the recorded status of a file after an operation failed
- */
-static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
-{
- _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, ret);
-
- spin_lock(&vnode->lock);
-
- clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-
- if (ret == -ENOENT) {
- /* the file was deleted on the server */
- _debug("got NOENT from server - marking file deleted");
- afs_vnode_deleted_remotely(vnode);
- }
-
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
-
- wake_up_all(&vnode->update_waitq);
- _leave("");
-}
-
-/*
- * fetch file status from the volume
- * - don't issue a fetch if:
- * - the changed bit is not set and there's a valid callback
- * - there are any outstanding ops that will fetch the status
- * - TODO implement local caching
- */
-int afs_vnode_fetch_status(struct afs_vnode *vnode,
- struct afs_vnode *auth_vnode, struct key *key)
-{
- struct afs_server *server;
- unsigned long acl_order;
- int ret;
-
- DECLARE_WAITQUEUE(myself, current);
-
- _enter("%s,{%x:%u.%u}",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
-
- if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
- vnode->cb_promised) {
- _leave(" [unchanged]");
- return 0;
- }
-
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- _leave(" [deleted]");
- return -ENOENT;
- }
-
- acl_order = 0;
- if (auth_vnode)
- acl_order = auth_vnode->acl_order;
-
- spin_lock(&vnode->lock);
-
- if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
- vnode->cb_promised) {
- spin_unlock(&vnode->lock);
- _leave(" [unchanged]");
- return 0;
- }
-
- ASSERTCMP(vnode->update_cnt, >=, 0);
-
- if (vnode->update_cnt > 0) {
- /* someone else started a fetch */
- _debug("wait on fetch %d", vnode->update_cnt);
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- ASSERT(myself.func != NULL);
- add_wait_queue(&vnode->update_waitq, &myself);
-
- /* wait for the status to be updated */
- for (;;) {
- if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
- break;
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- break;
-
- /* check to see if it got updated and invalidated all
- * before we saw it */
- if (vnode->update_cnt == 0) {
- remove_wait_queue(&vnode->update_waitq,
- &myself);
- set_current_state(TASK_RUNNING);
- goto get_anyway;
- }
-
- spin_unlock(&vnode->lock);
-
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- spin_lock(&vnode->lock);
- }
-
- remove_wait_queue(&vnode->update_waitq, &myself);
- spin_unlock(&vnode->lock);
- set_current_state(TASK_RUNNING);
-
- return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
- -ENOENT : 0;
- }
-
-get_anyway:
- /* okay... we're going to have to initiate the op */
- vnode->update_cnt++;
-
- spin_unlock(&vnode->lock);
-
- /* merge AFS status fetches and clear outstanding callback on this
- * vnode */
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %p{%08x}",
- server, ntohl(server->addr.s_addr));
-
- ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
- false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- _debug("adjust");
- if (auth_vnode)
- afs_cache_permit(vnode, key, acl_order);
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
- } else {
- _debug("failed [%d]", ret);
- afs_vnode_status_update_failed(vnode, ret);
- }
-
- ASSERTCMP(vnode->update_cnt, >=, 0);
-
- _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * fetch file data from the volume
- * - TODO implement caching
- */
-int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
- struct afs_read *desc)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,,,",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key));
-
- /* this op will fetch the status */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- /* merge in AFS status fetches and clear outstanding callback on this
- * vnode */
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_fetch_data(server, key, vnode, desc,
- false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- }
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- return PTR_ERR(server);
-}
-
-/*
- * make a file or a directory
- */
-int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
- const char *name, umode_t mode, struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb, struct afs_server **_server)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,%s,,",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key),
- name);
-
- /* this op will fetch the status on the directory we're creating in */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_create(server, key, vnode, name, mode, newfid,
- newstatus, newcb, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- *_server = server;
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- *_server = NULL;
- }
-
- _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * remove a file or directory
- */
-int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
- bool isdir)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,%s",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key),
- name);
-
- /* this op will fetch the status on the directory we're removing from */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_remove(server, key, vnode, name, isdir,
- false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- }
-
- _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * create a hard link
- */
-int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
- struct key *key, const char *name)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
- dvnode->volume->vlocation->vldb.name,
- dvnode->fid.vid,
- dvnode->fid.vnode,
- dvnode->fid.unique,
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key),
- name);
-
- /* this op will fetch the status on the directory we're removing from */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
- spin_lock(&dvnode->lock);
- dvnode->update_cnt++;
- spin_unlock(&dvnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(dvnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_link(server, key, dvnode, vnode, name,
- false);
-
- } while (!afs_volume_release_fileserver(dvnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- afs_vnode_finalise_status_update(dvnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- afs_vnode_status_update_failed(dvnode, ret);
- }
-
- _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- spin_lock(&dvnode->lock);
- dvnode->update_cnt--;
- ASSERTCMP(dvnode->update_cnt, >=, 0);
- spin_unlock(&dvnode->lock);
- _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * create a symbolic link
- */
-int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
- const char *name, const char *content,
- struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_server **_server)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,%s,%s,,,",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key),
- name, content);
-
- /* this op will fetch the status on the directory we're creating in */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_symlink(server, key, vnode, name, content,
- newfid, newstatus, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- *_server = server;
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- *_server = NULL;
- }
-
- _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * rename a file
- */
-int afs_vnode_rename(struct afs_vnode *orig_dvnode,
- struct afs_vnode *new_dvnode,
- struct key *key,
- const char *orig_name,
- const char *new_name)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
- orig_dvnode->volume->vlocation->vldb.name,
- orig_dvnode->fid.vid,
- orig_dvnode->fid.vnode,
- orig_dvnode->fid.unique,
- new_dvnode->volume->vlocation->vldb.name,
- new_dvnode->fid.vid,
- new_dvnode->fid.vnode,
- new_dvnode->fid.unique,
- key_serial(key),
- orig_name,
- new_name);
-
- /* this op will fetch the status on both the directories we're dealing
- * with */
- spin_lock(&orig_dvnode->lock);
- orig_dvnode->update_cnt++;
- spin_unlock(&orig_dvnode->lock);
- if (new_dvnode != orig_dvnode) {
- spin_lock(&new_dvnode->lock);
- new_dvnode->update_cnt++;
- spin_unlock(&new_dvnode->lock);
- }
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(orig_dvnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
- new_dvnode, new_name, false);
-
- } while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(orig_dvnode, server);
- if (new_dvnode != orig_dvnode)
- afs_vnode_finalise_status_update(new_dvnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(orig_dvnode, ret);
- if (new_dvnode != orig_dvnode)
- afs_vnode_status_update_failed(new_dvnode, ret);
- }
-
- _leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
- return ret;
-
-no_server:
- spin_lock(&orig_dvnode->lock);
- orig_dvnode->update_cnt--;
- ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
- spin_unlock(&orig_dvnode->lock);
- if (new_dvnode != orig_dvnode) {
- spin_lock(&new_dvnode->lock);
- new_dvnode->update_cnt--;
- ASSERTCMP(new_dvnode->update_cnt, >=, 0);
- spin_unlock(&new_dvnode->lock);
- }
- _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
- return PTR_ERR(server);
-}
-
-/*
- * write to a file
- */
-int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
-{
- struct afs_server *server;
- struct afs_vnode *vnode = wb->vnode;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(wb->key),
- first, last, offset, to);
-
- /* this op will fetch the status */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_store_data(server, wb, first, last, offset, to,
- false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- }
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- return PTR_ERR(server);
-}
-
-/*
- * set the attributes on a file
- */
-int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
- struct iattr *attr)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key));
-
- /* this op will fetch the status */
- spin_lock(&vnode->lock);
- vnode->update_cnt++;
- spin_unlock(&vnode->lock);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_setattr(server, key, vnode, attr, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0) {
- afs_vnode_finalise_status_update(vnode, server);
- afs_put_server(server);
- } else {
- afs_vnode_status_update_failed(vnode, ret);
- }
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- spin_lock(&vnode->lock);
- vnode->update_cnt--;
- ASSERTCMP(vnode->update_cnt, >=, 0);
- spin_unlock(&vnode->lock);
- return PTR_ERR(server);
-}
-
-/*
- * get the status of a volume
- */
-int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key,
- struct afs_volume_status *vs)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key));
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_get_volume_status(server, key, vnode, vs, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0)
- afs_put_server(server);
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- return PTR_ERR(server);
-}
-
-/*
- * get a lock on a file
- */
-int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key,
- afs_lock_type_t type)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x,%u",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key), type);
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_set_lock(server, key, vnode, type, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0)
- afs_put_server(server);
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- return PTR_ERR(server);
-}
-
-/*
- * extend a lock on a file
- */
-int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key));
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_extend_lock(server, key, vnode, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0)
- afs_put_server(server);
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- return PTR_ERR(server);
-}
-
-/*
- * release a lock on a file
- */
-int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key)
-{
- struct afs_server *server;
- int ret;
-
- _enter("%s{%x:%u.%u},%x",
- vnode->volume->vlocation->vldb.name,
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- key_serial(key));
-
- do {
- /* pick a server to query */
- server = afs_volume_pick_fileserver(vnode);
- if (IS_ERR(server))
- goto no_server;
-
- _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
-
- ret = afs_fs_release_lock(server, key, vnode, false);
-
- } while (!afs_volume_release_fileserver(vnode, server, ret));
-
- /* adjust the flags */
- if (ret == 0)
- afs_put_server(server);
-
- _leave(" = %d", ret);
- return ret;
-
-no_server:
- return PTR_ERR(server);
-}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index db73d6dad02b..684c48293353 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -10,19 +10,167 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/sched.h>
#include "internal.h"
-static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
+unsigned __read_mostly afs_volume_gc_delay = 10;
+unsigned __read_mostly afs_volume_record_life = 60 * 60;
+
+static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
+
+/*
+ * Allocate a volume record and load it up from a vldb record.
+ */
+static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
+ struct afs_vldb_entry *vldb,
+ unsigned long type_mask)
+{
+ struct afs_server_list *slist;
+ struct afs_server *server;
+ struct afs_volume *volume;
+ int ret = -ENOMEM, nr_servers = 0, i, j;
+
+ for (i = 0; i < vldb->nr_servers; i++)
+ if (vldb->fs_mask[i] & type_mask)
+ nr_servers++;
+
+ volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
+ if (!volume)
+ goto error_0;
+
+ volume->vid = vldb->vid[params->type];
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ volume->cell = afs_get_cell(params->cell);
+ volume->type = params->type;
+ volume->type_force = params->force;
+ volume->name_len = vldb->name_len;
+
+ atomic_set(&volume->usage, 1);
+ INIT_LIST_HEAD(&volume->proc_link);
+ rwlock_init(&volume->servers_lock);
+ memcpy(volume->name, vldb->name, vldb->name_len + 1);
+
+ slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
+ if (IS_ERR(slist)) {
+ ret = PTR_ERR(slist);
+ goto error_1;
+ }
+
+ refcount_set(&slist->usage, 1);
+ volume->servers = slist;
+
+ /* Make sure a records exists for each server this volume occupies. */
+ for (i = 0; i < nr_servers; i++) {
+ if (!(vldb->fs_mask[i] & type_mask))
+ continue;
+
+ server = afs_lookup_server(params->cell, params->key,
+ &vldb->fs_server[i]);
+ if (IS_ERR(server)) {
+ ret = PTR_ERR(server);
+ if (ret == -ENOENT)
+ continue;
+ goto error_2;
+ }
+
+ /* Insertion-sort by server pointer */
+ for (j = 0; j < slist->nr_servers; j++)
+ if (slist->servers[j].server >= server)
+ break;
+ if (j < slist->nr_servers) {
+ if (slist->servers[j].server == server) {
+ afs_put_server(params->net, server);
+ continue;
+ }
+
+ memmove(slist->servers + j + 1,
+ slist->servers + j,
+ (slist->nr_servers - j) * sizeof(struct afs_server_entry));
+ }
+
+ slist->servers[j].server = server;
+ slist->nr_servers++;
+ }
+
+ if (slist->nr_servers == 0) {
+ ret = -EDESTADDRREQ;
+ goto error_2;
+ }
+
+ return volume;
+
+error_2:
+ afs_put_serverlist(params->net, slist);
+error_1:
+ kfree(volume);
+error_0:
+ return ERR_PTR(ret);
+}
/*
- * lookup a volume by name
- * - this can be one of the following:
+ * Look up a VLDB record for a volume.
+ */
+static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
+ struct key *key,
+ const char *volname,
+ size_t volnamesz)
+{
+ struct afs_addr_cursor ac;
+ struct afs_vldb_entry *vldb;
+ int ret;
+
+ ret = afs_set_vl_cursor(&ac, cell);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ while (afs_iterate_addresses(&ac)) {
+ if (!test_bit(ac.index, &ac.alist->probed)) {
+ ret = afs_vl_get_capabilities(cell->net, &ac, key);
+ switch (ret) {
+ case VL_SERVICE:
+ clear_bit(ac.index, &ac.alist->yfs);
+ set_bit(ac.index, &ac.alist->probed);
+ ac.addr->srx_service = ret;
+ break;
+ case YFS_VL_SERVICE:
+ set_bit(ac.index, &ac.alist->yfs);
+ set_bit(ac.index, &ac.alist->probed);
+ ac.addr->srx_service = ret;
+ break;
+ }
+ }
+
+ vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
+ volname, volnamesz);
+ switch (ac.error) {
+ case 0:
+ afs_end_cursor(&ac);
+ return vldb;
+ case -ECONNABORTED:
+ ac.error = afs_abort_to_error(ac.abort_code);
+ goto error;
+ case -ENOMEM:
+ case -ENONET:
+ goto error;
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ break;
+ default:
+ ac.error = -EIO;
+ goto error;
+ }
+ }
+
+error:
+ return ERR_PTR(afs_end_cursor(&ac));
+}
+
+/*
+ * Look up a volume in the VL server and create a candidate volume record for
+ * it.
+ *
+ * The volume name can be one of the following:
* "%[cell:]volume[.]" R/W volume
* "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
* or R/W (rwparent=1) volume
@@ -42,353 +190,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
* - Rule 3: If parent volume is R/W, then only mount R/W volume unless
* explicitly told otherwise
*/
-struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
+struct afs_volume *afs_create_volume(struct afs_mount_params *params)
{
- struct afs_vlocation *vlocation = NULL;
- struct afs_volume *volume = NULL;
- struct afs_server *server = NULL;
- char srvtmask;
- int ret, loop;
-
- _enter("{%*.*s,%d}",
- params->volnamesz, params->volnamesz, params->volname, params->rwpath);
-
- /* lookup the volume location record */
- vlocation = afs_vlocation_lookup(params->cell, params->key,
- params->volname, params->volnamesz);
- if (IS_ERR(vlocation)) {
- ret = PTR_ERR(vlocation);
- vlocation = NULL;
- goto error;
- }
+ struct afs_vldb_entry *vldb;
+ struct afs_volume *volume;
+ unsigned long type_mask = 1UL << params->type;
- /* make the final decision on the type we want */
- ret = -ENOMEDIUM;
- if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
- goto error;
+ vldb = afs_vl_lookup_vldb(params->cell, params->key,
+ params->volname, params->volnamesz);
+ if (IS_ERR(vldb))
+ return ERR_CAST(vldb);
- srvtmask = 0;
- for (loop = 0; loop < vlocation->vldb.nservers; loop++)
- srvtmask |= vlocation->vldb.srvtmask[loop];
+ if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
+ volume = ERR_PTR(vldb->error);
+ goto error;
+ }
+ /* Make the final decision on the type we want */
+ volume = ERR_PTR(-ENOMEDIUM);
if (params->force) {
- if (!(srvtmask & (1 << params->type)))
+ if (!(vldb->flags & type_mask))
goto error;
- } else if (srvtmask & AFS_VOL_VTM_RO) {
+ } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
params->type = AFSVL_ROVOL;
- } else if (srvtmask & AFS_VOL_VTM_RW) {
+ } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
params->type = AFSVL_RWVOL;
} else {
goto error;
}
- down_write(&params->cell->vl_sem);
+ type_mask = 1UL << params->type;
+ volume = afs_alloc_volume(params, vldb, type_mask);
- /* is the volume already active? */
- if (vlocation->vols[params->type]) {
- /* yes - re-use it */
- volume = vlocation->vols[params->type];
- afs_get_volume(volume);
- goto success;
- }
+error:
+ kfree(vldb);
+ return volume;
+}
- /* create a new volume record */
- _debug("creating new volume record");
+/*
+ * Destroy a volume record
+ */
+static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
+{
+ _enter("%p", volume);
- ret = -ENOMEM;
- volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
- if (!volume)
- goto error_up;
+#ifdef CONFIG_AFS_FSCACHE
+ ASSERTCMP(volume->cache, ==, NULL);
+#endif
- atomic_set(&volume->usage, 1);
- volume->type = params->type;
- volume->type_force = params->force;
- volume->cell = params->cell;
- volume->vid = vlocation->vldb.vid[params->type];
-
- init_rwsem(&volume->server_sem);
-
- /* look up all the applicable server records */
- for (loop = 0; loop < 8; loop++) {
- if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
- server = afs_lookup_server(
- volume->cell, &vlocation->vldb.servers[loop]);
- if (IS_ERR(server)) {
- ret = PTR_ERR(server);
- goto error_discard;
- }
+ afs_put_serverlist(net, volume->servers);
+ afs_put_cell(net, volume->cell);
+ kfree(volume);
- volume->servers[volume->nservers] = server;
- volume->nservers++;
- }
+ _leave(" [destroyed]");
+}
+
+/*
+ * Drop a reference on a volume record.
+ */
+void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
+{
+ if (volume) {
+ _enter("%s", volume->name);
+
+ if (atomic_dec_and_test(&volume->usage))
+ afs_destroy_volume(cell->net, volume);
}
+}
- /* attach the cache and volume location */
+/*
+ * Activate a volume.
+ */
+void afs_activate_volume(struct afs_volume *volume)
+{
#ifdef CONFIG_AFS_FSCACHE
- volume->cache = fscache_acquire_cookie(vlocation->cache,
+ volume->cache = fscache_acquire_cookie(volume->cell->cache,
&afs_volume_cache_index_def,
volume, true);
#endif
- afs_get_vlocation(vlocation);
- volume->vlocation = vlocation;
-
- vlocation->vols[volume->type] = volume;
-
-success:
- _debug("kAFS selected %s volume %08x",
- afs_voltypes[volume->type], volume->vid);
- up_write(&params->cell->vl_sem);
- afs_put_vlocation(vlocation);
- _leave(" = %p", volume);
- return volume;
-
- /* clean up */
-error_up:
- up_write(&params->cell->vl_sem);
-error:
- afs_put_vlocation(vlocation);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-
-error_discard:
- up_write(&params->cell->vl_sem);
-
- for (loop = volume->nservers - 1; loop >= 0; loop--)
- afs_put_server(volume->servers[loop]);
- kfree(volume);
- goto error;
+ write_lock(&volume->cell->proc_lock);
+ list_add_tail(&volume->proc_link, &volume->cell->proc_volumes);
+ write_unlock(&volume->cell->proc_lock);
}
/*
- * destroy a volume record
+ * Deactivate a volume.
*/
-void afs_put_volume(struct afs_volume *volume)
+void afs_deactivate_volume(struct afs_volume *volume)
{
- struct afs_vlocation *vlocation;
- int loop;
-
- if (!volume)
- return;
-
- _enter("%p", volume);
+ _enter("%s", volume->name);
- ASSERTCMP(atomic_read(&volume->usage), >, 0);
+ write_lock(&volume->cell->proc_lock);
+ list_del_init(&volume->proc_link);
+ write_unlock(&volume->cell->proc_lock);
- vlocation = volume->vlocation;
-
- /* to prevent a race, the decrement and the dequeue must be effectively
- * atomic */
- down_write(&vlocation->cell->vl_sem);
-
- if (likely(!atomic_dec_and_test(&volume->usage))) {
- up_write(&vlocation->cell->vl_sem);
- _leave("");
- return;
- }
-
- vlocation->vols[volume->type] = NULL;
-
- up_write(&vlocation->cell->vl_sem);
-
- /* finish cleaning up the volume */
#ifdef CONFIG_AFS_FSCACHE
- fscache_relinquish_cookie(volume->cache, 0);
+ fscache_relinquish_cookie(volume->cache,
+ test_bit(AFS_VOLUME_DELETED, &volume->flags));
+ volume->cache = NULL;
#endif
- afs_put_vlocation(vlocation);
-
- for (loop = volume->nservers - 1; loop >= 0; loop--)
- afs_put_server(volume->servers[loop]);
-
- kfree(volume);
- _leave(" [destroyed]");
+ _leave("");
}
/*
- * pick a server to use to try accessing this volume
- * - returns with an elevated usage count on the server chosen
+ * Query the VL service to update the volume status.
*/
-struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
+static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
- struct afs_volume *volume = vnode->volume;
- struct afs_server *server;
- int ret, state, loop;
+ struct afs_server_list *new, *old, *discard;
+ struct afs_vldb_entry *vldb;
+ char idbuf[16];
+ int ret, idsz;
+
+ _enter("");
+
+ /* We look up an ID by passing it as a decimal string in the
+ * operation's name parameter.
+ */
+ idsz = sprintf(idbuf, "%u", volume->vid);
- _enter("%s", volume->vlocation->vldb.name);
+ vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
+ if (IS_ERR(vldb)) {
+ ret = PTR_ERR(vldb);
+ goto error;
+ }
- /* stick with the server we're already using if we can */
- if (vnode->server && vnode->server->fs_state == 0) {
- afs_get_server(vnode->server);
- _leave(" = %p [current]", vnode->server);
- return vnode->server;
+ /* See if the volume got renamed. */
+ if (vldb->name_len != volume->name_len ||
+ memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
+ /* TODO: Use RCU'd string. */
+ memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
+ volume->name_len = vldb->name_len;
}
- down_read(&volume->server_sem);
+ /* See if the volume's server list got updated. */
+ new = afs_alloc_server_list(volume->cell, key,
+ vldb, (1 << volume->type));
+ if (IS_ERR(new)) {
+ ret = PTR_ERR(new);
+ goto error_vldb;
+ }
- /* handle the no-server case */
- if (volume->nservers == 0) {
- ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
- up_read(&volume->server_sem);
- _leave(" = %d [no servers]", ret);
- return ERR_PTR(ret);
+ write_lock(&volume->servers_lock);
+
+ discard = new;
+ old = volume->servers;
+ if (afs_annotate_server_list(new, old)) {
+ new->seq = volume->servers_seq + 1;
+ volume->servers = new;
+ smp_wmb();
+ volume->servers_seq++;
+ discard = old;
}
- /* basically, just search the list for the first live server and use
- * that */
+ volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
+ clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
+ write_unlock(&volume->servers_lock);
ret = 0;
- for (loop = 0; loop < volume->nservers; loop++) {
- server = volume->servers[loop];
- state = server->fs_state;
- _debug("consider %d [%d]", loop, state);
+ afs_put_serverlist(volume->cell->net, discard);
+error_vldb:
+ kfree(vldb);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
- switch (state) {
- /* found an apparently healthy server */
- case 0:
- afs_get_server(server);
- up_read(&volume->server_sem);
- _leave(" = %p (picked %08x)",
- server, ntohl(server->addr.s_addr));
- return server;
+/*
+ * Make sure the volume record is up to date.
+ */
+int afs_check_volume_status(struct afs_volume *volume, struct key *key)
+{
+ time64_t now = ktime_get_real_seconds();
+ int ret, retries = 0;
- case -ENETUNREACH:
- if (ret == 0)
- ret = state;
- break;
+ _enter("");
- case -EHOSTUNREACH:
- if (ret == 0 ||
- ret == -ENETUNREACH)
- ret = state;
- break;
+ if (volume->update_at <= now)
+ set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
- case -ECONNREFUSED:
- if (ret == 0 ||
- ret == -ENETUNREACH ||
- ret == -EHOSTUNREACH)
- ret = state;
- break;
+retry:
+ if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) &&
+ !test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+ _leave(" = 0");
+ return 0;
+ }
- default:
- case -EREMOTEIO:
- if (ret == 0 ||
- ret == -ENETUNREACH ||
- ret == -EHOSTUNREACH ||
- ret == -ECONNREFUSED)
- ret = state;
- break;
- }
+ if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
+ ret = afs_update_volume_status(volume, key);
+ clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
+ clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
+ wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
+ _leave(" = %d", ret);
+ return ret;
}
- /* no available servers
- * - TODO: handle the no active servers case better
- */
- up_read(&volume->server_sem);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
-}
+ if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
+ _leave(" = 0 [no wait]");
+ return 0;
+ }
-/*
- * release a server after use
- * - releases the ref on the server struct that was acquired by picking
- * - records result of using a particular server to access a volume
- * - return 0 to try again, 1 if okay or to issue error
- * - the caller must release the server struct if result was 0
- */
-int afs_volume_release_fileserver(struct afs_vnode *vnode,
- struct afs_server *server,
- int result)
-{
- struct afs_volume *volume = vnode->volume;
- unsigned loop;
-
- _enter("%s,%08x,%d",
- volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
- result);
-
- switch (result) {
- /* success */
- case 0:
- server->fs_act_jif = jiffies;
- server->fs_state = 0;
- _leave("");
- return 1;
-
- /* the fileserver denied all knowledge of the volume */
- case -ENOMEDIUM:
- server->fs_act_jif = jiffies;
- down_write(&volume->server_sem);
-
- /* firstly, find where the server is in the active list (if it
- * is) */
- for (loop = 0; loop < volume->nservers; loop++)
- if (volume->servers[loop] == server)
- goto present;
-
- /* no longer there - may have been discarded by another op */
- goto try_next_server_upw;
-
- present:
- volume->nservers--;
- memmove(&volume->servers[loop],
- &volume->servers[loop + 1],
- sizeof(volume->servers[loop]) *
- (volume->nservers - loop));
- volume->servers[volume->nservers] = NULL;
- afs_put_server(server);
- volume->rjservers++;
-
- if (volume->nservers > 0)
- /* another server might acknowledge its existence */
- goto try_next_server_upw;
-
- /* handle the case where all the fileservers have rejected the
- * volume
- * - TODO: try asking the fileservers for volume information
- * - TODO: contact the VL server again to see if the volume is
- * no longer registered
- */
- up_write(&volume->server_sem);
- afs_put_server(server);
- _leave(" [completely rejected]");
- return 1;
-
- /* problem reaching the server */
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- case -ETIME:
- case -ETIMEDOUT:
- case -EREMOTEIO:
- /* mark the server as dead
- * TODO: vary dead timeout depending on error
- */
- spin_lock(&server->fs_lock);
- if (!server->fs_state) {
- server->fs_dead_jif = jiffies + HZ * 10;
- server->fs_state = result;
- printk("kAFS: SERVER DEAD state=%d\n", result);
- }
- spin_unlock(&server->fs_lock);
- goto try_next_server;
-
- /* miscellaneous error */
- default:
- server->fs_act_jif = jiffies;
- case -ENOMEM:
- case -ENONET:
- /* tell the caller to accept the result */
- afs_put_server(server);
- _leave(" [local failure]");
- return 1;
+ ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
+ if (ret == -ERESTARTSYS) {
+ _leave(" = %d", ret);
+ return ret;
}
- /* tell the caller to loop around and try the next server */
-try_next_server_upw:
- up_write(&volume->server_sem);
-try_next_server:
- afs_put_server(server);
- _leave(" [try next server]");
- return 0;
+ retries++;
+ if (retries == 4) {
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
+ goto retry;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 11dd0526b96b..18e46e31523c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -8,6 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -16,9 +17,6 @@
#include <linux/pagevec.h>
#include "internal.h"
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
- struct page *page);
-
/*
* mark a page as having been made dirty and thus needing writeback
*/
@@ -29,58 +27,6 @@ int afs_set_page_dirty(struct page *page)
}
/*
- * unlink a writeback record because its usage has reached zero
- * - must be called with the wb->vnode->writeback_lock held
- */
-static void afs_unlink_writeback(struct afs_writeback *wb)
-{
- struct afs_writeback *front;
- struct afs_vnode *vnode = wb->vnode;
-
- list_del_init(&wb->link);
- if (!list_empty(&vnode->writebacks)) {
- /* if an fsync rises to the front of the queue then wake it
- * up */
- front = list_entry(vnode->writebacks.next,
- struct afs_writeback, link);
- if (front->state == AFS_WBACK_SYNCING) {
- _debug("wake up sync");
- front->state = AFS_WBACK_COMPLETE;
- wake_up(&front->waitq);
- }
- }
-}
-
-/*
- * free a writeback record
- */
-static void afs_free_writeback(struct afs_writeback *wb)
-{
- _enter("");
- key_put(wb->key);
- kfree(wb);
-}
-
-/*
- * dispose of a reference to a writeback record
- */
-void afs_put_writeback(struct afs_writeback *wb)
-{
- struct afs_vnode *vnode = wb->vnode;
-
- _enter("{%d}", wb->usage);
-
- spin_lock(&vnode->writeback_lock);
- if (--wb->usage == 0)
- afs_unlink_writeback(wb);
- else
- wb = NULL;
- spin_unlock(&vnode->writeback_lock);
- if (wb)
- afs_free_writeback(wb);
-}
-
-/*
* partly or wholly fill a page that's under preparation for writing
*/
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
@@ -103,7 +49,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
req->pages[0] = page;
get_page(page);
- ret = afs_vnode_fetch_data(vnode, key, req);
+ ret = afs_fetch_data(vnode, key, req);
afs_put_read(req);
if (ret < 0) {
if (ret == -ENOENT) {
@@ -125,42 +71,32 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- struct afs_writeback *candidate, *wb;
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
- struct key *key = file->private_data;
- unsigned from = pos & (PAGE_SIZE - 1);
- unsigned to = from + len;
+ struct key *key = afs_file_key(file);
+ unsigned long priv;
+ unsigned f, from = pos & (PAGE_SIZE - 1);
+ unsigned t, to = from + len;
pgoff_t index = pos >> PAGE_SHIFT;
int ret;
_enter("{%x:%u},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
- candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
- if (!candidate)
- return -ENOMEM;
- candidate->vnode = vnode;
- candidate->first = candidate->last = index;
- candidate->offset_first = from;
- candidate->to_last = to;
- INIT_LIST_HEAD(&candidate->link);
- candidate->usage = 1;
- candidate->state = AFS_WBACK_PENDING;
- init_waitqueue_head(&candidate->waitq);
+ /* We want to store information about how much of a page is altered in
+ * page->private.
+ */
+ BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- kfree(candidate);
+ if (!page)
return -ENOMEM;
- }
if (!PageUptodate(page) && len != PAGE_SIZE) {
ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
if (ret < 0) {
unlock_page(page);
put_page(page);
- kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
}
@@ -171,79 +107,54 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
try_again:
- spin_lock(&vnode->writeback_lock);
-
- /* see if this page is already pending a writeback under a suitable key
- * - if so we can just join onto that one */
- wb = (struct afs_writeback *) page_private(page);
- if (wb) {
- if (wb->key == key && wb->state == AFS_WBACK_PENDING)
- goto subsume_in_current_wb;
- goto flush_conflicting_wb;
+ /* See if this page is already partially written in a way that we can
+ * merge the new write with.
+ */
+ t = f = 0;
+ if (PagePrivate(page)) {
+ priv = page_private(page);
+ f = priv & AFS_PRIV_MAX;
+ t = priv >> AFS_PRIV_SHIFT;
+ ASSERTCMP(f, <=, t);
}
- if (index > 0) {
- /* see if we can find an already pending writeback that we can
- * append this page to */
- list_for_each_entry(wb, &vnode->writebacks, link) {
- if (wb->last == index - 1 && wb->key == key &&
- wb->state == AFS_WBACK_PENDING)
- goto append_to_previous_wb;
- }
+ if (f != t) {
+ if (to < f || from > t)
+ goto flush_conflicting_write;
+ if (from < f)
+ f = from;
+ if (to > t)
+ t = to;
+ } else {
+ f = from;
+ t = to;
}
- list_add_tail(&candidate->link, &vnode->writebacks);
- candidate->key = key_get(key);
- spin_unlock(&vnode->writeback_lock);
- SetPagePrivate(page);
- set_page_private(page, (unsigned long) candidate);
- _leave(" = 0 [new]");
- return 0;
-
-subsume_in_current_wb:
- _debug("subsume");
- ASSERTRANGE(wb->first, <=, index, <=, wb->last);
- if (index == wb->first && from < wb->offset_first)
- wb->offset_first = from;
- if (index == wb->last && to > wb->to_last)
- wb->to_last = to;
- spin_unlock(&vnode->writeback_lock);
- kfree(candidate);
- _leave(" = 0 [sub]");
- return 0;
-
-append_to_previous_wb:
- _debug("append into %lx-%lx", wb->first, wb->last);
- wb->usage++;
- wb->last++;
- wb->to_last = to;
- spin_unlock(&vnode->writeback_lock);
+ priv = (unsigned long)t << AFS_PRIV_SHIFT;
+ priv |= f;
+ trace_afs_page_dirty(vnode, tracepoint_string("begin"),
+ page->index, priv);
SetPagePrivate(page);
- set_page_private(page, (unsigned long) wb);
- kfree(candidate);
- _leave(" = 0 [app]");
+ set_page_private(page, priv);
+ _leave(" = 0");
return 0;
- /* the page is currently bound to another context, so if it's dirty we
- * need to flush it before we can use the new context */
-flush_conflicting_wb:
+ /* The previous write and this write aren't adjacent or overlapping, so
+ * flush the page out.
+ */
+flush_conflicting_write:
_debug("flush conflict");
- if (wb->state == AFS_WBACK_PENDING)
- wb->state = AFS_WBACK_CONFLICTING;
- spin_unlock(&vnode->writeback_lock);
- if (clear_page_dirty_for_io(page)) {
- ret = afs_write_back_from_locked_page(wb, page);
- if (ret < 0) {
- afs_put_writeback(candidate);
- _leave(" = %d", ret);
- return ret;
- }
+ ret = write_one_page(page);
+ if (ret < 0) {
+ _leave(" = %d", ret);
+ return ret;
}
- /* the page holds a ref on the writeback record */
- afs_put_writeback(wb);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ ret = lock_page_killable(page);
+ if (ret < 0) {
+ _leave(" = %d", ret);
+ return ret;
+ }
goto try_again;
}
@@ -255,7 +166,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- struct key *key = file->private_data;
+ struct key *key = afs_file_key(file);
loff_t i_size, maybe_i_size;
int ret;
@@ -266,11 +177,11 @@ int afs_write_end(struct file *file, struct address_space *mapping,
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size) {
- spin_lock(&vnode->writeback_lock);
+ spin_lock(&vnode->wb_lock);
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size)
i_size_write(&vnode->vfs_inode, maybe_i_size);
- spin_unlock(&vnode->writeback_lock);
+ spin_unlock(&vnode->wb_lock);
}
if (!PageUptodate(page)) {
@@ -299,9 +210,10 @@ int afs_write_end(struct file *file, struct address_space *mapping,
/*
* kill all the pages in the given range
*/
-static void afs_kill_pages(struct afs_vnode *vnode, bool error,
+static void afs_kill_pages(struct address_space *mapping,
pgoff_t first, pgoff_t last)
{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct pagevec pv;
unsigned count, loop;
@@ -316,37 +228,157 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
count = last - first + 1;
if (count > PAGEVEC_SIZE)
count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
- first, count, pv.pages);
+ pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
struct page *page = pv.pages[loop];
ClearPageUptodate(page);
- if (error)
- SetPageError(page);
- if (PageWriteback(page))
- end_page_writeback(page);
+ SetPageError(page);
+ end_page_writeback(page);
if (page->index >= first)
first = page->index + 1;
+ lock_page(page);
+ generic_error_remove_page(mapping, page);
}
__pagevec_release(&pv);
- } while (first < last);
+ } while (first <= last);
_leave("");
}
/*
- * synchronously write back the locked page and any subsequent non-locked dirty
- * pages also covered by the same writeback record
+ * Redirty all the pages in a given range.
+ */
+static void afs_redirty_pages(struct writeback_control *wbc,
+ struct address_space *mapping,
+ pgoff_t first, pgoff_t last)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct pagevec pv;
+ unsigned count, loop;
+
+ _enter("{%x:%u},%lx-%lx",
+ vnode->fid.vid, vnode->fid.vnode, first, last);
+
+ pagevec_init(&pv);
+
+ do {
+ _debug("redirty %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > PAGEVEC_SIZE)
+ count = PAGEVEC_SIZE;
+ pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
+ ASSERTCMP(pv.nr, ==, count);
+
+ for (loop = 0; loop < count; loop++) {
+ struct page *page = pv.pages[loop];
+
+ redirty_page_for_writepage(wbc, page);
+ end_page_writeback(page);
+ if (page->index >= first)
+ first = page->index + 1;
+ }
+
+ __pagevec_release(&pv);
+ } while (first <= last);
+
+ _leave("");
+}
+
+/*
+ * write to a file
+ */
+static int afs_store_data(struct address_space *mapping,
+ pgoff_t first, pgoff_t last,
+ unsigned offset, unsigned to)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct afs_fs_cursor fc;
+ struct afs_wb_key *wbk = NULL;
+ struct list_head *p;
+ int ret = -ENOKEY, ret2;
+
+ _enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
+ vnode->volume->name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ first, last, offset, to);
+
+ spin_lock(&vnode->wb_lock);
+ p = vnode->wb_keys.next;
+
+ /* Iterate through the list looking for a valid key to use. */
+try_next_key:
+ while (p != &vnode->wb_keys) {
+ wbk = list_entry(p, struct afs_wb_key, vnode_link);
+ _debug("wbk %u", key_serial(wbk->key));
+ ret2 = key_validate(wbk->key);
+ if (ret2 == 0)
+ goto found_key;
+ if (ret == -ENOKEY)
+ ret = ret2;
+ p = p->next;
+ }
+
+ spin_unlock(&vnode->wb_lock);
+ afs_put_wb_key(wbk);
+ _leave(" = %d [no keys]", ret);
+ return ret;
+
+found_key:
+ refcount_inc(&wbk->usage);
+ spin_unlock(&vnode->wb_lock);
+
+ _debug("USE WB KEY %u", key_serial(wbk->key));
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_store_data(&fc, mapping, first, last, offset, to);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ switch (ret) {
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ _debug("next");
+ spin_lock(&vnode->wb_lock);
+ p = wbk->vnode_link.next;
+ afs_put_wb_key(wbk);
+ goto try_next_key;
+ }
+
+ afs_put_wb_key(wbk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Synchronously write back the locked page and any subsequent non-locked dirty
+ * pages.
*/
-static int afs_write_back_from_locked_page(struct afs_writeback *wb,
- struct page *primary_page)
+static int afs_write_back_from_locked_page(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct page *primary_page,
+ pgoff_t final_page)
{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct page *pages[8], *page;
- unsigned long count;
- unsigned n, offset, to;
+ unsigned long count, priv;
+ unsigned n, offset, to, f, t;
pgoff_t start, first, last;
int loop, ret;
@@ -356,20 +388,33 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
if (test_set_page_writeback(primary_page))
BUG();
- /* find all consecutive lockable dirty pages, stopping when we find a
- * page that is not immediately lockable, is not dirty or is missing,
- * or we reach the end of the range */
+ /* Find all consecutive lockable dirty pages that have contiguous
+ * written regions, stopping when we find a page that is not
+ * immediately lockable, is not dirty or is missing, or we reach the
+ * end of the range.
+ */
start = primary_page->index;
- if (start >= wb->last)
+ priv = page_private(primary_page);
+ offset = priv & AFS_PRIV_MAX;
+ to = priv >> AFS_PRIV_SHIFT;
+ trace_afs_page_dirty(vnode, tracepoint_string("store"),
+ primary_page->index, priv);
+
+ WARN_ON(offset == to);
+ if (offset == to)
+ trace_afs_page_dirty(vnode, tracepoint_string("WARN"),
+ primary_page->index, priv);
+
+ if (start >= final_page || to < PAGE_SIZE)
goto no_more;
+
start++;
do {
_debug("more %lx [%lx]", start, count);
- n = wb->last - start + 1;
+ n = final_page - start + 1;
if (n > ARRAY_SIZE(pages))
n = ARRAY_SIZE(pages);
- n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
- start, n, pages);
+ n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages);
_debug("fgpc %u", n);
if (n == 0)
goto no_more;
@@ -381,16 +426,30 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
}
for (loop = 0; loop < n; loop++) {
+ if (to != PAGE_SIZE)
+ break;
page = pages[loop];
- if (page->index > wb->last)
+ if (page->index > final_page)
break;
if (!trylock_page(page))
break;
- if (!PageDirty(page) ||
- page_private(page) != (unsigned long) wb) {
+ if (!PageDirty(page) || PageWriteback(page)) {
unlock_page(page);
break;
}
+
+ priv = page_private(page);
+ f = priv & AFS_PRIV_MAX;
+ t = priv >> AFS_PRIV_SHIFT;
+ if (f != 0) {
+ unlock_page(page);
+ break;
+ }
+ to = t;
+
+ trace_afs_page_dirty(vnode, tracepoint_string("store+"),
+ page->index, priv);
+
if (!clear_page_dirty_for_io(page))
BUG();
if (test_set_page_writeback(page))
@@ -406,50 +465,55 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
}
start += loop;
- } while (start <= wb->last && count < 65536);
+ } while (start <= final_page && count < 65536);
no_more:
- /* we now have a contiguous set of dirty pages, each with writeback set
- * and the dirty mark cleared; the first page is locked and must remain
- * so, all the rest are unlocked */
+ /* We now have a contiguous set of dirty pages, each with writeback
+ * set; the first page is still locked at this point, but all the rest
+ * have been unlocked.
+ */
+ unlock_page(primary_page);
+
first = primary_page->index;
last = first + count - 1;
- offset = (first == wb->first) ? wb->offset_first : 0;
- to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
-
_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
- ret = afs_vnode_store_data(wb, first, last, offset, to);
- if (ret < 0) {
- switch (ret) {
- case -EDQUOT:
- case -ENOSPC:
- mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC);
- break;
- case -EROFS:
- case -EIO:
- case -EREMOTEIO:
- case -EFBIG:
- case -ENOENT:
- case -ENOMEDIUM:
- case -ENXIO:
- afs_kill_pages(wb->vnode, true, first, last);
- mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO);
- break;
- case -EACCES:
- case -EPERM:
- case -ENOKEY:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- case -EKEYREVOKED:
- afs_kill_pages(wb->vnode, false, first, last);
- break;
- default:
- break;
- }
- } else {
+ ret = afs_store_data(mapping, first, last, offset, to);
+ switch (ret) {
+ case 0:
ret = count;
+ break;
+
+ default:
+ pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
+ /* Fall through */
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ afs_redirty_pages(wbc, mapping, first, last);
+ mapping_set_error(mapping, ret);
+ break;
+
+ case -EDQUOT:
+ case -ENOSPC:
+ afs_redirty_pages(wbc, mapping, first, last);
+ mapping_set_error(mapping, -ENOSPC);
+ break;
+
+ case -EROFS:
+ case -EIO:
+ case -EREMOTEIO:
+ case -EFBIG:
+ case -ENOENT:
+ case -ENOMEDIUM:
+ case -ENXIO:
+ afs_kill_pages(mapping, first, last);
+ mapping_set_error(mapping, ret);
+ break;
}
_leave(" = %d", ret);
@@ -462,16 +526,12 @@ no_more:
*/
int afs_writepage(struct page *page, struct writeback_control *wbc)
{
- struct afs_writeback *wb;
int ret;
_enter("{%lx},", page->index);
- wb = (struct afs_writeback *) page_private(page);
- ASSERT(wb != NULL);
-
- ret = afs_write_back_from_locked_page(wb, page);
- unlock_page(page);
+ ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
+ wbc->range_end >> PAGE_SHIFT);
if (ret < 0) {
_leave(" = %d", ret);
return 0;
@@ -490,7 +550,6 @@ static int afs_writepages_region(struct address_space *mapping,
struct writeback_control *wbc,
pgoff_t index, pgoff_t end, pgoff_t *_next)
{
- struct afs_writeback *wb;
struct page *page;
int ret, n;
@@ -509,7 +568,12 @@ static int afs_writepages_region(struct address_space *mapping,
* (changing page->mapping to NULL), or even swizzled back from
* swapper_space to tmpfs file mapping
*/
- lock_page(page);
+ ret = lock_page_killable(page);
+ if (ret < 0) {
+ put_page(page);
+ _leave(" = %d", ret);
+ return ret;
+ }
if (page->mapping != mapping || !PageDirty(page)) {
unlock_page(page);
@@ -525,17 +589,9 @@ static int afs_writepages_region(struct address_space *mapping,
continue;
}
- wb = (struct afs_writeback *) page_private(page);
- ASSERT(wb != NULL);
-
- spin_lock(&wb->vnode->writeback_lock);
- wb->state = AFS_WBACK_WRITING;
- spin_unlock(&wb->vnode->writeback_lock);
-
if (!clear_page_dirty_for_io(page))
BUG();
- ret = afs_write_back_from_locked_page(wb, page);
- unlock_page(page);
+ ret = afs_write_back_from_locked_page(mapping, wbc, page, end);
put_page(page);
if (ret < 0) {
_leave(" = %d", ret);
@@ -591,17 +647,14 @@ int afs_writepages(struct address_space *mapping,
*/
void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
{
- struct afs_writeback *wb = call->wb;
struct pagevec pv;
+ unsigned long priv;
unsigned count, loop;
pgoff_t first = call->first, last = call->last;
- bool free_wb;
_enter("{%x:%u},{%lx-%lx}",
vnode->fid.vid, vnode->fid.vnode, first, last);
- ASSERT(wb != NULL);
-
pagevec_init(&pv);
do {
@@ -610,35 +663,22 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
count = last - first + 1;
if (count > PAGEVEC_SIZE)
count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(call->mapping, first, count,
- pv.pages);
+ pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
+ first, count, pv.pages);
ASSERTCMP(pv.nr, ==, count);
- spin_lock(&vnode->writeback_lock);
for (loop = 0; loop < count; loop++) {
- struct page *page = pv.pages[loop];
- end_page_writeback(page);
- if (page_private(page) == (unsigned long) wb) {
- set_page_private(page, 0);
- ClearPagePrivate(page);
- wb->usage--;
- }
- }
- free_wb = false;
- if (wb->usage == 0) {
- afs_unlink_writeback(wb);
- free_wb = true;
+ priv = page_private(pv.pages[loop]);
+ trace_afs_page_dirty(vnode, tracepoint_string("clear"),
+ pv.pages[loop]->index, priv);
+ set_page_private(pv.pages[loop], 0);
+ end_page_writeback(pv.pages[loop]);
}
- spin_unlock(&vnode->writeback_lock);
first += count;
- if (free_wb) {
- afs_free_writeback(wb);
- wb = NULL;
- }
-
__pagevec_release(&pv);
} while (first <= last);
+ afs_prune_wb_keys(vnode);
_leave("");
}
@@ -670,28 +710,6 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
}
/*
- * flush the vnode to the fileserver
- */
-int afs_writeback_all(struct afs_vnode *vnode)
-{
- struct address_space *mapping = vnode->vfs_inode.i_mapping;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_cyclic = 1,
- };
- int ret;
-
- _enter("");
-
- ret = mapping->a_ops->writepages(mapping, &wbc);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* flush any dirty pages for this process, and check for write errors.
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
@@ -699,61 +717,13 @@ int afs_writeback_all(struct afs_vnode *vnode)
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file_inode(file);
- struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(inode);
- int ret;
_enter("{%x:%u},{n=%pD},%d",
vnode->fid.vid, vnode->fid.vnode, file,
datasync);
- ret = file_write_and_wait_range(file, start, end);
- if (ret)
- return ret;
- inode_lock(inode);
-
- /* use a writeback record as a marker in the queue - when this reaches
- * the front of the queue, all the outstanding writes are either
- * completed or rejected */
- wb = kzalloc(sizeof(*wb), GFP_KERNEL);
- if (!wb) {
- ret = -ENOMEM;
- goto out;
- }
- wb->vnode = vnode;
- wb->first = 0;
- wb->last = -1;
- wb->offset_first = 0;
- wb->to_last = PAGE_SIZE;
- wb->usage = 1;
- wb->state = AFS_WBACK_SYNCING;
- init_waitqueue_head(&wb->waitq);
-
- spin_lock(&vnode->writeback_lock);
- list_for_each_entry(xwb, &vnode->writebacks, link) {
- if (xwb->state == AFS_WBACK_PENDING)
- xwb->state = AFS_WBACK_CONFLICTING;
- }
- list_add_tail(&wb->link, &vnode->writebacks);
- spin_unlock(&vnode->writeback_lock);
-
- /* push all the outstanding writebacks to the server */
- ret = afs_writeback_all(vnode);
- if (ret < 0) {
- afs_put_writeback(wb);
- _leave(" = %d [wb]", ret);
- goto out;
- }
-
- /* wait for the preceding writes to actually complete */
- ret = wait_event_interruptible(wb->waitq,
- wb->state == AFS_WBACK_COMPLETE ||
- vnode->writebacks.next == &wb->link);
- afs_put_writeback(wb);
- _leave(" = %d", ret);
-out:
- inode_unlock(inode);
- return ret;
+ return file_write_and_wait_range(file, start, end);
}
/*
@@ -774,19 +744,114 @@ int afs_flush(struct file *file, fl_owner_t id)
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
-int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int afs_page_mkwrite(struct vm_fault *vmf)
{
- struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
+ struct file *file = vmf->vma->vm_file;
+ struct inode *inode = file_inode(file);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ unsigned long priv;
_enter("{{%x:%u}},{%lx}",
- vnode->fid.vid, vnode->fid.vnode, page->index);
+ vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
+
+ sb_start_pagefault(inode->i_sb);
- /* wait for the page to be written to the cache before we allow it to
- * be modified */
+ /* Wait for the page to be written to the cache before we allow it to
+ * be modified. We then assume the entire page will need writing back.
+ */
#ifdef CONFIG_AFS_FSCACHE
- fscache_wait_on_page_write(vnode->cache, page);
+ fscache_wait_on_page_write(vnode->cache, vmf->page);
#endif
- _leave(" = 0");
- return 0;
+ if (PageWriteback(vmf->page) &&
+ wait_on_page_bit_killable(vmf->page, PG_writeback) < 0)
+ return VM_FAULT_RETRY;
+
+ if (lock_page_killable(vmf->page) < 0)
+ return VM_FAULT_RETRY;
+
+ /* We mustn't change page->private until writeback is complete as that
+ * details the portion of the page we need to write back and we might
+ * need to redirty the page if there's a problem.
+ */
+ wait_on_page_writeback(vmf->page);
+
+ priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
+ priv |= 0; /* From */
+ trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
+ vmf->page->index, priv);
+ SetPagePrivate(vmf->page);
+ set_page_private(vmf->page, priv);
+
+ sb_end_pagefault(inode->i_sb);
+ return VM_FAULT_LOCKED;
+}
+
+/*
+ * Prune the keys cached for writeback. The caller must hold vnode->wb_lock.
+ */
+void afs_prune_wb_keys(struct afs_vnode *vnode)
+{
+ LIST_HEAD(graveyard);
+ struct afs_wb_key *wbk, *tmp;
+
+ /* Discard unused keys */
+ spin_lock(&vnode->wb_lock);
+
+ if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
+ !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
+ list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
+ if (refcount_read(&wbk->usage) == 1)
+ list_move(&wbk->vnode_link, &graveyard);
+ }
+ }
+
+ spin_unlock(&vnode->wb_lock);
+
+ while (!list_empty(&graveyard)) {
+ wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
+ list_del(&wbk->vnode_link);
+ afs_put_wb_key(wbk);
+ }
+}
+
+/*
+ * Clean up a page during invalidation.
+ */
+int afs_launder_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ unsigned long priv;
+ unsigned int f, t;
+ int ret = 0;
+
+ _enter("{%lx}", page->index);
+
+ priv = page_private(page);
+ if (clear_page_dirty_for_io(page)) {
+ f = 0;
+ t = PAGE_SIZE;
+ if (PagePrivate(page)) {
+ f = priv & AFS_PRIV_MAX;
+ t = priv >> AFS_PRIV_SHIFT;
+ }
+
+ trace_afs_page_dirty(vnode, tracepoint_string("launder"),
+ page->index, priv);
+ ret = afs_store_data(mapping, page->index, page->index, t, f);
+ }
+
+ trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
+ page->index, priv);
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+
+#ifdef CONFIG_AFS_FSCACHE
+ if (PageFsCache(page)) {
+ fscache_wait_on_page_write(vnode->cache, page);
+ fscache_uncache_page(vnode->cache, page);
+ }
+#endif
+ return ret;
}
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 2830e4f48d85..cfcc674e64a5 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -45,7 +45,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler,
struct afs_cell *cell = vnode->volume->cell;
size_t namelen;
- namelen = strlen(cell->name);
+ namelen = cell->name_len;
if (size == 0)
return namelen;
if (namelen > size)
@@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler,
void *buffer, size_t size)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
- const char *volname = vnode->volume->vlocation->vldb.name;
+ const char *volname = vnode->volume->name;
size_t namelen;
namelen = strlen(volname);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 673ac4e01dd0..7208ecef7088 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3992,16 +3992,9 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
btrfs_put_block_group(bg);
}
-static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
{
- wait_on_atomic_t(&bg->nocow_writers,
- btrfs_wait_nocow_writers_atomic_t,
+ wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
}
@@ -6530,12 +6523,6 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(bg);
}
-static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
{
struct btrfs_space_info *space_info = bg->space_info;
@@ -6558,8 +6545,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
down_write(&space_info->groups_sem);
up_write(&space_info->groups_sem);
- wait_on_atomic_t(&bg->reservations,
- btrfs_wait_bg_reservations_atomic_t,
+ wait_on_atomic_t(&bg->reservations, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
}
@@ -11059,12 +11045,6 @@ int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
return 1;
}
-static int wait_snapshotting_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
{
while (true) {
@@ -11073,8 +11053,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
ret = btrfs_start_write_no_snapshotting(root);
if (ret)
break;
- wait_on_atomic_t(&root->will_be_snapshotted,
- wait_snapshotting_atomic_t,
+ wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 40d61077bead..ff84258132bb 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -558,7 +558,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
* have completed.
*/
if (!atomic_dec_and_test(&cookie->n_active))
- wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
+ wait_on_atomic_t(&cookie->n_active, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
/* Make sure any pending writes are cancelled. */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 97ec45110957..0ff4b49a0037 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
}
-extern int fscache_wait_atomic_t(atomic_t *);
-
/*
* object.c
*/
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index b39d487ccfb0..249968dcbf5c 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -195,12 +195,3 @@ static void __exit fscache_exit(void)
}
module_exit(fscache_exit);
-
-/*
- * wait_on_atomic_t() sleep function for uninterruptible waiting
- */
-int fscache_wait_atomic_t(atomic_t *p)
-{
- schedule();
- return 0;
-}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 134d9f560240..1629056aa2c9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -85,9 +85,9 @@ int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
-int nfs_wait_atomic_killable(atomic_t *p)
+int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode)
{
- return nfs_wait_killable(TASK_KILLABLE);
+ return nfs_wait_killable(mode);
}
/**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f9a4a5524bd5..5ab17fd4700a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -388,7 +388,7 @@ extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
-extern int nfs_wait_atomic_killable(atomic_t *p);
+extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
/* super.c */
extern const struct super_operations nfs_sops;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 2cabbcf2f28e..e87279e49ba3 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -129,19 +129,13 @@ static struct kobj_attribute ocfs2_attr_filecheck_set =
ocfs2_filecheck_show,
ocfs2_filecheck_store);
-static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
-{
- schedule();
- return 0;
-}
-
static void
ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
{
struct ocfs2_filecheck_entry *p;
if (!atomic_dec_and_test(&entry->fs_count))
- wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait,
+ wait_on_atomic_t(&entry->fs_count, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
spin_lock(&entry->fs_fcheck->fc_lock);