diff options
author | David Howells <dhowells@redhat.com> | 2018-10-20 02:57:59 +0300 |
---|---|---|
committer | David Howells <dhowells@redhat.com> | 2018-10-24 02:41:09 +0300 |
commit | 3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e (patch) | |
tree | df215e6a6ad11b6ac8158461144667e168591d28 /fs/afs/vl_rotate.c | |
parent | 18ac61853cc4e44eb30e125fc8344a3b25c7b6fe (diff) | |
download | linux-3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e.tar.xz |
afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list on all
addresses simultaneously in an attempt to find out the fastest route whilst
not getting stuck for 20s on any server or address that we don't get a
reply from.
This alleviates the problem whereby attempting to access a new server can
take a long time because the rotation algorithm ends up rotating through
all servers and addresses until it finds one that responds.
Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/vl_rotate.c')
-rw-r--r-- | fs/afs/vl_rotate.c | 159 |
1 files changed, 105 insertions, 54 deletions
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index ead6dedbb561..b64a284b99d2 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -58,8 +58,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) if (!vc->server_list || !vc->server_list->nr_servers) return false; - vc->start = READ_ONCE(vc->server_list->index); - vc->index = vc->start; + vc->untried = (1UL << vc->server_list->nr_servers) - 1; + vc->index = -1; return true; } @@ -71,11 +71,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) { struct afs_addr_list *alist; struct afs_vlserver *vlserver; - int error = vc->ac.error; + u32 rtt; + int error = vc->ac.error, abort_code, i; - _enter("%u/%u,%u/%u,%d,%d", - vc->index, vc->start, - vc->ac.index, vc->ac.start, + _enter("%lx[%d],%lx[%d],%d,%d", + vc->untried, vc->index, + vc->ac.tried, vc->ac.index, error, vc->ac.abort_code); if (vc->flags & AFS_VL_CURSOR_STOP) { @@ -145,23 +146,52 @@ restart_from_beginning: start: _debug("start"); - /* TODO: Consider checking the VL server list */ - if (!afs_start_vl_iteration(vc)) goto failed; -use_server: - _debug("use"); + error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", vc->untried); + + error = afs_wait_for_vl_probes(vc->server_list, vc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. */ + vc->index = vc->server_list->preferred; + if (test_bit(vc->index, &vc->untried)) + goto selected_server; + + vc->index = -1; + rtt = U32_MAX; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + + if (!test_bit(i, &vc->untried) || !s->probe.responded) + continue; + if (s->probe.rtt < rtt) { + vc->index = i; + rtt = s->probe.rtt; + } + } + + if (vc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", vc->index); + __clear_bit(vc->index, &vc->untried); + /* We're starting on a different vlserver from the list. We need to * check it, find its address list and probe its capabilities before we * use it. */ ASSERTCMP(vc->ac.alist, ==, NULL); vlserver = vc->server_list->servers[vc->index].server; - - // TODO: Check the vlserver occasionally - //if (!afs_check_vlserver_record(vc, vlserver)) - // goto failed; + vc->server = vlserver; _debug("USING VLSERVER: %s", vlserver->name); @@ -173,62 +203,84 @@ use_server: memset(&vc->ac, 0, sizeof(vc->ac)); - /* Probe the current vlserver if we haven't done so yet. */ -#if 0 // TODO - if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) { - vc->ac.alist = afs_get_addrlist(alist); - - if (!afs_probe_vlserver(vc)) { - error = vc->ac.error; - switch (error) { - case -ENOMEM: - case -ERESTARTSYS: - case -EINTR: - goto failed_set_error; - default: - goto next_server; - } - } - } -#endif - if (!vc->ac.alist) vc->ac.alist = alist; else afs_put_addrlist(alist); - vc->ac.start = READ_ONCE(alist->index); - vc->ac.index = vc->ac.start; + vc->ac.index = -1; iterate_address: ASSERT(vc->ac.alist); - _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ if (!afs_iterate_addresses(&vc->ac)) goto next_server; + _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); return true; next_server: _debug("next"); afs_end_cursor(&vc->ac); - vc->index++; - if (vc->index >= vc->server_list->nr_servers) - vc->index = 0; - if (vc->index != vc->start) - goto use_server; + goto pick_server; +no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (vc->flags & AFS_VL_CURSOR_RETRY) goto restart_from_beginning; - goto failed; + abort_code = 0; + error = -EDESTADDRREQ; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + int probe_error = READ_ONCE(s->probe.error); + + switch (probe_error) { + case 0: + continue; + default: + if (error == -ETIMEDOUT || + error == -ETIME) + continue; + case -ETIMEDOUT: + case -ETIME: + if (error == -ENOMEM || + error == -ENONET) + continue; + case -ENOMEM: + case -ENONET: + if (error == -ENETUNREACH) + continue; + case -ENETUNREACH: + if (error == -EHOSTUNREACH) + continue; + case -EHOSTUNREACH: + if (error == -ECONNREFUSED) + continue; + case -ECONNREFUSED: + if (error == -ECONNRESET) + continue; + case -ECONNRESET: /* Responded, but call expired. */ + if (error == -ECONNABORTED) + continue; + case -ECONNABORTED: + abort_code = s->probe.abort_code; + error = probe_error; + continue; + } + } + + if (error == -ECONNABORTED) + error = afs_abort_to_error(abort_code); +failed_set_error: + vc->error = error; failed: vc->flags |= AFS_VL_CURSOR_STOP; afs_end_cursor(&vc->ac); @@ -250,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); - pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n", - vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error); + pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", + vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); if (vc->server_list) { const struct afs_vlserver_list *sl = vc->server_list; @@ -259,26 +311,25 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) sl->nr_servers, sl->index); for (i = 0; i < sl->nr_servers; i++) { const struct afs_vlserver *s = sl->servers[i].server; - pr_notice("VC: server fl=%lx %s+%hu\n", - s->flags, s->name, s->port); + pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", + s->name, s->port, s->flags, s->probe.error); if (s->addresses) { const struct afs_addr_list *a = rcu_dereference(s->addresses); - pr_notice("VC: - av=%u nr=%u/%u/%u ax=%u\n", - a->version, + pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, - a->index); - pr_notice("VC: - pr=%lx yf=%lx\n", - a->probed, a->yfs); + a->preferred); + pr_notice("VC: - pr=%lx R=%lx F=%lx\n", + a->probed, a->responded, a->failed); if (a == vc->ac.alist) pr_notice("VC: - current\n"); } } } - pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n", - vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error, - vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations); + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, + vc->ac.responded, vc->ac.nr_iterations); rcu_read_unlock(); } |