/* Handle vlserver selection and rotation. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include "internal.h" #include "afs_vl.h" /* * Begin an operation on a volume location server. */ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, struct key *key) { memset(vc, 0, sizeof(*vc)); vc->cell = cell; vc->key = key; vc->error = -EDESTADDRREQ; vc->ac.error = SHRT_MAX; if (signal_pending(current)) { vc->error = -EINTR; vc->flags |= AFS_VL_CURSOR_STOP; return false; } return true; } /* * Begin iteration through a server list, starting with the last used server if * possible, or the last recorded good server if not. */ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) { struct afs_cell *cell = vc->cell; if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, TASK_INTERRUPTIBLE)) { vc->error = -ERESTARTSYS; return false; } read_lock(&cell->vl_servers_lock); vc->server_list = afs_get_vlserverlist( rcu_dereference_protected(cell->vl_servers, lockdep_is_held(&cell->vl_servers_lock))); read_unlock(&cell->vl_servers_lock); if (!vc->server_list || !vc->server_list->nr_servers) return false; vc->untried = (1UL << vc->server_list->nr_servers) - 1; vc->index = -1; return true; } /* * Select the vlserver to use. May be called multiple times to rotate * through the vlservers. */ bool afs_select_vlserver(struct afs_vl_cursor *vc) { struct afs_addr_list *alist; struct afs_vlserver *vlserver; struct afs_error e; u32 rtt; int error = vc->ac.error, i; _enter("%lx[%d],%lx[%d],%d,%d", vc->untried, vc->index, vc->ac.tried, vc->ac.index, error, vc->ac.abort_code); if (vc->flags & AFS_VL_CURSOR_STOP) { _leave(" = f [stopped]"); return false; } vc->nr_iterations++; /* Evaluate the result of the previous operation, if there was one. */ switch (error) { case SHRT_MAX: goto start; default: case 0: /* Success or local failure. Stop. */ vc->error = error; vc->flags |= AFS_VL_CURSOR_STOP; _leave(" = f [okay/local %d]", vc->ac.error); return false; case -ECONNABORTED: /* The far side rejected the operation on some grounds. This * might involve the server being busy or the volume having been moved. */ switch (vc->ac.abort_code) { case AFSVL_IO: case AFSVL_BADVOLOPER: case AFSVL_NOMEM: /* The server went weird. */ vc->error = -EREMOTEIO; //write_lock(&vc->cell->vl_servers_lock); //vc->server_list->weird_mask |= 1 << vc->index; //write_unlock(&vc->cell->vl_servers_lock); goto next_server; default: vc->error = afs_abort_to_error(vc->ac.abort_code); goto failed; } case -ERFKILL: case -EADDRNOTAVAIL: case -ENETUNREACH: case -EHOSTUNREACH: case -EHOSTDOWN: case -ECONNREFUSED: case -ETIMEDOUT: case -ETIME: _debug("no conn %d", error); vc->error = error; goto iterate_address; case -ECONNRESET: _debug("call reset"); vc->error = error; vc->flags |= AFS_VL_CURSOR_RETRY; goto next_server; } restart_from_beginning: _debug("restart"); afs_end_cursor(&vc->ac); afs_put_vlserverlist(vc->cell->net, vc->server_list); vc->server_list = NULL; if (vc->flags & AFS_VL_CURSOR_RETRIED) goto failed; vc->flags |= AFS_VL_CURSOR_RETRIED; start: _debug("start"); if (!afs_start_vl_iteration(vc)) goto failed; error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); if (error < 0) goto failed_set_error; pick_server: _debug("pick [%lx]", vc->untried); error = afs_wait_for_vl_probes(vc->server_list, vc->untried); if (error < 0) goto failed_set_error; /* Pick the untried server with the lowest RTT. */ vc->index = vc->server_list->preferred; if (test_bit(vc->index, &vc->untried)) goto selected_server; vc->index = -1; rtt = U32_MAX; for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; if (!test_bit(i, &vc->untried) || !s->probe.responded) continue; if (s->probe.rtt < rtt) { vc->index = i; rtt = s->probe.rtt; } } if (vc->index == -1) goto no_more_servers; selected_server: _debug("use %d", vc->index); __clear_bit(vc->index, &vc->untried); /* We're starting on a different vlserver from the list. We need to * check it, find its address list and probe its capabilities before we * use it. */ ASSERTCMP(vc->ac.alist, ==, NULL); vlserver = vc->server_list->servers[vc->index].server; vc->server = vlserver; _debug("USING VLSERVER: %s", vlserver->name); read_lock(&vlserver->lock); alist = rcu_dereference_protected(vlserver->addresses, lockdep_is_held(&vlserver->lock)); afs_get_addrlist(alist); read_unlock(&vlserver->lock); memset(&vc->ac, 0, sizeof(vc->ac)); if (!vc->ac.alist) vc->ac.alist = alist; else afs_put_addrlist(alist); vc->ac.index = -1; iterate_address: ASSERT(vc->ac.alist); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ if (!afs_iterate_addresses(&vc->ac)) goto next_server; _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); return true; next_server: _debug("next"); afs_end_cursor(&vc->ac); goto pick_server; no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (vc->flags & AFS_VL_CURSOR_RETRY) goto restart_from_beginning; e.error = -EDESTADDRREQ; e.responded = false; for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; afs_prioritise_error(&e, READ_ONCE(s->probe.error), s->probe.abort_code); } failed_set_error: vc->error = error; failed: vc->flags |= AFS_VL_CURSOR_STOP; afs_end_cursor(&vc->ac); _leave(" = f [failed %d]", vc->error); return false; } /* * Dump cursor state in the case of the error being EDESTADDRREQ. */ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) { static int count; int i; if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) return; count++; rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); if (vc->server_list) { const struct afs_vlserver_list *sl = vc->server_list; pr_notice("VC: SL nr=%u ix=%u\n", sl->nr_servers, sl->index); for (i = 0; i < sl->nr_servers; i++) { const struct afs_vlserver *s = sl->servers[i].server; pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", s->name, s->port, s->flags, s->probe.error); if (s->addresses) { const struct afs_addr_list *a = rcu_dereference(s->addresses); pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); pr_notice("VC: - pr=%lx R=%lx F=%lx\n", a->probed, a->responded, a->failed); if (a == vc->ac.alist) pr_notice("VC: - current\n"); } } } pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, vc->ac.responded, vc->ac.nr_iterations); rcu_read_unlock(); } /* * Tidy up a volume location server cursor and unlock the vnode. */ int afs_end_vlserver_operation(struct afs_vl_cursor *vc) { struct afs_net *net = vc->cell->net; if (vc->error == -EDESTADDRREQ || vc->error == -EADDRNOTAVAIL || vc->error == -ENETUNREACH || vc->error == -EHOSTUNREACH) afs_vl_dump_edestaddrreq(vc); afs_end_cursor(&vc->ac); afs_put_vlserverlist(net, vc->server_list); if (vc->error == -ECONNABORTED) vc->error = afs_abort_to_error(vc->ac.abort_code); return vc->error; }