1 /* Handle vlserver selection and rotation. 2 * 3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public Licence 8 * as published by the Free Software Foundation; either version 9 * 2 of the Licence, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/sched/signal.h> 15 #include "internal.h" 16 #include "afs_vl.h" 17 18 /* 19 * Begin an operation on a volume location server. 20 */ 21 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 22 struct key *key) 23 { 24 memset(vc, 0, sizeof(*vc)); 25 vc->cell = cell; 26 vc->key = key; 27 vc->error = -EDESTADDRREQ; 28 vc->ac.error = SHRT_MAX; 29 30 if (signal_pending(current)) { 31 vc->error = -EINTR; 32 vc->flags |= AFS_VL_CURSOR_STOP; 33 return false; 34 } 35 36 return true; 37 } 38 39 /* 40 * Begin iteration through a server list, starting with the last used server if 41 * possible, or the last recorded good server if not. 42 */ 43 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 44 { 45 struct afs_cell *cell = vc->cell; 46 47 if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, 48 TASK_INTERRUPTIBLE)) { 49 vc->error = -ERESTARTSYS; 50 return false; 51 } 52 53 read_lock(&cell->vl_servers_lock); 54 vc->server_list = afs_get_vlserverlist( 55 rcu_dereference_protected(cell->vl_servers, 56 lockdep_is_held(&cell->vl_servers_lock))); 57 read_unlock(&cell->vl_servers_lock); 58 if (!vc->server_list || !vc->server_list->nr_servers) 59 return false; 60 61 vc->untried = (1UL << vc->server_list->nr_servers) - 1; 62 vc->index = -1; 63 return true; 64 } 65 66 /* 67 * Select the vlserver to use. May be called multiple times to rotate 68 * through the vlservers. 69 */ 70 bool afs_select_vlserver(struct afs_vl_cursor *vc) 71 { 72 struct afs_addr_list *alist; 73 struct afs_vlserver *vlserver; 74 u32 rtt; 75 int error = vc->ac.error, abort_code, i; 76 77 _enter("%lx[%d],%lx[%d],%d,%d", 78 vc->untried, vc->index, 79 vc->ac.tried, vc->ac.index, 80 error, vc->ac.abort_code); 81 82 if (vc->flags & AFS_VL_CURSOR_STOP) { 83 _leave(" = f [stopped]"); 84 return false; 85 } 86 87 vc->nr_iterations++; 88 89 /* Evaluate the result of the previous operation, if there was one. */ 90 switch (error) { 91 case SHRT_MAX: 92 goto start; 93 94 default: 95 case 0: 96 /* Success or local failure. Stop. */ 97 vc->error = error; 98 vc->flags |= AFS_VL_CURSOR_STOP; 99 _leave(" = f [okay/local %d]", vc->ac.error); 100 return false; 101 102 case -ECONNABORTED: 103 /* The far side rejected the operation on some grounds. This 104 * might involve the server being busy or the volume having been moved. 105 */ 106 switch (vc->ac.abort_code) { 107 case AFSVL_IO: 108 case AFSVL_BADVOLOPER: 109 case AFSVL_NOMEM: 110 /* The server went weird. */ 111 vc->error = -EREMOTEIO; 112 //write_lock(&vc->cell->vl_servers_lock); 113 //vc->server_list->weird_mask |= 1 << vc->index; 114 //write_unlock(&vc->cell->vl_servers_lock); 115 goto next_server; 116 117 default: 118 vc->error = afs_abort_to_error(vc->ac.abort_code); 119 goto failed; 120 } 121 122 case -ENETUNREACH: 123 case -EHOSTUNREACH: 124 case -ECONNREFUSED: 125 case -ETIMEDOUT: 126 case -ETIME: 127 _debug("no conn %d", error); 128 vc->error = error; 129 goto iterate_address; 130 131 case -ECONNRESET: 132 _debug("call reset"); 133 vc->error = error; 134 vc->flags |= AFS_VL_CURSOR_RETRY; 135 goto next_server; 136 } 137 138 restart_from_beginning: 139 _debug("restart"); 140 afs_end_cursor(&vc->ac); 141 afs_put_vlserverlist(vc->cell->net, vc->server_list); 142 vc->server_list = NULL; 143 if (vc->flags & AFS_VL_CURSOR_RETRIED) 144 goto failed; 145 vc->flags |= AFS_VL_CURSOR_RETRIED; 146 start: 147 _debug("start"); 148 149 if (!afs_start_vl_iteration(vc)) 150 goto failed; 151 152 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 153 if (error < 0) 154 goto failed_set_error; 155 156 pick_server: 157 _debug("pick [%lx]", vc->untried); 158 159 error = afs_wait_for_vl_probes(vc->server_list, vc->untried); 160 if (error < 0) 161 goto failed_set_error; 162 163 /* Pick the untried server with the lowest RTT. */ 164 vc->index = vc->server_list->preferred; 165 if (test_bit(vc->index, &vc->untried)) 166 goto selected_server; 167 168 vc->index = -1; 169 rtt = U32_MAX; 170 for (i = 0; i < vc->server_list->nr_servers; i++) { 171 struct afs_vlserver *s = vc->server_list->servers[i].server; 172 173 if (!test_bit(i, &vc->untried) || !s->probe.responded) 174 continue; 175 if (s->probe.rtt < rtt) { 176 vc->index = i; 177 rtt = s->probe.rtt; 178 } 179 } 180 181 if (vc->index == -1) 182 goto no_more_servers; 183 184 selected_server: 185 _debug("use %d", vc->index); 186 __clear_bit(vc->index, &vc->untried); 187 188 /* We're starting on a different vlserver from the list. We need to 189 * check it, find its address list and probe its capabilities before we 190 * use it. 191 */ 192 ASSERTCMP(vc->ac.alist, ==, NULL); 193 vlserver = vc->server_list->servers[vc->index].server; 194 vc->server = vlserver; 195 196 _debug("USING VLSERVER: %s", vlserver->name); 197 198 read_lock(&vlserver->lock); 199 alist = rcu_dereference_protected(vlserver->addresses, 200 lockdep_is_held(&vlserver->lock)); 201 afs_get_addrlist(alist); 202 read_unlock(&vlserver->lock); 203 204 memset(&vc->ac, 0, sizeof(vc->ac)); 205 206 if (!vc->ac.alist) 207 vc->ac.alist = alist; 208 else 209 afs_put_addrlist(alist); 210 211 vc->ac.index = -1; 212 213 iterate_address: 214 ASSERT(vc->ac.alist); 215 /* Iterate over the current server's address list to try and find an 216 * address on which it will respond to us. 217 */ 218 if (!afs_iterate_addresses(&vc->ac)) 219 goto next_server; 220 221 _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); 222 223 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 224 return true; 225 226 next_server: 227 _debug("next"); 228 afs_end_cursor(&vc->ac); 229 goto pick_server; 230 231 no_more_servers: 232 /* That's all the servers poked to no good effect. Try again if some 233 * of them were busy. 234 */ 235 if (vc->flags & AFS_VL_CURSOR_RETRY) 236 goto restart_from_beginning; 237 238 abort_code = 0; 239 error = -EDESTADDRREQ; 240 for (i = 0; i < vc->server_list->nr_servers; i++) { 241 struct afs_vlserver *s = vc->server_list->servers[i].server; 242 int probe_error = READ_ONCE(s->probe.error); 243 244 switch (probe_error) { 245 case 0: 246 continue; 247 default: 248 if (error == -ETIMEDOUT || 249 error == -ETIME) 250 continue; 251 case -ETIMEDOUT: 252 case -ETIME: 253 if (error == -ENOMEM || 254 error == -ENONET) 255 continue; 256 case -ENOMEM: 257 case -ENONET: 258 if (error == -ENETUNREACH) 259 continue; 260 case -ENETUNREACH: 261 if (error == -EHOSTUNREACH) 262 continue; 263 case -EHOSTUNREACH: 264 if (error == -ECONNREFUSED) 265 continue; 266 case -ECONNREFUSED: 267 if (error == -ECONNRESET) 268 continue; 269 case -ECONNRESET: /* Responded, but call expired. */ 270 if (error == -ECONNABORTED) 271 continue; 272 case -ECONNABORTED: 273 abort_code = s->probe.abort_code; 274 error = probe_error; 275 continue; 276 } 277 } 278 279 if (error == -ECONNABORTED) 280 error = afs_abort_to_error(abort_code); 281 282 failed_set_error: 283 vc->error = error; 284 failed: 285 vc->flags |= AFS_VL_CURSOR_STOP; 286 afs_end_cursor(&vc->ac); 287 _leave(" = f [failed %d]", vc->error); 288 return false; 289 } 290 291 /* 292 * Dump cursor state in the case of the error being EDESTADDRREQ. 293 */ 294 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 295 { 296 static int count; 297 int i; 298 299 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 300 return; 301 count++; 302 303 rcu_read_lock(); 304 pr_notice("EDESTADDR occurred\n"); 305 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 306 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); 307 308 if (vc->server_list) { 309 const struct afs_vlserver_list *sl = vc->server_list; 310 pr_notice("VC: SL nr=%u ix=%u\n", 311 sl->nr_servers, sl->index); 312 for (i = 0; i < sl->nr_servers; i++) { 313 const struct afs_vlserver *s = sl->servers[i].server; 314 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 315 s->name, s->port, s->flags, s->probe.error); 316 if (s->addresses) { 317 const struct afs_addr_list *a = 318 rcu_dereference(s->addresses); 319 pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 320 a->nr_ipv4, a->nr_addrs, a->max_addrs, 321 a->preferred); 322 pr_notice("VC: - pr=%lx R=%lx F=%lx\n", 323 a->probed, a->responded, a->failed); 324 if (a == vc->ac.alist) 325 pr_notice("VC: - current\n"); 326 } 327 } 328 } 329 330 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", 331 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, 332 vc->ac.responded, vc->ac.nr_iterations); 333 rcu_read_unlock(); 334 } 335 336 /* 337 * Tidy up a volume location server cursor and unlock the vnode. 338 */ 339 int afs_end_vlserver_operation(struct afs_vl_cursor *vc) 340 { 341 struct afs_net *net = vc->cell->net; 342 343 if (vc->error == -EDESTADDRREQ || 344 vc->error == -ENETUNREACH || 345 vc->error == -EHOSTUNREACH) 346 afs_vl_dump_edestaddrreq(vc); 347 348 afs_end_cursor(&vc->ac); 349 afs_put_vlserverlist(net, vc->server_list); 350 351 if (vc->error == -ECONNABORTED) 352 vc->error = afs_abort_to_error(vc->ac.abort_code); 353 354 return vc->error; 355 } 356