1 /* Handle vlserver selection and rotation. 2 * 3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public Licence 8 * as published by the Free Software Foundation; either version 9 * 2 of the Licence, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/sched/signal.h> 15 #include "internal.h" 16 #include "afs_vl.h" 17 18 /* 19 * Begin an operation on a volume location server. 20 */ 21 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 22 struct key *key) 23 { 24 memset(vc, 0, sizeof(*vc)); 25 vc->cell = cell; 26 vc->key = key; 27 vc->error = -EDESTADDRREQ; 28 vc->ac.error = SHRT_MAX; 29 30 if (signal_pending(current)) { 31 vc->error = -EINTR; 32 vc->flags |= AFS_VL_CURSOR_STOP; 33 return false; 34 } 35 36 return true; 37 } 38 39 /* 40 * Begin iteration through a server list, starting with the last used server if 41 * possible, or the last recorded good server if not. 42 */ 43 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 44 { 45 struct afs_cell *cell = vc->cell; 46 unsigned int dns_lookup_count; 47 48 if (cell->dns_source == DNS_RECORD_UNAVAILABLE || 49 cell->dns_expiry <= ktime_get_real_seconds()) { 50 dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); 51 set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); 52 queue_work(afs_wq, &cell->manager); 53 54 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 55 if (wait_var_event_interruptible( 56 &cell->dns_lookup_count, 57 smp_load_acquire(&cell->dns_lookup_count) 58 != dns_lookup_count) < 0) { 59 vc->error = -ERESTARTSYS; 60 return false; 61 } 62 } 63 64 /* Status load is ordered after lookup counter load */ 65 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 66 vc->error = -EDESTADDRREQ; 67 return false; 68 } 69 } 70 71 read_lock(&cell->vl_servers_lock); 72 vc->server_list = afs_get_vlserverlist( 73 rcu_dereference_protected(cell->vl_servers, 74 lockdep_is_held(&cell->vl_servers_lock))); 75 read_unlock(&cell->vl_servers_lock); 76 if (!vc->server_list->nr_servers) 77 return false; 78 79 vc->untried = (1UL << vc->server_list->nr_servers) - 1; 80 vc->index = -1; 81 return true; 82 } 83 84 /* 85 * Select the vlserver to use. May be called multiple times to rotate 86 * through the vlservers. 87 */ 88 bool afs_select_vlserver(struct afs_vl_cursor *vc) 89 { 90 struct afs_addr_list *alist; 91 struct afs_vlserver *vlserver; 92 struct afs_error e; 93 u32 rtt; 94 int error = vc->ac.error, i; 95 96 _enter("%lx[%d],%lx[%d],%d,%d", 97 vc->untried, vc->index, 98 vc->ac.tried, vc->ac.index, 99 error, vc->ac.abort_code); 100 101 if (vc->flags & AFS_VL_CURSOR_STOP) { 102 _leave(" = f [stopped]"); 103 return false; 104 } 105 106 vc->nr_iterations++; 107 108 /* Evaluate the result of the previous operation, if there was one. */ 109 switch (error) { 110 case SHRT_MAX: 111 goto start; 112 113 default: 114 case 0: 115 /* Success or local failure. Stop. */ 116 vc->error = error; 117 vc->flags |= AFS_VL_CURSOR_STOP; 118 _leave(" = f [okay/local %d]", vc->ac.error); 119 return false; 120 121 case -ECONNABORTED: 122 /* The far side rejected the operation on some grounds. This 123 * might involve the server being busy or the volume having been moved. 124 */ 125 switch (vc->ac.abort_code) { 126 case AFSVL_IO: 127 case AFSVL_BADVOLOPER: 128 case AFSVL_NOMEM: 129 /* The server went weird. */ 130 vc->error = -EREMOTEIO; 131 //write_lock(&vc->cell->vl_servers_lock); 132 //vc->server_list->weird_mask |= 1 << vc->index; 133 //write_unlock(&vc->cell->vl_servers_lock); 134 goto next_server; 135 136 default: 137 vc->error = afs_abort_to_error(vc->ac.abort_code); 138 goto failed; 139 } 140 141 case -ERFKILL: 142 case -EADDRNOTAVAIL: 143 case -ENETUNREACH: 144 case -EHOSTUNREACH: 145 case -EHOSTDOWN: 146 case -ECONNREFUSED: 147 case -ETIMEDOUT: 148 case -ETIME: 149 _debug("no conn %d", error); 150 vc->error = error; 151 goto iterate_address; 152 153 case -ECONNRESET: 154 _debug("call reset"); 155 vc->error = error; 156 vc->flags |= AFS_VL_CURSOR_RETRY; 157 goto next_server; 158 } 159 160 restart_from_beginning: 161 _debug("restart"); 162 afs_end_cursor(&vc->ac); 163 afs_put_vlserverlist(vc->cell->net, vc->server_list); 164 vc->server_list = NULL; 165 if (vc->flags & AFS_VL_CURSOR_RETRIED) 166 goto failed; 167 vc->flags |= AFS_VL_CURSOR_RETRIED; 168 start: 169 _debug("start"); 170 171 if (!afs_start_vl_iteration(vc)) 172 goto failed; 173 174 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 175 if (error < 0) 176 goto failed_set_error; 177 178 pick_server: 179 _debug("pick [%lx]", vc->untried); 180 181 error = afs_wait_for_vl_probes(vc->server_list, vc->untried); 182 if (error < 0) 183 goto failed_set_error; 184 185 /* Pick the untried server with the lowest RTT. */ 186 vc->index = vc->server_list->preferred; 187 if (test_bit(vc->index, &vc->untried)) 188 goto selected_server; 189 190 vc->index = -1; 191 rtt = U32_MAX; 192 for (i = 0; i < vc->server_list->nr_servers; i++) { 193 struct afs_vlserver *s = vc->server_list->servers[i].server; 194 195 if (!test_bit(i, &vc->untried) || !s->probe.responded) 196 continue; 197 if (s->probe.rtt < rtt) { 198 vc->index = i; 199 rtt = s->probe.rtt; 200 } 201 } 202 203 if (vc->index == -1) 204 goto no_more_servers; 205 206 selected_server: 207 _debug("use %d", vc->index); 208 __clear_bit(vc->index, &vc->untried); 209 210 /* We're starting on a different vlserver from the list. We need to 211 * check it, find its address list and probe its capabilities before we 212 * use it. 213 */ 214 ASSERTCMP(vc->ac.alist, ==, NULL); 215 vlserver = vc->server_list->servers[vc->index].server; 216 vc->server = vlserver; 217 218 _debug("USING VLSERVER: %s", vlserver->name); 219 220 read_lock(&vlserver->lock); 221 alist = rcu_dereference_protected(vlserver->addresses, 222 lockdep_is_held(&vlserver->lock)); 223 afs_get_addrlist(alist); 224 read_unlock(&vlserver->lock); 225 226 memset(&vc->ac, 0, sizeof(vc->ac)); 227 228 if (!vc->ac.alist) 229 vc->ac.alist = alist; 230 else 231 afs_put_addrlist(alist); 232 233 vc->ac.index = -1; 234 235 iterate_address: 236 ASSERT(vc->ac.alist); 237 /* Iterate over the current server's address list to try and find an 238 * address on which it will respond to us. 239 */ 240 if (!afs_iterate_addresses(&vc->ac)) 241 goto next_server; 242 243 _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); 244 245 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 246 return true; 247 248 next_server: 249 _debug("next"); 250 afs_end_cursor(&vc->ac); 251 goto pick_server; 252 253 no_more_servers: 254 /* That's all the servers poked to no good effect. Try again if some 255 * of them were busy. 256 */ 257 if (vc->flags & AFS_VL_CURSOR_RETRY) 258 goto restart_from_beginning; 259 260 e.error = -EDESTADDRREQ; 261 e.responded = false; 262 for (i = 0; i < vc->server_list->nr_servers; i++) { 263 struct afs_vlserver *s = vc->server_list->servers[i].server; 264 265 afs_prioritise_error(&e, READ_ONCE(s->probe.error), 266 s->probe.abort_code); 267 } 268 269 failed_set_error: 270 vc->error = error; 271 failed: 272 vc->flags |= AFS_VL_CURSOR_STOP; 273 afs_end_cursor(&vc->ac); 274 _leave(" = f [failed %d]", vc->error); 275 return false; 276 } 277 278 /* 279 * Dump cursor state in the case of the error being EDESTADDRREQ. 280 */ 281 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 282 { 283 static int count; 284 int i; 285 286 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 287 return; 288 count++; 289 290 rcu_read_lock(); 291 pr_notice("EDESTADDR occurred\n"); 292 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 293 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); 294 295 if (vc->server_list) { 296 const struct afs_vlserver_list *sl = vc->server_list; 297 pr_notice("VC: SL nr=%u ix=%u\n", 298 sl->nr_servers, sl->index); 299 for (i = 0; i < sl->nr_servers; i++) { 300 const struct afs_vlserver *s = sl->servers[i].server; 301 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 302 s->name, s->port, s->flags, s->probe.error); 303 if (s->addresses) { 304 const struct afs_addr_list *a = 305 rcu_dereference(s->addresses); 306 pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 307 a->nr_ipv4, a->nr_addrs, a->max_addrs, 308 a->preferred); 309 pr_notice("VC: - pr=%lx R=%lx F=%lx\n", 310 a->probed, a->responded, a->failed); 311 if (a == vc->ac.alist) 312 pr_notice("VC: - current\n"); 313 } 314 } 315 } 316 317 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", 318 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, 319 vc->ac.responded, vc->ac.nr_iterations); 320 rcu_read_unlock(); 321 } 322 323 /* 324 * Tidy up a volume location server cursor and unlock the vnode. 325 */ 326 int afs_end_vlserver_operation(struct afs_vl_cursor *vc) 327 { 328 struct afs_net *net = vc->cell->net; 329 330 if (vc->error == -EDESTADDRREQ || 331 vc->error == -EADDRNOTAVAIL || 332 vc->error == -ENETUNREACH || 333 vc->error == -EHOSTUNREACH) 334 afs_vl_dump_edestaddrreq(vc); 335 336 afs_end_cursor(&vc->ac); 337 afs_put_vlserverlist(net, vc->server_list); 338 339 if (vc->error == -ECONNABORTED) 340 vc->error = afs_abort_to_error(vc->ac.abort_code); 341 342 return vc->error; 343 } 344