1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Handle vlserver selection and rotation. 3 * 4 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/sched.h> 10 #include <linux/sched/signal.h> 11 #include "internal.h" 12 #include "afs_vl.h" 13 14 /* 15 * Begin an operation on a volume location server. 16 */ 17 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 18 struct key *key) 19 { 20 memset(vc, 0, sizeof(*vc)); 21 vc->cell = cell; 22 vc->key = key; 23 vc->error = -EDESTADDRREQ; 24 vc->ac.error = SHRT_MAX; 25 26 if (signal_pending(current)) { 27 vc->error = -EINTR; 28 vc->flags |= AFS_VL_CURSOR_STOP; 29 return false; 30 } 31 32 return true; 33 } 34 35 /* 36 * Begin iteration through a server list, starting with the last used server if 37 * possible, or the last recorded good server if not. 38 */ 39 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 40 { 41 struct afs_cell *cell = vc->cell; 42 unsigned int dns_lookup_count; 43 44 if (cell->dns_source == DNS_RECORD_UNAVAILABLE || 45 cell->dns_expiry <= ktime_get_real_seconds()) { 46 dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); 47 set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); 48 afs_queue_cell(cell, afs_cell_trace_get_queue_dns); 49 50 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 51 if (wait_var_event_interruptible( 52 &cell->dns_lookup_count, 53 smp_load_acquire(&cell->dns_lookup_count) 54 != dns_lookup_count) < 0) { 55 vc->error = -ERESTARTSYS; 56 return false; 57 } 58 } 59 60 /* Status load is ordered after lookup counter load */ 61 if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { 62 pr_warn("No record of cell %s\n", cell->name); 63 vc->error = -ENOENT; 64 return false; 65 } 66 67 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 68 vc->error = -EDESTADDRREQ; 69 return false; 70 } 71 } 72 73 read_lock(&cell->vl_servers_lock); 74 vc->server_list = afs_get_vlserverlist( 75 rcu_dereference_protected(cell->vl_servers, 76 lockdep_is_held(&cell->vl_servers_lock))); 77 read_unlock(&cell->vl_servers_lock); 78 if (!vc->server_list->nr_servers) 79 return false; 80 81 vc->untried = (1UL << vc->server_list->nr_servers) - 1; 82 vc->index = -1; 83 return true; 84 } 85 86 /* 87 * Select the vlserver to use. May be called multiple times to rotate 88 * through the vlservers. 89 */ 90 bool afs_select_vlserver(struct afs_vl_cursor *vc) 91 { 92 struct afs_addr_list *alist; 93 struct afs_vlserver *vlserver; 94 struct afs_error e; 95 u32 rtt; 96 int error = vc->ac.error, i; 97 98 _enter("%lx[%d],%lx[%d],%d,%d", 99 vc->untried, vc->index, 100 vc->ac.tried, vc->ac.index, 101 error, vc->ac.abort_code); 102 103 if (vc->flags & AFS_VL_CURSOR_STOP) { 104 _leave(" = f [stopped]"); 105 return false; 106 } 107 108 vc->nr_iterations++; 109 110 /* Evaluate the result of the previous operation, if there was one. */ 111 switch (error) { 112 case SHRT_MAX: 113 goto start; 114 115 default: 116 case 0: 117 /* Success or local failure. Stop. */ 118 vc->error = error; 119 vc->flags |= AFS_VL_CURSOR_STOP; 120 _leave(" = f [okay/local %d]", vc->ac.error); 121 return false; 122 123 case -ECONNABORTED: 124 /* The far side rejected the operation on some grounds. This 125 * might involve the server being busy or the volume having been moved. 126 */ 127 switch (vc->ac.abort_code) { 128 case AFSVL_IO: 129 case AFSVL_BADVOLOPER: 130 case AFSVL_NOMEM: 131 /* The server went weird. */ 132 vc->error = -EREMOTEIO; 133 //write_lock(&vc->cell->vl_servers_lock); 134 //vc->server_list->weird_mask |= 1 << vc->index; 135 //write_unlock(&vc->cell->vl_servers_lock); 136 goto next_server; 137 138 default: 139 vc->error = afs_abort_to_error(vc->ac.abort_code); 140 goto failed; 141 } 142 143 case -ERFKILL: 144 case -EADDRNOTAVAIL: 145 case -ENETUNREACH: 146 case -EHOSTUNREACH: 147 case -EHOSTDOWN: 148 case -ECONNREFUSED: 149 case -ETIMEDOUT: 150 case -ETIME: 151 _debug("no conn %d", error); 152 vc->error = error; 153 goto iterate_address; 154 155 case -ECONNRESET: 156 _debug("call reset"); 157 vc->error = error; 158 vc->flags |= AFS_VL_CURSOR_RETRY; 159 goto next_server; 160 161 case -EOPNOTSUPP: 162 _debug("notsupp"); 163 goto next_server; 164 } 165 166 restart_from_beginning: 167 _debug("restart"); 168 afs_end_cursor(&vc->ac); 169 afs_put_vlserverlist(vc->cell->net, vc->server_list); 170 vc->server_list = NULL; 171 if (vc->flags & AFS_VL_CURSOR_RETRIED) 172 goto failed; 173 vc->flags |= AFS_VL_CURSOR_RETRIED; 174 start: 175 _debug("start"); 176 177 if (!afs_start_vl_iteration(vc)) 178 goto failed; 179 180 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 181 if (error < 0) 182 goto failed_set_error; 183 184 pick_server: 185 _debug("pick [%lx]", vc->untried); 186 187 error = afs_wait_for_vl_probes(vc->server_list, vc->untried); 188 if (error < 0) 189 goto failed_set_error; 190 191 /* Pick the untried server with the lowest RTT. */ 192 vc->index = vc->server_list->preferred; 193 if (test_bit(vc->index, &vc->untried)) 194 goto selected_server; 195 196 vc->index = -1; 197 rtt = U32_MAX; 198 for (i = 0; i < vc->server_list->nr_servers; i++) { 199 struct afs_vlserver *s = vc->server_list->servers[i].server; 200 201 if (!test_bit(i, &vc->untried) || 202 !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 203 continue; 204 if (s->probe.rtt < rtt) { 205 vc->index = i; 206 rtt = s->probe.rtt; 207 } 208 } 209 210 if (vc->index == -1) 211 goto no_more_servers; 212 213 selected_server: 214 _debug("use %d", vc->index); 215 __clear_bit(vc->index, &vc->untried); 216 217 /* We're starting on a different vlserver from the list. We need to 218 * check it, find its address list and probe its capabilities before we 219 * use it. 220 */ 221 ASSERTCMP(vc->ac.alist, ==, NULL); 222 vlserver = vc->server_list->servers[vc->index].server; 223 vc->server = vlserver; 224 225 _debug("USING VLSERVER: %s", vlserver->name); 226 227 read_lock(&vlserver->lock); 228 alist = rcu_dereference_protected(vlserver->addresses, 229 lockdep_is_held(&vlserver->lock)); 230 afs_get_addrlist(alist); 231 read_unlock(&vlserver->lock); 232 233 memset(&vc->ac, 0, sizeof(vc->ac)); 234 235 if (!vc->ac.alist) 236 vc->ac.alist = alist; 237 else 238 afs_put_addrlist(alist); 239 240 vc->ac.index = -1; 241 242 iterate_address: 243 ASSERT(vc->ac.alist); 244 /* Iterate over the current server's address list to try and find an 245 * address on which it will respond to us. 246 */ 247 if (!afs_iterate_addresses(&vc->ac)) 248 goto next_server; 249 250 _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); 251 252 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 253 return true; 254 255 next_server: 256 _debug("next"); 257 afs_end_cursor(&vc->ac); 258 goto pick_server; 259 260 no_more_servers: 261 /* That's all the servers poked to no good effect. Try again if some 262 * of them were busy. 263 */ 264 if (vc->flags & AFS_VL_CURSOR_RETRY) 265 goto restart_from_beginning; 266 267 e.error = -EDESTADDRREQ; 268 e.responded = false; 269 for (i = 0; i < vc->server_list->nr_servers; i++) { 270 struct afs_vlserver *s = vc->server_list->servers[i].server; 271 272 if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 273 e.responded = true; 274 afs_prioritise_error(&e, READ_ONCE(s->probe.error), 275 s->probe.abort_code); 276 } 277 278 error = e.error; 279 280 failed_set_error: 281 vc->error = error; 282 failed: 283 vc->flags |= AFS_VL_CURSOR_STOP; 284 afs_end_cursor(&vc->ac); 285 _leave(" = f [failed %d]", vc->error); 286 return false; 287 } 288 289 /* 290 * Dump cursor state in the case of the error being EDESTADDRREQ. 291 */ 292 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 293 { 294 struct afs_cell *cell = vc->cell; 295 static int count; 296 int i; 297 298 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 299 return; 300 count++; 301 302 rcu_read_lock(); 303 pr_notice("EDESTADDR occurred\n"); 304 pr_notice("CELL: %s err=%d\n", cell->name, cell->error); 305 pr_notice("DNS: src=%u st=%u lc=%x\n", 306 cell->dns_source, cell->dns_status, cell->dns_lookup_count); 307 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 308 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); 309 310 if (vc->server_list) { 311 const struct afs_vlserver_list *sl = vc->server_list; 312 pr_notice("VC: SL nr=%u ix=%u\n", 313 sl->nr_servers, sl->index); 314 for (i = 0; i < sl->nr_servers; i++) { 315 const struct afs_vlserver *s = sl->servers[i].server; 316 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 317 s->name, s->port, s->flags, s->probe.error); 318 if (s->addresses) { 319 const struct afs_addr_list *a = 320 rcu_dereference(s->addresses); 321 pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 322 a->nr_ipv4, a->nr_addrs, a->max_addrs, 323 a->preferred); 324 pr_notice("VC: - R=%lx F=%lx\n", 325 a->responded, a->failed); 326 if (a == vc->ac.alist) 327 pr_notice("VC: - current\n"); 328 } 329 } 330 } 331 332 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", 333 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, 334 vc->ac.responded, vc->ac.nr_iterations); 335 rcu_read_unlock(); 336 } 337 338 /* 339 * Tidy up a volume location server cursor and unlock the vnode. 340 */ 341 int afs_end_vlserver_operation(struct afs_vl_cursor *vc) 342 { 343 struct afs_net *net = vc->cell->net; 344 345 if (vc->error == -EDESTADDRREQ || 346 vc->error == -EADDRNOTAVAIL || 347 vc->error == -ENETUNREACH || 348 vc->error == -EHOSTUNREACH) 349 afs_vl_dump_edestaddrreq(vc); 350 351 afs_end_cursor(&vc->ac); 352 afs_put_vlserverlist(net, vc->server_list); 353 354 if (vc->error == -ECONNABORTED) 355 vc->error = afs_abort_to_error(vc->ac.abort_code); 356 357 return vc->error; 358 } 359