1 /* 2 * Device operations for the pnfs nfs4 file layout driver. 3 * 4 * Copyright (c) 2002 5 * The Regents of the University of Michigan 6 * All Rights Reserved 7 * 8 * Dean Hildebrand <dhildebz@umich.edu> 9 * Garth Goodson <Garth.Goodson@netapp.com> 10 * 11 * Permission is granted to use, copy, create derivative works, and 12 * redistribute this software and such derivative works for any purpose, 13 * so long as the name of the University of Michigan is not used in 14 * any advertising or publicity pertaining to the use or distribution 15 * of this software without specific, written prior authorization. If 16 * the above copyright notice or any other identification of the 17 * University of Michigan is included in any copy of any portion of 18 * this software, then the disclaimer below must also be included. 19 * 20 * This software is provided as is, without representation or warranty 21 * of any kind either express or implied, including without limitation 22 * the implied warranties of merchantability, fitness for a particular 23 * purpose, or noninfringement. The Regents of the University of 24 * Michigan shall not be liable for any damages, including special, 25 * indirect, incidental, or consequential damages, with respect to any 26 * claim arising out of or in connection with the use of the software, 27 * even if it has been or is hereafter advised of the possibility of 28 * such damages. 29 */ 30 31 #include <linux/nfs_fs.h> 32 #include <linux/vmalloc.h> 33 #include <linux/module.h> 34 #include <linux/sunrpc/addr.h> 35 36 #include "../internal.h" 37 #include "../nfs4session.h" 38 #include "filelayout.h" 39 40 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 41 42 static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; 43 static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; 44 45 /* 46 * Data server cache 47 * 48 * Data servers can be mapped to different device ids. 49 * nfs4_pnfs_ds reference counting 50 * - set to 1 on allocation 51 * - incremented when a device id maps a data server already in the cache. 52 * - decremented when deviceid is removed from the cache. 53 */ 54 static DEFINE_SPINLOCK(nfs4_ds_cache_lock); 55 static LIST_HEAD(nfs4_data_server_cache); 56 57 /* Debug routines */ 58 void 59 print_ds(struct nfs4_pnfs_ds *ds) 60 { 61 if (ds == NULL) { 62 printk("%s NULL device\n", __func__); 63 return; 64 } 65 printk(" ds %s\n" 66 " ref count %d\n" 67 " client %p\n" 68 " cl_exchange_flags %x\n", 69 ds->ds_remotestr, 70 atomic_read(&ds->ds_count), ds->ds_clp, 71 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); 72 } 73 74 static bool 75 same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) 76 { 77 struct sockaddr_in *a, *b; 78 struct sockaddr_in6 *a6, *b6; 79 80 if (addr1->sa_family != addr2->sa_family) 81 return false; 82 83 switch (addr1->sa_family) { 84 case AF_INET: 85 a = (struct sockaddr_in *)addr1; 86 b = (struct sockaddr_in *)addr2; 87 88 if (a->sin_addr.s_addr == b->sin_addr.s_addr && 89 a->sin_port == b->sin_port) 90 return true; 91 break; 92 93 case AF_INET6: 94 a6 = (struct sockaddr_in6 *)addr1; 95 b6 = (struct sockaddr_in6 *)addr2; 96 97 /* LINKLOCAL addresses must have matching scope_id */ 98 if (ipv6_addr_src_scope(&a6->sin6_addr) == 99 IPV6_ADDR_SCOPE_LINKLOCAL && 100 a6->sin6_scope_id != b6->sin6_scope_id) 101 return false; 102 103 if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && 104 a6->sin6_port == b6->sin6_port) 105 return true; 106 break; 107 108 default: 109 dprintk("%s: unhandled address family: %u\n", 110 __func__, addr1->sa_family); 111 return false; 112 } 113 114 return false; 115 } 116 117 static bool 118 _same_data_server_addrs_locked(const struct list_head *dsaddrs1, 119 const struct list_head *dsaddrs2) 120 { 121 struct nfs4_pnfs_ds_addr *da1, *da2; 122 123 /* step through both lists, comparing as we go */ 124 for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), 125 da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); 126 da1 != NULL && da2 != NULL; 127 da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), 128 da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { 129 if (!same_sockaddr((struct sockaddr *)&da1->da_addr, 130 (struct sockaddr *)&da2->da_addr)) 131 return false; 132 } 133 if (da1 == NULL && da2 == NULL) 134 return true; 135 136 return false; 137 } 138 139 /* 140 * Lookup DS by addresses. nfs4_ds_cache_lock is held 141 */ 142 static struct nfs4_pnfs_ds * 143 _data_server_lookup_locked(const struct list_head *dsaddrs) 144 { 145 struct nfs4_pnfs_ds *ds; 146 147 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) 148 if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) 149 return ds; 150 return NULL; 151 } 152 153 /* 154 * Create an rpc connection to the nfs4_pnfs_ds data server 155 * Currently only supports IPv4 and IPv6 addresses 156 */ 157 static int 158 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 159 { 160 struct nfs_client *clp = ERR_PTR(-EIO); 161 struct nfs4_pnfs_ds_addr *da; 162 int status = 0; 163 164 dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, 165 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 166 167 list_for_each_entry(da, &ds->ds_addrs, da_node) { 168 dprintk("%s: DS %s: trying address %s\n", 169 __func__, ds->ds_remotestr, da->da_remotestr); 170 171 clp = nfs4_set_ds_client(mds_srv->nfs_client, 172 (struct sockaddr *)&da->da_addr, 173 da->da_addrlen, IPPROTO_TCP, 174 dataserver_timeo, dataserver_retrans); 175 if (!IS_ERR(clp)) 176 break; 177 } 178 179 if (IS_ERR(clp)) { 180 status = PTR_ERR(clp); 181 goto out; 182 } 183 184 status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); 185 if (status) 186 goto out_put; 187 188 smp_wmb(); 189 ds->ds_clp = clp; 190 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 191 out: 192 return status; 193 out_put: 194 nfs_put_client(clp); 195 goto out; 196 } 197 198 static void 199 destroy_ds(struct nfs4_pnfs_ds *ds) 200 { 201 struct nfs4_pnfs_ds_addr *da; 202 203 dprintk("--> %s\n", __func__); 204 ifdebug(FACILITY) 205 print_ds(ds); 206 207 if (ds->ds_clp) 208 nfs_put_client(ds->ds_clp); 209 210 while (!list_empty(&ds->ds_addrs)) { 211 da = list_first_entry(&ds->ds_addrs, 212 struct nfs4_pnfs_ds_addr, 213 da_node); 214 list_del_init(&da->da_node); 215 kfree(da->da_remotestr); 216 kfree(da); 217 } 218 219 kfree(ds->ds_remotestr); 220 kfree(ds); 221 } 222 223 void 224 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) 225 { 226 struct nfs4_pnfs_ds *ds; 227 int i; 228 229 nfs4_print_deviceid(&dsaddr->id_node.deviceid); 230 231 for (i = 0; i < dsaddr->ds_num; i++) { 232 ds = dsaddr->ds_list[i]; 233 if (ds != NULL) { 234 if (atomic_dec_and_lock(&ds->ds_count, 235 &nfs4_ds_cache_lock)) { 236 list_del_init(&ds->ds_node); 237 spin_unlock(&nfs4_ds_cache_lock); 238 destroy_ds(ds); 239 } 240 } 241 } 242 kfree(dsaddr->stripe_indices); 243 kfree(dsaddr); 244 } 245 246 /* 247 * Create a string with a human readable address and port to avoid 248 * complicated setup around many dprinks. 249 */ 250 static char * 251 nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) 252 { 253 struct nfs4_pnfs_ds_addr *da; 254 char *remotestr; 255 size_t len; 256 char *p; 257 258 len = 3; /* '{', '}' and eol */ 259 list_for_each_entry(da, dsaddrs, da_node) { 260 len += strlen(da->da_remotestr) + 1; /* string plus comma */ 261 } 262 263 remotestr = kzalloc(len, gfp_flags); 264 if (!remotestr) 265 return NULL; 266 267 p = remotestr; 268 *(p++) = '{'; 269 len--; 270 list_for_each_entry(da, dsaddrs, da_node) { 271 size_t ll = strlen(da->da_remotestr); 272 273 if (ll > len) 274 goto out_err; 275 276 memcpy(p, da->da_remotestr, ll); 277 p += ll; 278 len -= ll; 279 280 if (len < 1) 281 goto out_err; 282 (*p++) = ','; 283 len--; 284 } 285 if (len < 2) 286 goto out_err; 287 *(p++) = '}'; 288 *p = '\0'; 289 return remotestr; 290 out_err: 291 kfree(remotestr); 292 return NULL; 293 } 294 295 static struct nfs4_pnfs_ds * 296 nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) 297 { 298 struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; 299 char *remotestr; 300 301 if (list_empty(dsaddrs)) { 302 dprintk("%s: no addresses defined\n", __func__); 303 goto out; 304 } 305 306 ds = kzalloc(sizeof(*ds), gfp_flags); 307 if (!ds) 308 goto out; 309 310 /* this is only used for debugging, so it's ok if its NULL */ 311 remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); 312 313 spin_lock(&nfs4_ds_cache_lock); 314 tmp_ds = _data_server_lookup_locked(dsaddrs); 315 if (tmp_ds == NULL) { 316 INIT_LIST_HEAD(&ds->ds_addrs); 317 list_splice_init(dsaddrs, &ds->ds_addrs); 318 ds->ds_remotestr = remotestr; 319 atomic_set(&ds->ds_count, 1); 320 INIT_LIST_HEAD(&ds->ds_node); 321 ds->ds_clp = NULL; 322 list_add(&ds->ds_node, &nfs4_data_server_cache); 323 dprintk("%s add new data server %s\n", __func__, 324 ds->ds_remotestr); 325 } else { 326 kfree(remotestr); 327 kfree(ds); 328 atomic_inc(&tmp_ds->ds_count); 329 dprintk("%s data server %s found, inc'ed ds_count to %d\n", 330 __func__, tmp_ds->ds_remotestr, 331 atomic_read(&tmp_ds->ds_count)); 332 ds = tmp_ds; 333 } 334 spin_unlock(&nfs4_ds_cache_lock); 335 out: 336 return ds; 337 } 338 339 /* 340 * Currently only supports ipv4, ipv6 and one multi-path address. 341 */ 342 static struct nfs4_pnfs_ds_addr * 343 decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) 344 { 345 struct nfs4_pnfs_ds_addr *da = NULL; 346 char *buf, *portstr; 347 __be16 port; 348 int nlen, rlen; 349 int tmp[2]; 350 __be32 *p; 351 char *netid, *match_netid; 352 size_t len, match_netid_len; 353 char *startsep = ""; 354 char *endsep = ""; 355 356 357 /* r_netid */ 358 p = xdr_inline_decode(streamp, 4); 359 if (unlikely(!p)) 360 goto out_err; 361 nlen = be32_to_cpup(p++); 362 363 p = xdr_inline_decode(streamp, nlen); 364 if (unlikely(!p)) 365 goto out_err; 366 367 netid = kmalloc(nlen+1, gfp_flags); 368 if (unlikely(!netid)) 369 goto out_err; 370 371 netid[nlen] = '\0'; 372 memcpy(netid, p, nlen); 373 374 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ 375 p = xdr_inline_decode(streamp, 4); 376 if (unlikely(!p)) 377 goto out_free_netid; 378 rlen = be32_to_cpup(p); 379 380 p = xdr_inline_decode(streamp, rlen); 381 if (unlikely(!p)) 382 goto out_free_netid; 383 384 /* port is ".ABC.DEF", 8 chars max */ 385 if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { 386 dprintk("%s: Invalid address, length %d\n", __func__, 387 rlen); 388 goto out_free_netid; 389 } 390 buf = kmalloc(rlen + 1, gfp_flags); 391 if (!buf) { 392 dprintk("%s: Not enough memory\n", __func__); 393 goto out_free_netid; 394 } 395 buf[rlen] = '\0'; 396 memcpy(buf, p, rlen); 397 398 /* replace port '.' with '-' */ 399 portstr = strrchr(buf, '.'); 400 if (!portstr) { 401 dprintk("%s: Failed finding expected dot in port\n", 402 __func__); 403 goto out_free_buf; 404 } 405 *portstr = '-'; 406 407 /* find '.' between address and port */ 408 portstr = strrchr(buf, '.'); 409 if (!portstr) { 410 dprintk("%s: Failed finding expected dot between address and " 411 "port\n", __func__); 412 goto out_free_buf; 413 } 414 *portstr = '\0'; 415 416 da = kzalloc(sizeof(*da), gfp_flags); 417 if (unlikely(!da)) 418 goto out_free_buf; 419 420 INIT_LIST_HEAD(&da->da_node); 421 422 if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, 423 sizeof(da->da_addr))) { 424 dprintk("%s: error parsing address %s\n", __func__, buf); 425 goto out_free_da; 426 } 427 428 portstr++; 429 sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); 430 port = htons((tmp[0] << 8) | (tmp[1])); 431 432 switch (da->da_addr.ss_family) { 433 case AF_INET: 434 ((struct sockaddr_in *)&da->da_addr)->sin_port = port; 435 da->da_addrlen = sizeof(struct sockaddr_in); 436 match_netid = "tcp"; 437 match_netid_len = 3; 438 break; 439 440 case AF_INET6: 441 ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; 442 da->da_addrlen = sizeof(struct sockaddr_in6); 443 match_netid = "tcp6"; 444 match_netid_len = 4; 445 startsep = "["; 446 endsep = "]"; 447 break; 448 449 default: 450 dprintk("%s: unsupported address family: %u\n", 451 __func__, da->da_addr.ss_family); 452 goto out_free_da; 453 } 454 455 if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { 456 dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", 457 __func__, netid, match_netid); 458 goto out_free_da; 459 } 460 461 /* save human readable address */ 462 len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; 463 da->da_remotestr = kzalloc(len, gfp_flags); 464 465 /* NULL is ok, only used for dprintk */ 466 if (da->da_remotestr) 467 snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, 468 buf, endsep, ntohs(port)); 469 470 dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); 471 kfree(buf); 472 kfree(netid); 473 return da; 474 475 out_free_da: 476 kfree(da); 477 out_free_buf: 478 dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); 479 kfree(buf); 480 out_free_netid: 481 kfree(netid); 482 out_err: 483 return NULL; 484 } 485 486 /* Decode opaque device data and return the result */ 487 static struct nfs4_file_layout_dsaddr* 488 decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) 489 { 490 int i; 491 u32 cnt, num; 492 u8 *indexp; 493 __be32 *p; 494 u8 *stripe_indices; 495 u8 max_stripe_index; 496 struct nfs4_file_layout_dsaddr *dsaddr = NULL; 497 struct xdr_stream stream; 498 struct xdr_buf buf; 499 struct page *scratch; 500 struct list_head dsaddrs; 501 struct nfs4_pnfs_ds_addr *da; 502 503 /* set up xdr stream */ 504 scratch = alloc_page(gfp_flags); 505 if (!scratch) 506 goto out_err; 507 508 xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 509 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 510 511 /* Get the stripe count (number of stripe index) */ 512 p = xdr_inline_decode(&stream, 4); 513 if (unlikely(!p)) 514 goto out_err_free_scratch; 515 516 cnt = be32_to_cpup(p); 517 dprintk("%s stripe count %d\n", __func__, cnt); 518 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 519 printk(KERN_WARNING "NFS: %s: stripe count %d greater than " 520 "supported maximum %d\n", __func__, 521 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 522 goto out_err_free_scratch; 523 } 524 525 /* read stripe indices */ 526 stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); 527 if (!stripe_indices) 528 goto out_err_free_scratch; 529 530 p = xdr_inline_decode(&stream, cnt << 2); 531 if (unlikely(!p)) 532 goto out_err_free_stripe_indices; 533 534 indexp = &stripe_indices[0]; 535 max_stripe_index = 0; 536 for (i = 0; i < cnt; i++) { 537 *indexp = be32_to_cpup(p++); 538 max_stripe_index = max(max_stripe_index, *indexp); 539 indexp++; 540 } 541 542 /* Check the multipath list count */ 543 p = xdr_inline_decode(&stream, 4); 544 if (unlikely(!p)) 545 goto out_err_free_stripe_indices; 546 547 num = be32_to_cpup(p); 548 dprintk("%s ds_num %u\n", __func__, num); 549 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 550 printk(KERN_WARNING "NFS: %s: multipath count %d greater than " 551 "supported maximum %d\n", __func__, 552 num, NFS4_PNFS_MAX_MULTI_CNT); 553 goto out_err_free_stripe_indices; 554 } 555 556 /* validate stripe indices are all < num */ 557 if (max_stripe_index >= num) { 558 printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", 559 __func__, max_stripe_index, num); 560 goto out_err_free_stripe_indices; 561 } 562 563 dsaddr = kzalloc(sizeof(*dsaddr) + 564 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 565 gfp_flags); 566 if (!dsaddr) 567 goto out_err_free_stripe_indices; 568 569 dsaddr->stripe_count = cnt; 570 dsaddr->stripe_indices = stripe_indices; 571 stripe_indices = NULL; 572 dsaddr->ds_num = num; 573 nfs4_init_deviceid_node(&dsaddr->id_node, 574 NFS_SERVER(ino)->pnfs_curr_ld, 575 NFS_SERVER(ino)->nfs_client, 576 &pdev->dev_id); 577 578 INIT_LIST_HEAD(&dsaddrs); 579 580 for (i = 0; i < dsaddr->ds_num; i++) { 581 int j; 582 u32 mp_count; 583 584 p = xdr_inline_decode(&stream, 4); 585 if (unlikely(!p)) 586 goto out_err_free_deviceid; 587 588 mp_count = be32_to_cpup(p); /* multipath count */ 589 for (j = 0; j < mp_count; j++) { 590 da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, 591 &stream, gfp_flags); 592 if (da) 593 list_add_tail(&da->da_node, &dsaddrs); 594 } 595 if (list_empty(&dsaddrs)) { 596 dprintk("%s: no suitable DS addresses found\n", 597 __func__); 598 goto out_err_free_deviceid; 599 } 600 601 dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 602 if (!dsaddr->ds_list[i]) 603 goto out_err_drain_dsaddrs; 604 605 /* If DS was already in cache, free ds addrs */ 606 while (!list_empty(&dsaddrs)) { 607 da = list_first_entry(&dsaddrs, 608 struct nfs4_pnfs_ds_addr, 609 da_node); 610 list_del_init(&da->da_node); 611 kfree(da->da_remotestr); 612 kfree(da); 613 } 614 } 615 616 __free_page(scratch); 617 return dsaddr; 618 619 out_err_drain_dsaddrs: 620 while (!list_empty(&dsaddrs)) { 621 da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 622 da_node); 623 list_del_init(&da->da_node); 624 kfree(da->da_remotestr); 625 kfree(da); 626 } 627 out_err_free_deviceid: 628 nfs4_fl_free_deviceid(dsaddr); 629 /* stripe_indicies was part of dsaddr */ 630 goto out_err_free_scratch; 631 out_err_free_stripe_indices: 632 kfree(stripe_indices); 633 out_err_free_scratch: 634 __free_page(scratch); 635 out_err: 636 dprintk("%s ERROR: returning NULL\n", __func__); 637 return NULL; 638 } 639 640 /* 641 * Decode the opaque device specified in 'dev' and add it to the cache of 642 * available devices. 643 */ 644 static struct nfs4_file_layout_dsaddr * 645 decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) 646 { 647 struct nfs4_deviceid_node *d; 648 struct nfs4_file_layout_dsaddr *n, *new; 649 650 new = decode_device(inode, dev, gfp_flags); 651 if (!new) { 652 printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", 653 __func__); 654 return NULL; 655 } 656 657 d = nfs4_insert_deviceid_node(&new->id_node); 658 n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 659 if (n != new) { 660 nfs4_fl_free_deviceid(new); 661 return n; 662 } 663 664 return new; 665 } 666 667 /* 668 * Retrieve the information for dev_id, add it to the list 669 * of available devices, and return it. 670 */ 671 struct nfs4_file_layout_dsaddr * 672 filelayout_get_device_info(struct inode *inode, 673 struct nfs4_deviceid *dev_id, 674 struct rpc_cred *cred, 675 gfp_t gfp_flags) 676 { 677 struct pnfs_device *pdev = NULL; 678 u32 max_resp_sz; 679 int max_pages; 680 struct page **pages = NULL; 681 struct nfs4_file_layout_dsaddr *dsaddr = NULL; 682 int rc, i; 683 struct nfs_server *server = NFS_SERVER(inode); 684 685 /* 686 * Use the session max response size as the basis for setting 687 * GETDEVICEINFO's maxcount 688 */ 689 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 690 max_pages = nfs_page_array_len(0, max_resp_sz); 691 dprintk("%s inode %p max_resp_sz %u max_pages %d\n", 692 __func__, inode, max_resp_sz, max_pages); 693 694 pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); 695 if (pdev == NULL) 696 return NULL; 697 698 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 699 if (pages == NULL) { 700 kfree(pdev); 701 return NULL; 702 } 703 for (i = 0; i < max_pages; i++) { 704 pages[i] = alloc_page(gfp_flags); 705 if (!pages[i]) 706 goto out_free; 707 } 708 709 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 710 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 711 pdev->pages = pages; 712 pdev->pgbase = 0; 713 pdev->pglen = max_resp_sz; 714 pdev->mincount = 0; 715 pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; 716 717 rc = nfs4_proc_getdeviceinfo(server, pdev, cred); 718 dprintk("%s getdevice info returns %d\n", __func__, rc); 719 if (rc) 720 goto out_free; 721 722 /* 723 * Found new device, need to decode it and then add it to the 724 * list of known devices for this mountpoint. 725 */ 726 dsaddr = decode_and_add_device(inode, pdev, gfp_flags); 727 out_free: 728 for (i = 0; i < max_pages; i++) 729 __free_page(pages[i]); 730 kfree(pages); 731 kfree(pdev); 732 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); 733 return dsaddr; 734 } 735 736 void 737 nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) 738 { 739 nfs4_put_deviceid_node(&dsaddr->id_node); 740 } 741 742 /* 743 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit 744 * Then: ((res + fsi) % dsaddr->stripe_count) 745 */ 746 u32 747 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) 748 { 749 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); 750 u64 tmp; 751 752 tmp = offset - flseg->pattern_offset; 753 do_div(tmp, flseg->stripe_unit); 754 tmp += flseg->first_stripe_index; 755 return do_div(tmp, flseg->dsaddr->stripe_count); 756 } 757 758 u32 759 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) 760 { 761 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; 762 } 763 764 struct nfs_fh * 765 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) 766 { 767 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); 768 u32 i; 769 770 if (flseg->stripe_type == STRIPE_SPARSE) { 771 if (flseg->num_fh == 1) 772 i = 0; 773 else if (flseg->num_fh == 0) 774 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ 775 return NULL; 776 else 777 i = nfs4_fl_calc_ds_index(lseg, j); 778 } else 779 i = j; 780 return flseg->fh_array[i]; 781 } 782 783 static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 784 { 785 might_sleep(); 786 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 787 nfs_wait_bit_killable, TASK_KILLABLE); 788 } 789 790 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 791 { 792 smp_mb__before_atomic(); 793 clear_bit(NFS4DS_CONNECTING, &ds->ds_state); 794 smp_mb__after_atomic(); 795 wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); 796 } 797 798 799 struct nfs4_pnfs_ds * 800 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 801 { 802 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; 803 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 804 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 805 struct nfs4_pnfs_ds *ret = ds; 806 807 if (ds == NULL) { 808 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 809 __func__, ds_idx); 810 filelayout_mark_devid_invalid(devid); 811 goto out; 812 } 813 smp_rmb(); 814 if (ds->ds_clp) 815 goto out_test_devid; 816 817 if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { 818 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 819 int err; 820 821 err = nfs4_ds_connect(s, ds); 822 if (err) 823 nfs4_mark_deviceid_unavailable(devid); 824 nfs4_clear_ds_conn_bit(ds); 825 } else { 826 /* Either ds is connected, or ds is NULL */ 827 nfs4_wait_ds_connect(ds); 828 } 829 out_test_devid: 830 if (filelayout_test_devid_unavailable(devid)) 831 ret = NULL; 832 out: 833 return ret; 834 } 835 836 module_param(dataserver_retrans, uint, 0644); 837 MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " 838 "retries a request before it attempts further " 839 " recovery action."); 840 module_param(dataserver_timeo, uint, 0644); 841 MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " 842 "NFSv4.1 client waits for a response from a " 843 " data server before it retries an NFS request."); 844