1 /* 2 * Copyright (c) 2014-2016 Christoph Hellwig. 3 */ 4 #include <linux/sunrpc/svc.h> 5 #include <linux/blkdev.h> 6 #include <linux/nfs4.h> 7 #include <linux/nfs_fs.h> 8 #include <linux/nfs_xdr.h> 9 #include <linux/pr.h> 10 11 #include "blocklayout.h" 12 13 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 14 15 static void 16 bl_free_device(struct pnfs_block_dev *dev) 17 { 18 if (dev->nr_children) { 19 int i; 20 21 for (i = 0; i < dev->nr_children; i++) 22 bl_free_device(&dev->children[i]); 23 kfree(dev->children); 24 } else { 25 if (dev->pr_registered) { 26 const struct pr_ops *ops = 27 dev->bdev->bd_disk->fops->pr_ops; 28 int error; 29 30 error = ops->pr_register(dev->bdev, dev->pr_key, 0, 31 false); 32 if (error) 33 pr_err("failed to unregister PR key.\n"); 34 } 35 36 if (dev->bdev) 37 blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); 38 } 39 } 40 41 void 42 bl_free_deviceid_node(struct nfs4_deviceid_node *d) 43 { 44 struct pnfs_block_dev *dev = 45 container_of(d, struct pnfs_block_dev, node); 46 47 bl_free_device(dev); 48 kfree_rcu(dev, node.rcu); 49 } 50 51 static int 52 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) 53 { 54 __be32 *p; 55 int i; 56 57 p = xdr_inline_decode(xdr, 4); 58 if (!p) 59 return -EIO; 60 b->type = be32_to_cpup(p++); 61 62 switch (b->type) { 63 case PNFS_BLOCK_VOLUME_SIMPLE: 64 p = xdr_inline_decode(xdr, 4); 65 if (!p) 66 return -EIO; 67 b->simple.nr_sigs = be32_to_cpup(p++); 68 if (!b->simple.nr_sigs) { 69 dprintk("no signature\n"); 70 return -EIO; 71 } 72 73 b->simple.len = 4 + 4; 74 for (i = 0; i < b->simple.nr_sigs; i++) { 75 p = xdr_inline_decode(xdr, 8 + 4); 76 if (!p) 77 return -EIO; 78 p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); 79 b->simple.sigs[i].sig_len = be32_to_cpup(p++); 80 if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) { 81 pr_info("signature too long: %d\n", 82 b->simple.sigs[i].sig_len); 83 return -EIO; 84 } 85 86 p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); 87 if (!p) 88 return -EIO; 89 memcpy(&b->simple.sigs[i].sig, p, 90 b->simple.sigs[i].sig_len); 91 92 b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; 93 } 94 break; 95 case PNFS_BLOCK_VOLUME_SLICE: 96 p = xdr_inline_decode(xdr, 8 + 8 + 4); 97 if (!p) 98 return -EIO; 99 p = xdr_decode_hyper(p, &b->slice.start); 100 p = xdr_decode_hyper(p, &b->slice.len); 101 b->slice.volume = be32_to_cpup(p++); 102 break; 103 case PNFS_BLOCK_VOLUME_CONCAT: 104 p = xdr_inline_decode(xdr, 4); 105 if (!p) 106 return -EIO; 107 b->concat.volumes_count = be32_to_cpup(p++); 108 109 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); 110 if (!p) 111 return -EIO; 112 for (i = 0; i < b->concat.volumes_count; i++) 113 b->concat.volumes[i] = be32_to_cpup(p++); 114 break; 115 case PNFS_BLOCK_VOLUME_STRIPE: 116 p = xdr_inline_decode(xdr, 8 + 4); 117 if (!p) 118 return -EIO; 119 p = xdr_decode_hyper(p, &b->stripe.chunk_size); 120 b->stripe.volumes_count = be32_to_cpup(p++); 121 122 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); 123 if (!p) 124 return -EIO; 125 for (i = 0; i < b->stripe.volumes_count; i++) 126 b->stripe.volumes[i] = be32_to_cpup(p++); 127 break; 128 case PNFS_BLOCK_VOLUME_SCSI: 129 p = xdr_inline_decode(xdr, 4 + 4 + 4); 130 if (!p) 131 return -EIO; 132 b->scsi.code_set = be32_to_cpup(p++); 133 b->scsi.designator_type = be32_to_cpup(p++); 134 b->scsi.designator_len = be32_to_cpup(p++); 135 p = xdr_inline_decode(xdr, b->scsi.designator_len); 136 if (!p) 137 return -EIO; 138 if (b->scsi.designator_len > 256) 139 return -EIO; 140 memcpy(&b->scsi.designator, p, b->scsi.designator_len); 141 p = xdr_inline_decode(xdr, 8); 142 if (!p) 143 return -EIO; 144 p = xdr_decode_hyper(p, &b->scsi.pr_key); 145 break; 146 default: 147 dprintk("unknown volume type!\n"); 148 return -EIO; 149 } 150 151 return 0; 152 } 153 154 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, 155 struct pnfs_block_dev_map *map) 156 { 157 map->start = dev->start; 158 map->len = dev->len; 159 map->disk_offset = dev->disk_offset; 160 map->bdev = dev->bdev; 161 return true; 162 } 163 164 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, 165 struct pnfs_block_dev_map *map) 166 { 167 int i; 168 169 for (i = 0; i < dev->nr_children; i++) { 170 struct pnfs_block_dev *child = &dev->children[i]; 171 172 if (child->start > offset || 173 child->start + child->len <= offset) 174 continue; 175 176 child->map(child, offset - child->start, map); 177 return true; 178 } 179 180 dprintk("%s: ran off loop!\n", __func__); 181 return false; 182 } 183 184 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, 185 struct pnfs_block_dev_map *map) 186 { 187 struct pnfs_block_dev *child; 188 u64 chunk; 189 u32 chunk_idx; 190 u64 disk_offset; 191 192 chunk = div_u64(offset, dev->chunk_size); 193 div_u64_rem(chunk, dev->nr_children, &chunk_idx); 194 195 if (chunk_idx > dev->nr_children) { 196 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", 197 __func__, chunk_idx, offset, dev->chunk_size); 198 /* error, should not happen */ 199 return false; 200 } 201 202 /* truncate offset to the beginning of the stripe */ 203 offset = chunk * dev->chunk_size; 204 205 /* disk offset of the stripe */ 206 disk_offset = div_u64(offset, dev->nr_children); 207 208 child = &dev->children[chunk_idx]; 209 child->map(child, disk_offset, map); 210 211 map->start += offset; 212 map->disk_offset += disk_offset; 213 map->len = dev->chunk_size; 214 return true; 215 } 216 217 static int 218 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 219 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); 220 221 222 static int 223 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, 224 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 225 { 226 struct pnfs_block_volume *v = &volumes[idx]; 227 dev_t dev; 228 229 dev = bl_resolve_deviceid(server, v, gfp_mask); 230 if (!dev) 231 return -EIO; 232 233 d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); 234 if (IS_ERR(d->bdev)) { 235 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", 236 MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); 237 return PTR_ERR(d->bdev); 238 } 239 240 241 d->len = i_size_read(d->bdev->bd_inode); 242 d->map = bl_map_simple; 243 244 printk(KERN_INFO "pNFS: using block device %s\n", 245 d->bdev->bd_disk->disk_name); 246 return 0; 247 } 248 249 static bool 250 bl_validate_designator(struct pnfs_block_volume *v) 251 { 252 switch (v->scsi.designator_type) { 253 case PS_DESIGNATOR_EUI64: 254 if (v->scsi.code_set != PS_CODE_SET_BINARY) 255 return false; 256 257 if (v->scsi.designator_len != 8 && 258 v->scsi.designator_len != 10 && 259 v->scsi.designator_len != 16) 260 return false; 261 262 return true; 263 case PS_DESIGNATOR_NAA: 264 if (v->scsi.code_set != PS_CODE_SET_BINARY) 265 return false; 266 267 if (v->scsi.designator_len != 8 && 268 v->scsi.designator_len != 16) 269 return false; 270 271 return true; 272 case PS_DESIGNATOR_T10: 273 case PS_DESIGNATOR_NAME: 274 pr_err("pNFS: unsupported designator " 275 "(code set %d, type %d, len %d.\n", 276 v->scsi.code_set, 277 v->scsi.designator_type, 278 v->scsi.designator_len); 279 return false; 280 default: 281 pr_err("pNFS: invalid designator " 282 "(code set %d, type %d, len %d.\n", 283 v->scsi.code_set, 284 v->scsi.designator_type, 285 v->scsi.designator_len); 286 return false; 287 } 288 } 289 290 static int 291 bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, 292 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 293 { 294 struct pnfs_block_volume *v = &volumes[idx]; 295 const struct pr_ops *ops; 296 const char *devname; 297 int error; 298 299 if (!bl_validate_designator(v)) 300 return -EINVAL; 301 302 switch (v->scsi.designator_len) { 303 case 8: 304 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN", 305 v->scsi.designator); 306 break; 307 case 12: 308 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN", 309 v->scsi.designator); 310 break; 311 case 16: 312 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN", 313 v->scsi.designator); 314 break; 315 default: 316 return -EINVAL; 317 } 318 319 d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL); 320 if (IS_ERR(d->bdev)) { 321 pr_warn("pNFS: failed to open device %s (%ld)\n", 322 devname, PTR_ERR(d->bdev)); 323 kfree(devname); 324 return PTR_ERR(d->bdev); 325 } 326 327 kfree(devname); 328 329 d->len = i_size_read(d->bdev->bd_inode); 330 d->map = bl_map_simple; 331 d->pr_key = v->scsi.pr_key; 332 333 pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", 334 d->bdev->bd_disk->disk_name, d->pr_key); 335 336 ops = d->bdev->bd_disk->fops->pr_ops; 337 if (!ops) { 338 pr_err("pNFS: block device %s does not support reservations.", 339 d->bdev->bd_disk->disk_name); 340 error = -EINVAL; 341 goto out_blkdev_put; 342 } 343 344 error = ops->pr_register(d->bdev, 0, d->pr_key, true); 345 if (error) { 346 pr_err("pNFS: failed to register key for block device %s.", 347 d->bdev->bd_disk->disk_name); 348 goto out_blkdev_put; 349 } 350 351 d->pr_registered = true; 352 return 0; 353 354 out_blkdev_put: 355 blkdev_put(d->bdev, FMODE_READ); 356 return error; 357 } 358 359 static int 360 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 361 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 362 { 363 struct pnfs_block_volume *v = &volumes[idx]; 364 int ret; 365 366 ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); 367 if (ret) 368 return ret; 369 370 d->disk_offset = v->slice.start; 371 d->len = v->slice.len; 372 return 0; 373 } 374 375 static int 376 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, 377 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 378 { 379 struct pnfs_block_volume *v = &volumes[idx]; 380 u64 len = 0; 381 int ret, i; 382 383 d->children = kcalloc(v->concat.volumes_count, 384 sizeof(struct pnfs_block_dev), GFP_KERNEL); 385 if (!d->children) 386 return -ENOMEM; 387 388 for (i = 0; i < v->concat.volumes_count; i++) { 389 ret = bl_parse_deviceid(server, &d->children[i], 390 volumes, v->concat.volumes[i], gfp_mask); 391 if (ret) 392 return ret; 393 394 d->nr_children++; 395 d->children[i].start += len; 396 len += d->children[i].len; 397 } 398 399 d->len = len; 400 d->map = bl_map_concat; 401 return 0; 402 } 403 404 static int 405 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, 406 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 407 { 408 struct pnfs_block_volume *v = &volumes[idx]; 409 u64 len = 0; 410 int ret, i; 411 412 d->children = kcalloc(v->stripe.volumes_count, 413 sizeof(struct pnfs_block_dev), GFP_KERNEL); 414 if (!d->children) 415 return -ENOMEM; 416 417 for (i = 0; i < v->stripe.volumes_count; i++) { 418 ret = bl_parse_deviceid(server, &d->children[i], 419 volumes, v->stripe.volumes[i], gfp_mask); 420 if (ret) 421 return ret; 422 423 d->nr_children++; 424 len += d->children[i].len; 425 } 426 427 d->len = len; 428 d->chunk_size = v->stripe.chunk_size; 429 d->map = bl_map_stripe; 430 return 0; 431 } 432 433 static int 434 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 435 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 436 { 437 switch (volumes[idx].type) { 438 case PNFS_BLOCK_VOLUME_SIMPLE: 439 return bl_parse_simple(server, d, volumes, idx, gfp_mask); 440 case PNFS_BLOCK_VOLUME_SLICE: 441 return bl_parse_slice(server, d, volumes, idx, gfp_mask); 442 case PNFS_BLOCK_VOLUME_CONCAT: 443 return bl_parse_concat(server, d, volumes, idx, gfp_mask); 444 case PNFS_BLOCK_VOLUME_STRIPE: 445 return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 446 case PNFS_BLOCK_VOLUME_SCSI: 447 return bl_parse_scsi(server, d, volumes, idx, gfp_mask); 448 default: 449 dprintk("unsupported volume type: %d\n", volumes[idx].type); 450 return -EIO; 451 } 452 } 453 454 struct nfs4_deviceid_node * 455 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 456 gfp_t gfp_mask) 457 { 458 struct nfs4_deviceid_node *node = NULL; 459 struct pnfs_block_volume *volumes; 460 struct pnfs_block_dev *top; 461 struct xdr_stream xdr; 462 struct xdr_buf buf; 463 struct page *scratch; 464 int nr_volumes, ret, i; 465 __be32 *p; 466 467 scratch = alloc_page(gfp_mask); 468 if (!scratch) 469 goto out; 470 471 xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); 472 xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); 473 474 p = xdr_inline_decode(&xdr, sizeof(__be32)); 475 if (!p) 476 goto out_free_scratch; 477 nr_volumes = be32_to_cpup(p++); 478 479 volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), 480 gfp_mask); 481 if (!volumes) 482 goto out_free_scratch; 483 484 for (i = 0; i < nr_volumes; i++) { 485 ret = nfs4_block_decode_volume(&xdr, &volumes[i]); 486 if (ret < 0) 487 goto out_free_volumes; 488 } 489 490 top = kzalloc(sizeof(*top), gfp_mask); 491 if (!top) 492 goto out_free_volumes; 493 494 ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); 495 if (ret) { 496 bl_free_device(top); 497 kfree(top); 498 goto out_free_volumes; 499 } 500 501 node = &top->node; 502 nfs4_init_deviceid_node(node, server, &pdev->dev_id); 503 504 out_free_volumes: 505 kfree(volumes); 506 out_free_scratch: 507 __free_page(scratch); 508 out: 509 return node; 510 } 511