1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_alloc.h" 20 #include "xfs_trans.h" 21 #include "xfs_inode_item.h" 22 #include "xfs_bmap.h" 23 #include "xfs_bmap_util.h" 24 #include "xfs_attr.h" 25 #include "xfs_attr_leaf.h" 26 #include "xfs_attr_remote.h" 27 #include "xfs_trans_space.h" 28 #include "xfs_trace.h" 29 #include "xfs_cksum.h" 30 #include "xfs_buf_item.h" 31 #include "xfs_error.h" 32 33 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 34 35 /* 36 * Each contiguous block has a header, so it is not just a simple attribute 37 * length to FSB conversion. 38 */ 39 int 40 xfs_attr3_rmt_blocks( 41 struct xfs_mount *mp, 42 int attrlen) 43 { 44 if (xfs_sb_version_hascrc(&mp->m_sb)) { 45 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 46 return (attrlen + buflen - 1) / buflen; 47 } 48 return XFS_B_TO_FSB(mp, attrlen); 49 } 50 51 /* 52 * Checking of the remote attribute header is split into two parts. The verifier 53 * does CRC, location and bounds checking, the unpacking function checks the 54 * attribute parameters and owner. 55 */ 56 static xfs_failaddr_t 57 xfs_attr3_rmt_hdr_ok( 58 void *ptr, 59 xfs_ino_t ino, 60 uint32_t offset, 61 uint32_t size, 62 xfs_daddr_t bno) 63 { 64 struct xfs_attr3_rmt_hdr *rmt = ptr; 65 66 if (bno != be64_to_cpu(rmt->rm_blkno)) 67 return __this_address; 68 if (offset != be32_to_cpu(rmt->rm_offset)) 69 return __this_address; 70 if (size != be32_to_cpu(rmt->rm_bytes)) 71 return __this_address; 72 if (ino != be64_to_cpu(rmt->rm_owner)) 73 return __this_address; 74 75 /* ok */ 76 return NULL; 77 } 78 79 static xfs_failaddr_t 80 xfs_attr3_rmt_verify( 81 struct xfs_mount *mp, 82 void *ptr, 83 int fsbsize, 84 xfs_daddr_t bno) 85 { 86 struct xfs_attr3_rmt_hdr *rmt = ptr; 87 88 if (!xfs_sb_version_hascrc(&mp->m_sb)) 89 return __this_address; 90 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) 91 return __this_address; 92 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 93 return __this_address; 94 if (be64_to_cpu(rmt->rm_blkno) != bno) 95 return __this_address; 96 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 97 return __this_address; 98 if (be32_to_cpu(rmt->rm_offset) + 99 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 100 return __this_address; 101 if (rmt->rm_owner == 0) 102 return __this_address; 103 104 return NULL; 105 } 106 107 static int 108 __xfs_attr3_rmt_read_verify( 109 struct xfs_buf *bp, 110 bool check_crc, 111 xfs_failaddr_t *failaddr) 112 { 113 struct xfs_mount *mp = bp->b_target->bt_mount; 114 char *ptr; 115 int len; 116 xfs_daddr_t bno; 117 int blksize = mp->m_attr_geo->blksize; 118 119 /* no verification of non-crc buffers */ 120 if (!xfs_sb_version_hascrc(&mp->m_sb)) 121 return 0; 122 123 ptr = bp->b_addr; 124 bno = bp->b_bn; 125 len = BBTOB(bp->b_length); 126 ASSERT(len >= blksize); 127 128 while (len > 0) { 129 if (check_crc && 130 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 131 *failaddr = __this_address; 132 return -EFSBADCRC; 133 } 134 *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 135 if (*failaddr) 136 return -EFSCORRUPTED; 137 len -= blksize; 138 ptr += blksize; 139 bno += BTOBB(blksize); 140 } 141 142 if (len != 0) { 143 *failaddr = __this_address; 144 return -EFSCORRUPTED; 145 } 146 147 return 0; 148 } 149 150 static void 151 xfs_attr3_rmt_read_verify( 152 struct xfs_buf *bp) 153 { 154 xfs_failaddr_t fa; 155 int error; 156 157 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 158 if (error) 159 xfs_verifier_error(bp, error, fa); 160 } 161 162 static xfs_failaddr_t 163 xfs_attr3_rmt_verify_struct( 164 struct xfs_buf *bp) 165 { 166 xfs_failaddr_t fa; 167 int error; 168 169 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 170 return error ? fa : NULL; 171 } 172 173 static void 174 xfs_attr3_rmt_write_verify( 175 struct xfs_buf *bp) 176 { 177 struct xfs_mount *mp = bp->b_target->bt_mount; 178 xfs_failaddr_t fa; 179 int blksize = mp->m_attr_geo->blksize; 180 char *ptr; 181 int len; 182 xfs_daddr_t bno; 183 184 /* no verification of non-crc buffers */ 185 if (!xfs_sb_version_hascrc(&mp->m_sb)) 186 return; 187 188 ptr = bp->b_addr; 189 bno = bp->b_bn; 190 len = BBTOB(bp->b_length); 191 ASSERT(len >= blksize); 192 193 while (len > 0) { 194 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 195 196 fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); 197 if (fa) { 198 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 199 return; 200 } 201 202 /* 203 * Ensure we aren't writing bogus LSNs to disk. See 204 * xfs_attr3_rmt_hdr_set() for the explanation. 205 */ 206 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 207 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 208 return; 209 } 210 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 211 212 len -= blksize; 213 ptr += blksize; 214 bno += BTOBB(blksize); 215 } 216 217 if (len != 0) 218 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 219 } 220 221 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 222 .name = "xfs_attr3_rmt", 223 .verify_read = xfs_attr3_rmt_read_verify, 224 .verify_write = xfs_attr3_rmt_write_verify, 225 .verify_struct = xfs_attr3_rmt_verify_struct, 226 }; 227 228 STATIC int 229 xfs_attr3_rmt_hdr_set( 230 struct xfs_mount *mp, 231 void *ptr, 232 xfs_ino_t ino, 233 uint32_t offset, 234 uint32_t size, 235 xfs_daddr_t bno) 236 { 237 struct xfs_attr3_rmt_hdr *rmt = ptr; 238 239 if (!xfs_sb_version_hascrc(&mp->m_sb)) 240 return 0; 241 242 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 243 rmt->rm_offset = cpu_to_be32(offset); 244 rmt->rm_bytes = cpu_to_be32(size); 245 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 246 rmt->rm_owner = cpu_to_be64(ino); 247 rmt->rm_blkno = cpu_to_be64(bno); 248 249 /* 250 * Remote attribute blocks are written synchronously, so we don't 251 * have an LSN that we can stamp in them that makes any sense to log 252 * recovery. To ensure that log recovery handles overwrites of these 253 * blocks sanely (i.e. once they've been freed and reallocated as some 254 * other type of metadata) we need to ensure that the LSN has a value 255 * that tells log recovery to ignore the LSN and overwrite the buffer 256 * with whatever is in it's log. To do this, we use the magic 257 * NULLCOMMITLSN to indicate that the LSN is invalid. 258 */ 259 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 260 261 return sizeof(struct xfs_attr3_rmt_hdr); 262 } 263 264 /* 265 * Helper functions to copy attribute data in and out of the one disk extents 266 */ 267 STATIC int 268 xfs_attr_rmtval_copyout( 269 struct xfs_mount *mp, 270 struct xfs_buf *bp, 271 xfs_ino_t ino, 272 int *offset, 273 int *valuelen, 274 uint8_t **dst) 275 { 276 char *src = bp->b_addr; 277 xfs_daddr_t bno = bp->b_bn; 278 int len = BBTOB(bp->b_length); 279 int blksize = mp->m_attr_geo->blksize; 280 281 ASSERT(len >= blksize); 282 283 while (len > 0 && *valuelen > 0) { 284 int hdr_size = 0; 285 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 286 287 byte_cnt = min(*valuelen, byte_cnt); 288 289 if (xfs_sb_version_hascrc(&mp->m_sb)) { 290 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 291 byte_cnt, bno)) { 292 xfs_alert(mp, 293 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 294 bno, *offset, byte_cnt, ino); 295 return -EFSCORRUPTED; 296 } 297 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 298 } 299 300 memcpy(*dst, src + hdr_size, byte_cnt); 301 302 /* roll buffer forwards */ 303 len -= blksize; 304 src += blksize; 305 bno += BTOBB(blksize); 306 307 /* roll attribute data forwards */ 308 *valuelen -= byte_cnt; 309 *dst += byte_cnt; 310 *offset += byte_cnt; 311 } 312 return 0; 313 } 314 315 STATIC void 316 xfs_attr_rmtval_copyin( 317 struct xfs_mount *mp, 318 struct xfs_buf *bp, 319 xfs_ino_t ino, 320 int *offset, 321 int *valuelen, 322 uint8_t **src) 323 { 324 char *dst = bp->b_addr; 325 xfs_daddr_t bno = bp->b_bn; 326 int len = BBTOB(bp->b_length); 327 int blksize = mp->m_attr_geo->blksize; 328 329 ASSERT(len >= blksize); 330 331 while (len > 0 && *valuelen > 0) { 332 int hdr_size; 333 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 334 335 byte_cnt = min(*valuelen, byte_cnt); 336 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 337 byte_cnt, bno); 338 339 memcpy(dst + hdr_size, *src, byte_cnt); 340 341 /* 342 * If this is the last block, zero the remainder of it. 343 * Check that we are actually the last block, too. 344 */ 345 if (byte_cnt + hdr_size < blksize) { 346 ASSERT(*valuelen - byte_cnt == 0); 347 ASSERT(len == blksize); 348 memset(dst + hdr_size + byte_cnt, 0, 349 blksize - hdr_size - byte_cnt); 350 } 351 352 /* roll buffer forwards */ 353 len -= blksize; 354 dst += blksize; 355 bno += BTOBB(blksize); 356 357 /* roll attribute data forwards */ 358 *valuelen -= byte_cnt; 359 *src += byte_cnt; 360 *offset += byte_cnt; 361 } 362 } 363 364 /* 365 * Read the value associated with an attribute from the out-of-line buffer 366 * that we stored it in. 367 */ 368 int 369 xfs_attr_rmtval_get( 370 struct xfs_da_args *args) 371 { 372 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 373 struct xfs_mount *mp = args->dp->i_mount; 374 struct xfs_buf *bp; 375 xfs_dablk_t lblkno = args->rmtblkno; 376 uint8_t *dst = args->value; 377 int valuelen; 378 int nmap; 379 int error; 380 int blkcnt = args->rmtblkcnt; 381 int i; 382 int offset = 0; 383 384 trace_xfs_attr_rmtval_get(args); 385 386 ASSERT(!(args->flags & ATTR_KERNOVAL)); 387 ASSERT(args->rmtvaluelen == args->valuelen); 388 389 valuelen = args->rmtvaluelen; 390 while (valuelen > 0) { 391 nmap = ATTR_RMTVALUE_MAPSIZE; 392 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 393 blkcnt, map, &nmap, 394 XFS_BMAPI_ATTRFORK); 395 if (error) 396 return error; 397 ASSERT(nmap >= 1); 398 399 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 400 xfs_daddr_t dblkno; 401 int dblkcnt; 402 403 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 404 (map[i].br_startblock != HOLESTARTBLOCK)); 405 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 406 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 407 error = xfs_trans_read_buf(mp, args->trans, 408 mp->m_ddev_targp, 409 dblkno, dblkcnt, 0, &bp, 410 &xfs_attr3_rmt_buf_ops); 411 if (error) 412 return error; 413 414 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 415 &offset, &valuelen, 416 &dst); 417 xfs_trans_brelse(args->trans, bp); 418 if (error) 419 return error; 420 421 /* roll attribute extent map forwards */ 422 lblkno += map[i].br_blockcount; 423 blkcnt -= map[i].br_blockcount; 424 } 425 } 426 ASSERT(valuelen == 0); 427 return 0; 428 } 429 430 /* 431 * Write the value associated with an attribute into the out-of-line buffer 432 * that we have defined for it. 433 */ 434 int 435 xfs_attr_rmtval_set( 436 struct xfs_da_args *args) 437 { 438 struct xfs_inode *dp = args->dp; 439 struct xfs_mount *mp = dp->i_mount; 440 struct xfs_bmbt_irec map; 441 xfs_dablk_t lblkno; 442 xfs_fileoff_t lfileoff = 0; 443 uint8_t *src = args->value; 444 int blkcnt; 445 int valuelen; 446 int nmap; 447 int error; 448 int offset = 0; 449 450 trace_xfs_attr_rmtval_set(args); 451 452 /* 453 * Find a "hole" in the attribute address space large enough for 454 * us to drop the new attribute's value into. Because CRC enable 455 * attributes have headers, we can't just do a straight byte to FSB 456 * conversion and have to take the header space into account. 457 */ 458 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 459 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 460 XFS_ATTR_FORK); 461 if (error) 462 return error; 463 464 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 465 args->rmtblkcnt = blkcnt; 466 467 /* 468 * Roll through the "value", allocating blocks on disk as required. 469 */ 470 while (blkcnt > 0) { 471 /* 472 * Allocate a single extent, up to the size of the value. 473 * 474 * Note that we have to consider this a data allocation as we 475 * write the remote attribute without logging the contents. 476 * Hence we must ensure that we aren't using blocks that are on 477 * the busy list so that we don't overwrite blocks which have 478 * recently been freed but their transactions are not yet 479 * committed to disk. If we overwrite the contents of a busy 480 * extent and then crash then the block may not contain the 481 * correct metadata after log recovery occurs. 482 */ 483 nmap = 1; 484 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 485 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 486 &nmap); 487 if (error) 488 goto out_defer_cancel; 489 error = xfs_defer_finish(&args->trans); 490 if (error) 491 return error; 492 493 ASSERT(nmap == 1); 494 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 495 (map.br_startblock != HOLESTARTBLOCK)); 496 lblkno += map.br_blockcount; 497 blkcnt -= map.br_blockcount; 498 499 /* 500 * Start the next trans in the chain. 501 */ 502 error = xfs_trans_roll_inode(&args->trans, dp); 503 if (error) 504 return error; 505 } 506 507 /* 508 * Roll through the "value", copying the attribute value to the 509 * already-allocated blocks. Blocks are written synchronously 510 * so that we can know they are all on disk before we turn off 511 * the INCOMPLETE flag. 512 */ 513 lblkno = args->rmtblkno; 514 blkcnt = args->rmtblkcnt; 515 valuelen = args->rmtvaluelen; 516 while (valuelen > 0) { 517 struct xfs_buf *bp; 518 xfs_daddr_t dblkno; 519 int dblkcnt; 520 521 ASSERT(blkcnt > 0); 522 523 nmap = 1; 524 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 525 blkcnt, &map, &nmap, 526 XFS_BMAPI_ATTRFORK); 527 if (error) 528 return error; 529 ASSERT(nmap == 1); 530 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 531 (map.br_startblock != HOLESTARTBLOCK)); 532 533 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 534 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 535 536 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 537 if (!bp) 538 return -ENOMEM; 539 bp->b_ops = &xfs_attr3_rmt_buf_ops; 540 541 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 542 &valuelen, &src); 543 544 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 545 xfs_buf_relse(bp); 546 if (error) 547 return error; 548 549 550 /* roll attribute extent map forwards */ 551 lblkno += map.br_blockcount; 552 blkcnt -= map.br_blockcount; 553 } 554 ASSERT(valuelen == 0); 555 return 0; 556 out_defer_cancel: 557 xfs_defer_cancel(args->trans); 558 return error; 559 } 560 561 /* 562 * Remove the value associated with an attribute by deleting the 563 * out-of-line buffer that it is stored on. 564 */ 565 int 566 xfs_attr_rmtval_remove( 567 struct xfs_da_args *args) 568 { 569 struct xfs_mount *mp = args->dp->i_mount; 570 xfs_dablk_t lblkno; 571 int blkcnt; 572 int error; 573 int done; 574 575 trace_xfs_attr_rmtval_remove(args); 576 577 /* 578 * Roll through the "value", invalidating the attribute value's blocks. 579 */ 580 lblkno = args->rmtblkno; 581 blkcnt = args->rmtblkcnt; 582 while (blkcnt > 0) { 583 struct xfs_bmbt_irec map; 584 struct xfs_buf *bp; 585 xfs_daddr_t dblkno; 586 int dblkcnt; 587 int nmap; 588 589 /* 590 * Try to remember where we decided to put the value. 591 */ 592 nmap = 1; 593 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 594 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 595 if (error) 596 return error; 597 ASSERT(nmap == 1); 598 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 599 (map.br_startblock != HOLESTARTBLOCK)); 600 601 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 602 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 603 604 /* 605 * If the "remote" value is in the cache, remove it. 606 */ 607 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); 608 if (bp) { 609 xfs_buf_stale(bp); 610 xfs_buf_relse(bp); 611 bp = NULL; 612 } 613 614 lblkno += map.br_blockcount; 615 blkcnt -= map.br_blockcount; 616 } 617 618 /* 619 * Keep de-allocating extents until the remote-value region is gone. 620 */ 621 lblkno = args->rmtblkno; 622 blkcnt = args->rmtblkcnt; 623 done = 0; 624 while (!done) { 625 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 626 XFS_BMAPI_ATTRFORK, 1, &done); 627 if (error) 628 goto out_defer_cancel; 629 error = xfs_defer_finish(&args->trans); 630 if (error) 631 return error; 632 633 /* 634 * Close out trans and start the next one in the chain. 635 */ 636 error = xfs_trans_roll_inode(&args->trans, args->dp); 637 if (error) 638 return error; 639 } 640 return 0; 641 out_defer_cancel: 642 xfs_defer_cancel(args->trans); 643 return error; 644 } 645