1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_alloc.h" 20 #include "xfs_trans.h" 21 #include "xfs_inode_item.h" 22 #include "xfs_bmap.h" 23 #include "xfs_bmap_util.h" 24 #include "xfs_attr.h" 25 #include "xfs_attr_leaf.h" 26 #include "xfs_attr_remote.h" 27 #include "xfs_trans_space.h" 28 #include "xfs_trace.h" 29 #include "xfs_cksum.h" 30 #include "xfs_buf_item.h" 31 #include "xfs_error.h" 32 33 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 34 35 /* 36 * Each contiguous block has a header, so it is not just a simple attribute 37 * length to FSB conversion. 38 */ 39 int 40 xfs_attr3_rmt_blocks( 41 struct xfs_mount *mp, 42 int attrlen) 43 { 44 if (xfs_sb_version_hascrc(&mp->m_sb)) { 45 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 46 return (attrlen + buflen - 1) / buflen; 47 } 48 return XFS_B_TO_FSB(mp, attrlen); 49 } 50 51 /* 52 * Checking of the remote attribute header is split into two parts. The verifier 53 * does CRC, location and bounds checking, the unpacking function checks the 54 * attribute parameters and owner. 55 */ 56 static xfs_failaddr_t 57 xfs_attr3_rmt_hdr_ok( 58 void *ptr, 59 xfs_ino_t ino, 60 uint32_t offset, 61 uint32_t size, 62 xfs_daddr_t bno) 63 { 64 struct xfs_attr3_rmt_hdr *rmt = ptr; 65 66 if (bno != be64_to_cpu(rmt->rm_blkno)) 67 return __this_address; 68 if (offset != be32_to_cpu(rmt->rm_offset)) 69 return __this_address; 70 if (size != be32_to_cpu(rmt->rm_bytes)) 71 return __this_address; 72 if (ino != be64_to_cpu(rmt->rm_owner)) 73 return __this_address; 74 75 /* ok */ 76 return NULL; 77 } 78 79 static xfs_failaddr_t 80 xfs_attr3_rmt_verify( 81 struct xfs_mount *mp, 82 struct xfs_buf *bp, 83 void *ptr, 84 int fsbsize, 85 xfs_daddr_t bno) 86 { 87 struct xfs_attr3_rmt_hdr *rmt = ptr; 88 89 if (!xfs_sb_version_hascrc(&mp->m_sb)) 90 return __this_address; 91 if (!xfs_verify_magic(bp, rmt->rm_magic)) 92 return __this_address; 93 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 94 return __this_address; 95 if (be64_to_cpu(rmt->rm_blkno) != bno) 96 return __this_address; 97 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 98 return __this_address; 99 if (be32_to_cpu(rmt->rm_offset) + 100 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 101 return __this_address; 102 if (rmt->rm_owner == 0) 103 return __this_address; 104 105 return NULL; 106 } 107 108 static int 109 __xfs_attr3_rmt_read_verify( 110 struct xfs_buf *bp, 111 bool check_crc, 112 xfs_failaddr_t *failaddr) 113 { 114 struct xfs_mount *mp = bp->b_target->bt_mount; 115 char *ptr; 116 int len; 117 xfs_daddr_t bno; 118 int blksize = mp->m_attr_geo->blksize; 119 120 /* no verification of non-crc buffers */ 121 if (!xfs_sb_version_hascrc(&mp->m_sb)) 122 return 0; 123 124 ptr = bp->b_addr; 125 bno = bp->b_bn; 126 len = BBTOB(bp->b_length); 127 ASSERT(len >= blksize); 128 129 while (len > 0) { 130 if (check_crc && 131 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 132 *failaddr = __this_address; 133 return -EFSBADCRC; 134 } 135 *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 136 if (*failaddr) 137 return -EFSCORRUPTED; 138 len -= blksize; 139 ptr += blksize; 140 bno += BTOBB(blksize); 141 } 142 143 if (len != 0) { 144 *failaddr = __this_address; 145 return -EFSCORRUPTED; 146 } 147 148 return 0; 149 } 150 151 static void 152 xfs_attr3_rmt_read_verify( 153 struct xfs_buf *bp) 154 { 155 xfs_failaddr_t fa; 156 int error; 157 158 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 159 if (error) 160 xfs_verifier_error(bp, error, fa); 161 } 162 163 static xfs_failaddr_t 164 xfs_attr3_rmt_verify_struct( 165 struct xfs_buf *bp) 166 { 167 xfs_failaddr_t fa; 168 int error; 169 170 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 171 return error ? fa : NULL; 172 } 173 174 static void 175 xfs_attr3_rmt_write_verify( 176 struct xfs_buf *bp) 177 { 178 struct xfs_mount *mp = bp->b_target->bt_mount; 179 xfs_failaddr_t fa; 180 int blksize = mp->m_attr_geo->blksize; 181 char *ptr; 182 int len; 183 xfs_daddr_t bno; 184 185 /* no verification of non-crc buffers */ 186 if (!xfs_sb_version_hascrc(&mp->m_sb)) 187 return; 188 189 ptr = bp->b_addr; 190 bno = bp->b_bn; 191 len = BBTOB(bp->b_length); 192 ASSERT(len >= blksize); 193 194 while (len > 0) { 195 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 196 197 fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 198 if (fa) { 199 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 200 return; 201 } 202 203 /* 204 * Ensure we aren't writing bogus LSNs to disk. See 205 * xfs_attr3_rmt_hdr_set() for the explanation. 206 */ 207 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 208 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 209 return; 210 } 211 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 212 213 len -= blksize; 214 ptr += blksize; 215 bno += BTOBB(blksize); 216 } 217 218 if (len != 0) 219 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 220 } 221 222 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 223 .name = "xfs_attr3_rmt", 224 .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, 225 .verify_read = xfs_attr3_rmt_read_verify, 226 .verify_write = xfs_attr3_rmt_write_verify, 227 .verify_struct = xfs_attr3_rmt_verify_struct, 228 }; 229 230 STATIC int 231 xfs_attr3_rmt_hdr_set( 232 struct xfs_mount *mp, 233 void *ptr, 234 xfs_ino_t ino, 235 uint32_t offset, 236 uint32_t size, 237 xfs_daddr_t bno) 238 { 239 struct xfs_attr3_rmt_hdr *rmt = ptr; 240 241 if (!xfs_sb_version_hascrc(&mp->m_sb)) 242 return 0; 243 244 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 245 rmt->rm_offset = cpu_to_be32(offset); 246 rmt->rm_bytes = cpu_to_be32(size); 247 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 248 rmt->rm_owner = cpu_to_be64(ino); 249 rmt->rm_blkno = cpu_to_be64(bno); 250 251 /* 252 * Remote attribute blocks are written synchronously, so we don't 253 * have an LSN that we can stamp in them that makes any sense to log 254 * recovery. To ensure that log recovery handles overwrites of these 255 * blocks sanely (i.e. once they've been freed and reallocated as some 256 * other type of metadata) we need to ensure that the LSN has a value 257 * that tells log recovery to ignore the LSN and overwrite the buffer 258 * with whatever is in it's log. To do this, we use the magic 259 * NULLCOMMITLSN to indicate that the LSN is invalid. 260 */ 261 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 262 263 return sizeof(struct xfs_attr3_rmt_hdr); 264 } 265 266 /* 267 * Helper functions to copy attribute data in and out of the one disk extents 268 */ 269 STATIC int 270 xfs_attr_rmtval_copyout( 271 struct xfs_mount *mp, 272 struct xfs_buf *bp, 273 xfs_ino_t ino, 274 int *offset, 275 int *valuelen, 276 uint8_t **dst) 277 { 278 char *src = bp->b_addr; 279 xfs_daddr_t bno = bp->b_bn; 280 int len = BBTOB(bp->b_length); 281 int blksize = mp->m_attr_geo->blksize; 282 283 ASSERT(len >= blksize); 284 285 while (len > 0 && *valuelen > 0) { 286 int hdr_size = 0; 287 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 288 289 byte_cnt = min(*valuelen, byte_cnt); 290 291 if (xfs_sb_version_hascrc(&mp->m_sb)) { 292 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 293 byte_cnt, bno)) { 294 xfs_alert(mp, 295 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 296 bno, *offset, byte_cnt, ino); 297 return -EFSCORRUPTED; 298 } 299 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 300 } 301 302 memcpy(*dst, src + hdr_size, byte_cnt); 303 304 /* roll buffer forwards */ 305 len -= blksize; 306 src += blksize; 307 bno += BTOBB(blksize); 308 309 /* roll attribute data forwards */ 310 *valuelen -= byte_cnt; 311 *dst += byte_cnt; 312 *offset += byte_cnt; 313 } 314 return 0; 315 } 316 317 STATIC void 318 xfs_attr_rmtval_copyin( 319 struct xfs_mount *mp, 320 struct xfs_buf *bp, 321 xfs_ino_t ino, 322 int *offset, 323 int *valuelen, 324 uint8_t **src) 325 { 326 char *dst = bp->b_addr; 327 xfs_daddr_t bno = bp->b_bn; 328 int len = BBTOB(bp->b_length); 329 int blksize = mp->m_attr_geo->blksize; 330 331 ASSERT(len >= blksize); 332 333 while (len > 0 && *valuelen > 0) { 334 int hdr_size; 335 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 336 337 byte_cnt = min(*valuelen, byte_cnt); 338 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 339 byte_cnt, bno); 340 341 memcpy(dst + hdr_size, *src, byte_cnt); 342 343 /* 344 * If this is the last block, zero the remainder of it. 345 * Check that we are actually the last block, too. 346 */ 347 if (byte_cnt + hdr_size < blksize) { 348 ASSERT(*valuelen - byte_cnt == 0); 349 ASSERT(len == blksize); 350 memset(dst + hdr_size + byte_cnt, 0, 351 blksize - hdr_size - byte_cnt); 352 } 353 354 /* roll buffer forwards */ 355 len -= blksize; 356 dst += blksize; 357 bno += BTOBB(blksize); 358 359 /* roll attribute data forwards */ 360 *valuelen -= byte_cnt; 361 *src += byte_cnt; 362 *offset += byte_cnt; 363 } 364 } 365 366 /* 367 * Read the value associated with an attribute from the out-of-line buffer 368 * that we stored it in. 369 */ 370 int 371 xfs_attr_rmtval_get( 372 struct xfs_da_args *args) 373 { 374 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 375 struct xfs_mount *mp = args->dp->i_mount; 376 struct xfs_buf *bp; 377 xfs_dablk_t lblkno = args->rmtblkno; 378 uint8_t *dst = args->value; 379 int valuelen; 380 int nmap; 381 int error; 382 int blkcnt = args->rmtblkcnt; 383 int i; 384 int offset = 0; 385 386 trace_xfs_attr_rmtval_get(args); 387 388 ASSERT(!(args->flags & ATTR_KERNOVAL)); 389 ASSERT(args->rmtvaluelen == args->valuelen); 390 391 valuelen = args->rmtvaluelen; 392 while (valuelen > 0) { 393 nmap = ATTR_RMTVALUE_MAPSIZE; 394 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 395 blkcnt, map, &nmap, 396 XFS_BMAPI_ATTRFORK); 397 if (error) 398 return error; 399 ASSERT(nmap >= 1); 400 401 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 402 xfs_daddr_t dblkno; 403 int dblkcnt; 404 405 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 406 (map[i].br_startblock != HOLESTARTBLOCK)); 407 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 408 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 409 error = xfs_trans_read_buf(mp, args->trans, 410 mp->m_ddev_targp, 411 dblkno, dblkcnt, 0, &bp, 412 &xfs_attr3_rmt_buf_ops); 413 if (error) 414 return error; 415 416 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 417 &offset, &valuelen, 418 &dst); 419 xfs_trans_brelse(args->trans, bp); 420 if (error) 421 return error; 422 423 /* roll attribute extent map forwards */ 424 lblkno += map[i].br_blockcount; 425 blkcnt -= map[i].br_blockcount; 426 } 427 } 428 ASSERT(valuelen == 0); 429 return 0; 430 } 431 432 /* 433 * Write the value associated with an attribute into the out-of-line buffer 434 * that we have defined for it. 435 */ 436 int 437 xfs_attr_rmtval_set( 438 struct xfs_da_args *args) 439 { 440 struct xfs_inode *dp = args->dp; 441 struct xfs_mount *mp = dp->i_mount; 442 struct xfs_bmbt_irec map; 443 xfs_dablk_t lblkno; 444 xfs_fileoff_t lfileoff = 0; 445 uint8_t *src = args->value; 446 int blkcnt; 447 int valuelen; 448 int nmap; 449 int error; 450 int offset = 0; 451 452 trace_xfs_attr_rmtval_set(args); 453 454 /* 455 * Find a "hole" in the attribute address space large enough for 456 * us to drop the new attribute's value into. Because CRC enable 457 * attributes have headers, we can't just do a straight byte to FSB 458 * conversion and have to take the header space into account. 459 */ 460 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 461 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 462 XFS_ATTR_FORK); 463 if (error) 464 return error; 465 466 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 467 args->rmtblkcnt = blkcnt; 468 469 /* 470 * Roll through the "value", allocating blocks on disk as required. 471 */ 472 while (blkcnt > 0) { 473 /* 474 * Allocate a single extent, up to the size of the value. 475 * 476 * Note that we have to consider this a data allocation as we 477 * write the remote attribute without logging the contents. 478 * Hence we must ensure that we aren't using blocks that are on 479 * the busy list so that we don't overwrite blocks which have 480 * recently been freed but their transactions are not yet 481 * committed to disk. If we overwrite the contents of a busy 482 * extent and then crash then the block may not contain the 483 * correct metadata after log recovery occurs. 484 */ 485 nmap = 1; 486 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 487 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 488 &nmap); 489 if (error) 490 return error; 491 error = xfs_defer_finish(&args->trans); 492 if (error) 493 return error; 494 495 ASSERT(nmap == 1); 496 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 497 (map.br_startblock != HOLESTARTBLOCK)); 498 lblkno += map.br_blockcount; 499 blkcnt -= map.br_blockcount; 500 501 /* 502 * Start the next trans in the chain. 503 */ 504 error = xfs_trans_roll_inode(&args->trans, dp); 505 if (error) 506 return error; 507 } 508 509 /* 510 * Roll through the "value", copying the attribute value to the 511 * already-allocated blocks. Blocks are written synchronously 512 * so that we can know they are all on disk before we turn off 513 * the INCOMPLETE flag. 514 */ 515 lblkno = args->rmtblkno; 516 blkcnt = args->rmtblkcnt; 517 valuelen = args->rmtvaluelen; 518 while (valuelen > 0) { 519 struct xfs_buf *bp; 520 xfs_daddr_t dblkno; 521 int dblkcnt; 522 523 ASSERT(blkcnt > 0); 524 525 nmap = 1; 526 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 527 blkcnt, &map, &nmap, 528 XFS_BMAPI_ATTRFORK); 529 if (error) 530 return error; 531 ASSERT(nmap == 1); 532 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 533 (map.br_startblock != HOLESTARTBLOCK)); 534 535 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 536 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 537 538 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 539 if (!bp) 540 return -ENOMEM; 541 bp->b_ops = &xfs_attr3_rmt_buf_ops; 542 543 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 544 &valuelen, &src); 545 546 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 547 xfs_buf_relse(bp); 548 if (error) 549 return error; 550 551 552 /* roll attribute extent map forwards */ 553 lblkno += map.br_blockcount; 554 blkcnt -= map.br_blockcount; 555 } 556 ASSERT(valuelen == 0); 557 return 0; 558 } 559 560 /* 561 * Remove the value associated with an attribute by deleting the 562 * out-of-line buffer that it is stored on. 563 */ 564 int 565 xfs_attr_rmtval_remove( 566 struct xfs_da_args *args) 567 { 568 struct xfs_mount *mp = args->dp->i_mount; 569 xfs_dablk_t lblkno; 570 int blkcnt; 571 int error; 572 int done; 573 574 trace_xfs_attr_rmtval_remove(args); 575 576 /* 577 * Roll through the "value", invalidating the attribute value's blocks. 578 */ 579 lblkno = args->rmtblkno; 580 blkcnt = args->rmtblkcnt; 581 while (blkcnt > 0) { 582 struct xfs_bmbt_irec map; 583 struct xfs_buf *bp; 584 xfs_daddr_t dblkno; 585 int dblkcnt; 586 int nmap; 587 588 /* 589 * Try to remember where we decided to put the value. 590 */ 591 nmap = 1; 592 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 593 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 594 if (error) 595 return error; 596 ASSERT(nmap == 1); 597 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 598 (map.br_startblock != HOLESTARTBLOCK)); 599 600 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 601 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 602 603 /* 604 * If the "remote" value is in the cache, remove it. 605 */ 606 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); 607 if (bp) { 608 xfs_buf_stale(bp); 609 xfs_buf_relse(bp); 610 bp = NULL; 611 } 612 613 lblkno += map.br_blockcount; 614 blkcnt -= map.br_blockcount; 615 } 616 617 /* 618 * Keep de-allocating extents until the remote-value region is gone. 619 */ 620 lblkno = args->rmtblkno; 621 blkcnt = args->rmtblkcnt; 622 done = 0; 623 while (!done) { 624 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 625 XFS_BMAPI_ATTRFORK, 1, &done); 626 if (error) 627 return error; 628 error = xfs_defer_finish(&args->trans); 629 if (error) 630 return error; 631 632 /* 633 * Close out trans and start the next one in the chain. 634 */ 635 error = xfs_trans_roll_inode(&args->trans, args->dp); 636 if (error) 637 return error; 638 } 639 return 0; 640 } 641