1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_trans.h" 20 #include "xfs_bmap.h" 21 #include "xfs_attr.h" 22 #include "xfs_trace.h" 23 #include "xfs_error.h" 24 25 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 26 27 /* 28 * Each contiguous block has a header, so it is not just a simple attribute 29 * length to FSB conversion. 30 */ 31 int 32 xfs_attr3_rmt_blocks( 33 struct xfs_mount *mp, 34 int attrlen) 35 { 36 if (xfs_sb_version_hascrc(&mp->m_sb)) { 37 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 38 return (attrlen + buflen - 1) / buflen; 39 } 40 return XFS_B_TO_FSB(mp, attrlen); 41 } 42 43 /* 44 * Checking of the remote attribute header is split into two parts. The verifier 45 * does CRC, location and bounds checking, the unpacking function checks the 46 * attribute parameters and owner. 47 */ 48 static xfs_failaddr_t 49 xfs_attr3_rmt_hdr_ok( 50 void *ptr, 51 xfs_ino_t ino, 52 uint32_t offset, 53 uint32_t size, 54 xfs_daddr_t bno) 55 { 56 struct xfs_attr3_rmt_hdr *rmt = ptr; 57 58 if (bno != be64_to_cpu(rmt->rm_blkno)) 59 return __this_address; 60 if (offset != be32_to_cpu(rmt->rm_offset)) 61 return __this_address; 62 if (size != be32_to_cpu(rmt->rm_bytes)) 63 return __this_address; 64 if (ino != be64_to_cpu(rmt->rm_owner)) 65 return __this_address; 66 67 /* ok */ 68 return NULL; 69 } 70 71 static xfs_failaddr_t 72 xfs_attr3_rmt_verify( 73 struct xfs_mount *mp, 74 struct xfs_buf *bp, 75 void *ptr, 76 int fsbsize, 77 xfs_daddr_t bno) 78 { 79 struct xfs_attr3_rmt_hdr *rmt = ptr; 80 81 if (!xfs_sb_version_hascrc(&mp->m_sb)) 82 return __this_address; 83 if (!xfs_verify_magic(bp, rmt->rm_magic)) 84 return __this_address; 85 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 86 return __this_address; 87 if (be64_to_cpu(rmt->rm_blkno) != bno) 88 return __this_address; 89 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 90 return __this_address; 91 if (be32_to_cpu(rmt->rm_offset) + 92 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 93 return __this_address; 94 if (rmt->rm_owner == 0) 95 return __this_address; 96 97 return NULL; 98 } 99 100 static int 101 __xfs_attr3_rmt_read_verify( 102 struct xfs_buf *bp, 103 bool check_crc, 104 xfs_failaddr_t *failaddr) 105 { 106 struct xfs_mount *mp = bp->b_mount; 107 char *ptr; 108 int len; 109 xfs_daddr_t bno; 110 int blksize = mp->m_attr_geo->blksize; 111 112 /* no verification of non-crc buffers */ 113 if (!xfs_sb_version_hascrc(&mp->m_sb)) 114 return 0; 115 116 ptr = bp->b_addr; 117 bno = bp->b_bn; 118 len = BBTOB(bp->b_length); 119 ASSERT(len >= blksize); 120 121 while (len > 0) { 122 if (check_crc && 123 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 124 *failaddr = __this_address; 125 return -EFSBADCRC; 126 } 127 *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 128 if (*failaddr) 129 return -EFSCORRUPTED; 130 len -= blksize; 131 ptr += blksize; 132 bno += BTOBB(blksize); 133 } 134 135 if (len != 0) { 136 *failaddr = __this_address; 137 return -EFSCORRUPTED; 138 } 139 140 return 0; 141 } 142 143 static void 144 xfs_attr3_rmt_read_verify( 145 struct xfs_buf *bp) 146 { 147 xfs_failaddr_t fa; 148 int error; 149 150 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 151 if (error) 152 xfs_verifier_error(bp, error, fa); 153 } 154 155 static xfs_failaddr_t 156 xfs_attr3_rmt_verify_struct( 157 struct xfs_buf *bp) 158 { 159 xfs_failaddr_t fa; 160 int error; 161 162 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 163 return error ? fa : NULL; 164 } 165 166 static void 167 xfs_attr3_rmt_write_verify( 168 struct xfs_buf *bp) 169 { 170 struct xfs_mount *mp = bp->b_mount; 171 xfs_failaddr_t fa; 172 int blksize = mp->m_attr_geo->blksize; 173 char *ptr; 174 int len; 175 xfs_daddr_t bno; 176 177 /* no verification of non-crc buffers */ 178 if (!xfs_sb_version_hascrc(&mp->m_sb)) 179 return; 180 181 ptr = bp->b_addr; 182 bno = bp->b_bn; 183 len = BBTOB(bp->b_length); 184 ASSERT(len >= blksize); 185 186 while (len > 0) { 187 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 188 189 fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 190 if (fa) { 191 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 192 return; 193 } 194 195 /* 196 * Ensure we aren't writing bogus LSNs to disk. See 197 * xfs_attr3_rmt_hdr_set() for the explanation. 198 */ 199 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 200 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 201 return; 202 } 203 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 204 205 len -= blksize; 206 ptr += blksize; 207 bno += BTOBB(blksize); 208 } 209 210 if (len != 0) 211 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 212 } 213 214 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 215 .name = "xfs_attr3_rmt", 216 .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, 217 .verify_read = xfs_attr3_rmt_read_verify, 218 .verify_write = xfs_attr3_rmt_write_verify, 219 .verify_struct = xfs_attr3_rmt_verify_struct, 220 }; 221 222 STATIC int 223 xfs_attr3_rmt_hdr_set( 224 struct xfs_mount *mp, 225 void *ptr, 226 xfs_ino_t ino, 227 uint32_t offset, 228 uint32_t size, 229 xfs_daddr_t bno) 230 { 231 struct xfs_attr3_rmt_hdr *rmt = ptr; 232 233 if (!xfs_sb_version_hascrc(&mp->m_sb)) 234 return 0; 235 236 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 237 rmt->rm_offset = cpu_to_be32(offset); 238 rmt->rm_bytes = cpu_to_be32(size); 239 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 240 rmt->rm_owner = cpu_to_be64(ino); 241 rmt->rm_blkno = cpu_to_be64(bno); 242 243 /* 244 * Remote attribute blocks are written synchronously, so we don't 245 * have an LSN that we can stamp in them that makes any sense to log 246 * recovery. To ensure that log recovery handles overwrites of these 247 * blocks sanely (i.e. once they've been freed and reallocated as some 248 * other type of metadata) we need to ensure that the LSN has a value 249 * that tells log recovery to ignore the LSN and overwrite the buffer 250 * with whatever is in it's log. To do this, we use the magic 251 * NULLCOMMITLSN to indicate that the LSN is invalid. 252 */ 253 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 254 255 return sizeof(struct xfs_attr3_rmt_hdr); 256 } 257 258 /* 259 * Helper functions to copy attribute data in and out of the one disk extents 260 */ 261 STATIC int 262 xfs_attr_rmtval_copyout( 263 struct xfs_mount *mp, 264 struct xfs_buf *bp, 265 xfs_ino_t ino, 266 int *offset, 267 int *valuelen, 268 uint8_t **dst) 269 { 270 char *src = bp->b_addr; 271 xfs_daddr_t bno = bp->b_bn; 272 int len = BBTOB(bp->b_length); 273 int blksize = mp->m_attr_geo->blksize; 274 275 ASSERT(len >= blksize); 276 277 while (len > 0 && *valuelen > 0) { 278 int hdr_size = 0; 279 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 280 281 byte_cnt = min(*valuelen, byte_cnt); 282 283 if (xfs_sb_version_hascrc(&mp->m_sb)) { 284 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 285 byte_cnt, bno)) { 286 xfs_alert(mp, 287 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 288 bno, *offset, byte_cnt, ino); 289 return -EFSCORRUPTED; 290 } 291 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 292 } 293 294 memcpy(*dst, src + hdr_size, byte_cnt); 295 296 /* roll buffer forwards */ 297 len -= blksize; 298 src += blksize; 299 bno += BTOBB(blksize); 300 301 /* roll attribute data forwards */ 302 *valuelen -= byte_cnt; 303 *dst += byte_cnt; 304 *offset += byte_cnt; 305 } 306 return 0; 307 } 308 309 STATIC void 310 xfs_attr_rmtval_copyin( 311 struct xfs_mount *mp, 312 struct xfs_buf *bp, 313 xfs_ino_t ino, 314 int *offset, 315 int *valuelen, 316 uint8_t **src) 317 { 318 char *dst = bp->b_addr; 319 xfs_daddr_t bno = bp->b_bn; 320 int len = BBTOB(bp->b_length); 321 int blksize = mp->m_attr_geo->blksize; 322 323 ASSERT(len >= blksize); 324 325 while (len > 0 && *valuelen > 0) { 326 int hdr_size; 327 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 328 329 byte_cnt = min(*valuelen, byte_cnt); 330 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 331 byte_cnt, bno); 332 333 memcpy(dst + hdr_size, *src, byte_cnt); 334 335 /* 336 * If this is the last block, zero the remainder of it. 337 * Check that we are actually the last block, too. 338 */ 339 if (byte_cnt + hdr_size < blksize) { 340 ASSERT(*valuelen - byte_cnt == 0); 341 ASSERT(len == blksize); 342 memset(dst + hdr_size + byte_cnt, 0, 343 blksize - hdr_size - byte_cnt); 344 } 345 346 /* roll buffer forwards */ 347 len -= blksize; 348 dst += blksize; 349 bno += BTOBB(blksize); 350 351 /* roll attribute data forwards */ 352 *valuelen -= byte_cnt; 353 *src += byte_cnt; 354 *offset += byte_cnt; 355 } 356 } 357 358 /* 359 * Read the value associated with an attribute from the out-of-line buffer 360 * that we stored it in. 361 * 362 * Returns 0 on successful retrieval, otherwise an error. 363 */ 364 int 365 xfs_attr_rmtval_get( 366 struct xfs_da_args *args) 367 { 368 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 369 struct xfs_mount *mp = args->dp->i_mount; 370 struct xfs_buf *bp; 371 xfs_dablk_t lblkno = args->rmtblkno; 372 uint8_t *dst = args->value; 373 int valuelen; 374 int nmap; 375 int error; 376 int blkcnt = args->rmtblkcnt; 377 int i; 378 int offset = 0; 379 380 trace_xfs_attr_rmtval_get(args); 381 382 ASSERT(!(args->flags & ATTR_KERNOVAL)); 383 ASSERT(args->rmtvaluelen == args->valuelen); 384 385 valuelen = args->rmtvaluelen; 386 while (valuelen > 0) { 387 nmap = ATTR_RMTVALUE_MAPSIZE; 388 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 389 blkcnt, map, &nmap, 390 XFS_BMAPI_ATTRFORK); 391 if (error) 392 return error; 393 ASSERT(nmap >= 1); 394 395 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 396 xfs_daddr_t dblkno; 397 int dblkcnt; 398 399 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 400 (map[i].br_startblock != HOLESTARTBLOCK)); 401 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 402 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 403 error = xfs_trans_read_buf(mp, args->trans, 404 mp->m_ddev_targp, 405 dblkno, dblkcnt, 0, &bp, 406 &xfs_attr3_rmt_buf_ops); 407 if (error) 408 return error; 409 410 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 411 &offset, &valuelen, 412 &dst); 413 xfs_trans_brelse(args->trans, bp); 414 if (error) 415 return error; 416 417 /* roll attribute extent map forwards */ 418 lblkno += map[i].br_blockcount; 419 blkcnt -= map[i].br_blockcount; 420 } 421 } 422 ASSERT(valuelen == 0); 423 return 0; 424 } 425 426 /* 427 * Write the value associated with an attribute into the out-of-line buffer 428 * that we have defined for it. 429 */ 430 int 431 xfs_attr_rmtval_set( 432 struct xfs_da_args *args) 433 { 434 struct xfs_inode *dp = args->dp; 435 struct xfs_mount *mp = dp->i_mount; 436 struct xfs_bmbt_irec map; 437 xfs_dablk_t lblkno; 438 xfs_fileoff_t lfileoff = 0; 439 uint8_t *src = args->value; 440 int blkcnt; 441 int valuelen; 442 int nmap; 443 int error; 444 int offset = 0; 445 446 trace_xfs_attr_rmtval_set(args); 447 448 /* 449 * Find a "hole" in the attribute address space large enough for 450 * us to drop the new attribute's value into. Because CRC enable 451 * attributes have headers, we can't just do a straight byte to FSB 452 * conversion and have to take the header space into account. 453 */ 454 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 455 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 456 XFS_ATTR_FORK); 457 if (error) 458 return error; 459 460 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 461 args->rmtblkcnt = blkcnt; 462 463 /* 464 * Roll through the "value", allocating blocks on disk as required. 465 */ 466 while (blkcnt > 0) { 467 /* 468 * Allocate a single extent, up to the size of the value. 469 * 470 * Note that we have to consider this a data allocation as we 471 * write the remote attribute without logging the contents. 472 * Hence we must ensure that we aren't using blocks that are on 473 * the busy list so that we don't overwrite blocks which have 474 * recently been freed but their transactions are not yet 475 * committed to disk. If we overwrite the contents of a busy 476 * extent and then crash then the block may not contain the 477 * correct metadata after log recovery occurs. 478 */ 479 nmap = 1; 480 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 481 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 482 &nmap); 483 if (error) 484 return error; 485 error = xfs_defer_finish(&args->trans); 486 if (error) 487 return error; 488 489 ASSERT(nmap == 1); 490 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 491 (map.br_startblock != HOLESTARTBLOCK)); 492 lblkno += map.br_blockcount; 493 blkcnt -= map.br_blockcount; 494 495 /* 496 * Start the next trans in the chain. 497 */ 498 error = xfs_trans_roll_inode(&args->trans, dp); 499 if (error) 500 return error; 501 } 502 503 /* 504 * Roll through the "value", copying the attribute value to the 505 * already-allocated blocks. Blocks are written synchronously 506 * so that we can know they are all on disk before we turn off 507 * the INCOMPLETE flag. 508 */ 509 lblkno = args->rmtblkno; 510 blkcnt = args->rmtblkcnt; 511 valuelen = args->rmtvaluelen; 512 while (valuelen > 0) { 513 struct xfs_buf *bp; 514 xfs_daddr_t dblkno; 515 int dblkcnt; 516 517 ASSERT(blkcnt > 0); 518 519 nmap = 1; 520 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 521 blkcnt, &map, &nmap, 522 XFS_BMAPI_ATTRFORK); 523 if (error) 524 return error; 525 ASSERT(nmap == 1); 526 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 527 (map.br_startblock != HOLESTARTBLOCK)); 528 529 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 530 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 531 532 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); 533 if (!bp) 534 return -ENOMEM; 535 bp->b_ops = &xfs_attr3_rmt_buf_ops; 536 537 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 538 &valuelen, &src); 539 540 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 541 xfs_buf_relse(bp); 542 if (error) 543 return error; 544 545 546 /* roll attribute extent map forwards */ 547 lblkno += map.br_blockcount; 548 blkcnt -= map.br_blockcount; 549 } 550 ASSERT(valuelen == 0); 551 return 0; 552 } 553 554 /* 555 * Remove the value associated with an attribute by deleting the 556 * out-of-line buffer that it is stored on. 557 */ 558 int 559 xfs_attr_rmtval_remove( 560 struct xfs_da_args *args) 561 { 562 struct xfs_mount *mp = args->dp->i_mount; 563 xfs_dablk_t lblkno; 564 int blkcnt; 565 int error; 566 int done; 567 568 trace_xfs_attr_rmtval_remove(args); 569 570 /* 571 * Roll through the "value", invalidating the attribute value's blocks. 572 */ 573 lblkno = args->rmtblkno; 574 blkcnt = args->rmtblkcnt; 575 while (blkcnt > 0) { 576 struct xfs_bmbt_irec map; 577 struct xfs_buf *bp; 578 xfs_daddr_t dblkno; 579 int dblkcnt; 580 int nmap; 581 582 /* 583 * Try to remember where we decided to put the value. 584 */ 585 nmap = 1; 586 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 587 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 588 if (error) 589 return error; 590 ASSERT(nmap == 1); 591 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 592 (map.br_startblock != HOLESTARTBLOCK)); 593 594 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 595 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 596 597 /* 598 * If the "remote" value is in the cache, remove it. 599 */ 600 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); 601 if (bp) { 602 xfs_buf_stale(bp); 603 xfs_buf_relse(bp); 604 bp = NULL; 605 } 606 607 lblkno += map.br_blockcount; 608 blkcnt -= map.br_blockcount; 609 } 610 611 /* 612 * Keep de-allocating extents until the remote-value region is gone. 613 */ 614 lblkno = args->rmtblkno; 615 blkcnt = args->rmtblkcnt; 616 done = 0; 617 while (!done) { 618 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 619 XFS_BMAPI_ATTRFORK, 1, &done); 620 if (error) 621 return error; 622 error = xfs_defer_finish(&args->trans); 623 if (error) 624 return error; 625 626 /* 627 * Close out trans and start the next one in the chain. 628 */ 629 error = xfs_trans_roll_inode(&args->trans, args->dp); 630 if (error) 631 return error; 632 } 633 return 0; 634 } 635