1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_trans.h" 20 #include "xfs_bmap.h" 21 #include "xfs_attr.h" 22 #include "xfs_attr_remote.h" 23 #include "xfs_trace.h" 24 #include "xfs_error.h" 25 26 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 27 28 /* 29 * Each contiguous block has a header, so it is not just a simple attribute 30 * length to FSB conversion. 31 */ 32 int 33 xfs_attr3_rmt_blocks( 34 struct xfs_mount *mp, 35 int attrlen) 36 { 37 if (xfs_sb_version_hascrc(&mp->m_sb)) { 38 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 39 return (attrlen + buflen - 1) / buflen; 40 } 41 return XFS_B_TO_FSB(mp, attrlen); 42 } 43 44 /* 45 * Checking of the remote attribute header is split into two parts. The verifier 46 * does CRC, location and bounds checking, the unpacking function checks the 47 * attribute parameters and owner. 48 */ 49 static xfs_failaddr_t 50 xfs_attr3_rmt_hdr_ok( 51 void *ptr, 52 xfs_ino_t ino, 53 uint32_t offset, 54 uint32_t size, 55 xfs_daddr_t bno) 56 { 57 struct xfs_attr3_rmt_hdr *rmt = ptr; 58 59 if (bno != be64_to_cpu(rmt->rm_blkno)) 60 return __this_address; 61 if (offset != be32_to_cpu(rmt->rm_offset)) 62 return __this_address; 63 if (size != be32_to_cpu(rmt->rm_bytes)) 64 return __this_address; 65 if (ino != be64_to_cpu(rmt->rm_owner)) 66 return __this_address; 67 68 /* ok */ 69 return NULL; 70 } 71 72 static xfs_failaddr_t 73 xfs_attr3_rmt_verify( 74 struct xfs_mount *mp, 75 struct xfs_buf *bp, 76 void *ptr, 77 int fsbsize, 78 xfs_daddr_t bno) 79 { 80 struct xfs_attr3_rmt_hdr *rmt = ptr; 81 82 if (!xfs_sb_version_hascrc(&mp->m_sb)) 83 return __this_address; 84 if (!xfs_verify_magic(bp, rmt->rm_magic)) 85 return __this_address; 86 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 87 return __this_address; 88 if (be64_to_cpu(rmt->rm_blkno) != bno) 89 return __this_address; 90 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 91 return __this_address; 92 if (be32_to_cpu(rmt->rm_offset) + 93 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 94 return __this_address; 95 if (rmt->rm_owner == 0) 96 return __this_address; 97 98 return NULL; 99 } 100 101 static int 102 __xfs_attr3_rmt_read_verify( 103 struct xfs_buf *bp, 104 bool check_crc, 105 xfs_failaddr_t *failaddr) 106 { 107 struct xfs_mount *mp = bp->b_mount; 108 char *ptr; 109 int len; 110 xfs_daddr_t bno; 111 int blksize = mp->m_attr_geo->blksize; 112 113 /* no verification of non-crc buffers */ 114 if (!xfs_sb_version_hascrc(&mp->m_sb)) 115 return 0; 116 117 ptr = bp->b_addr; 118 bno = bp->b_bn; 119 len = BBTOB(bp->b_length); 120 ASSERT(len >= blksize); 121 122 while (len > 0) { 123 if (check_crc && 124 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 125 *failaddr = __this_address; 126 return -EFSBADCRC; 127 } 128 *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 129 if (*failaddr) 130 return -EFSCORRUPTED; 131 len -= blksize; 132 ptr += blksize; 133 bno += BTOBB(blksize); 134 } 135 136 if (len != 0) { 137 *failaddr = __this_address; 138 return -EFSCORRUPTED; 139 } 140 141 return 0; 142 } 143 144 static void 145 xfs_attr3_rmt_read_verify( 146 struct xfs_buf *bp) 147 { 148 xfs_failaddr_t fa; 149 int error; 150 151 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 152 if (error) 153 xfs_verifier_error(bp, error, fa); 154 } 155 156 static xfs_failaddr_t 157 xfs_attr3_rmt_verify_struct( 158 struct xfs_buf *bp) 159 { 160 xfs_failaddr_t fa; 161 int error; 162 163 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 164 return error ? fa : NULL; 165 } 166 167 static void 168 xfs_attr3_rmt_write_verify( 169 struct xfs_buf *bp) 170 { 171 struct xfs_mount *mp = bp->b_mount; 172 xfs_failaddr_t fa; 173 int blksize = mp->m_attr_geo->blksize; 174 char *ptr; 175 int len; 176 xfs_daddr_t bno; 177 178 /* no verification of non-crc buffers */ 179 if (!xfs_sb_version_hascrc(&mp->m_sb)) 180 return; 181 182 ptr = bp->b_addr; 183 bno = bp->b_bn; 184 len = BBTOB(bp->b_length); 185 ASSERT(len >= blksize); 186 187 while (len > 0) { 188 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 189 190 fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 191 if (fa) { 192 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 193 return; 194 } 195 196 /* 197 * Ensure we aren't writing bogus LSNs to disk. See 198 * xfs_attr3_rmt_hdr_set() for the explanation. 199 */ 200 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 201 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 202 return; 203 } 204 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 205 206 len -= blksize; 207 ptr += blksize; 208 bno += BTOBB(blksize); 209 } 210 211 if (len != 0) 212 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 213 } 214 215 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 216 .name = "xfs_attr3_rmt", 217 .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, 218 .verify_read = xfs_attr3_rmt_read_verify, 219 .verify_write = xfs_attr3_rmt_write_verify, 220 .verify_struct = xfs_attr3_rmt_verify_struct, 221 }; 222 223 STATIC int 224 xfs_attr3_rmt_hdr_set( 225 struct xfs_mount *mp, 226 void *ptr, 227 xfs_ino_t ino, 228 uint32_t offset, 229 uint32_t size, 230 xfs_daddr_t bno) 231 { 232 struct xfs_attr3_rmt_hdr *rmt = ptr; 233 234 if (!xfs_sb_version_hascrc(&mp->m_sb)) 235 return 0; 236 237 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 238 rmt->rm_offset = cpu_to_be32(offset); 239 rmt->rm_bytes = cpu_to_be32(size); 240 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 241 rmt->rm_owner = cpu_to_be64(ino); 242 rmt->rm_blkno = cpu_to_be64(bno); 243 244 /* 245 * Remote attribute blocks are written synchronously, so we don't 246 * have an LSN that we can stamp in them that makes any sense to log 247 * recovery. To ensure that log recovery handles overwrites of these 248 * blocks sanely (i.e. once they've been freed and reallocated as some 249 * other type of metadata) we need to ensure that the LSN has a value 250 * that tells log recovery to ignore the LSN and overwrite the buffer 251 * with whatever is in it's log. To do this, we use the magic 252 * NULLCOMMITLSN to indicate that the LSN is invalid. 253 */ 254 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 255 256 return sizeof(struct xfs_attr3_rmt_hdr); 257 } 258 259 /* 260 * Helper functions to copy attribute data in and out of the one disk extents 261 */ 262 STATIC int 263 xfs_attr_rmtval_copyout( 264 struct xfs_mount *mp, 265 struct xfs_buf *bp, 266 xfs_ino_t ino, 267 int *offset, 268 int *valuelen, 269 uint8_t **dst) 270 { 271 char *src = bp->b_addr; 272 xfs_daddr_t bno = bp->b_bn; 273 int len = BBTOB(bp->b_length); 274 int blksize = mp->m_attr_geo->blksize; 275 276 ASSERT(len >= blksize); 277 278 while (len > 0 && *valuelen > 0) { 279 int hdr_size = 0; 280 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 281 282 byte_cnt = min(*valuelen, byte_cnt); 283 284 if (xfs_sb_version_hascrc(&mp->m_sb)) { 285 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 286 byte_cnt, bno)) { 287 xfs_alert(mp, 288 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 289 bno, *offset, byte_cnt, ino); 290 return -EFSCORRUPTED; 291 } 292 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 293 } 294 295 memcpy(*dst, src + hdr_size, byte_cnt); 296 297 /* roll buffer forwards */ 298 len -= blksize; 299 src += blksize; 300 bno += BTOBB(blksize); 301 302 /* roll attribute data forwards */ 303 *valuelen -= byte_cnt; 304 *dst += byte_cnt; 305 *offset += byte_cnt; 306 } 307 return 0; 308 } 309 310 STATIC void 311 xfs_attr_rmtval_copyin( 312 struct xfs_mount *mp, 313 struct xfs_buf *bp, 314 xfs_ino_t ino, 315 int *offset, 316 int *valuelen, 317 uint8_t **src) 318 { 319 char *dst = bp->b_addr; 320 xfs_daddr_t bno = bp->b_bn; 321 int len = BBTOB(bp->b_length); 322 int blksize = mp->m_attr_geo->blksize; 323 324 ASSERT(len >= blksize); 325 326 while (len > 0 && *valuelen > 0) { 327 int hdr_size; 328 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 329 330 byte_cnt = min(*valuelen, byte_cnt); 331 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 332 byte_cnt, bno); 333 334 memcpy(dst + hdr_size, *src, byte_cnt); 335 336 /* 337 * If this is the last block, zero the remainder of it. 338 * Check that we are actually the last block, too. 339 */ 340 if (byte_cnt + hdr_size < blksize) { 341 ASSERT(*valuelen - byte_cnt == 0); 342 ASSERT(len == blksize); 343 memset(dst + hdr_size + byte_cnt, 0, 344 blksize - hdr_size - byte_cnt); 345 } 346 347 /* roll buffer forwards */ 348 len -= blksize; 349 dst += blksize; 350 bno += BTOBB(blksize); 351 352 /* roll attribute data forwards */ 353 *valuelen -= byte_cnt; 354 *src += byte_cnt; 355 *offset += byte_cnt; 356 } 357 } 358 359 /* 360 * Read the value associated with an attribute from the out-of-line buffer 361 * that we stored it in. 362 * 363 * Returns 0 on successful retrieval, otherwise an error. 364 */ 365 int 366 xfs_attr_rmtval_get( 367 struct xfs_da_args *args) 368 { 369 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 370 struct xfs_mount *mp = args->dp->i_mount; 371 struct xfs_buf *bp; 372 xfs_dablk_t lblkno = args->rmtblkno; 373 uint8_t *dst = args->value; 374 int valuelen; 375 int nmap; 376 int error; 377 int blkcnt = args->rmtblkcnt; 378 int i; 379 int offset = 0; 380 381 trace_xfs_attr_rmtval_get(args); 382 383 ASSERT(!(args->flags & ATTR_KERNOVAL)); 384 ASSERT(args->rmtvaluelen == args->valuelen); 385 386 valuelen = args->rmtvaluelen; 387 while (valuelen > 0) { 388 nmap = ATTR_RMTVALUE_MAPSIZE; 389 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 390 blkcnt, map, &nmap, 391 XFS_BMAPI_ATTRFORK); 392 if (error) 393 return error; 394 ASSERT(nmap >= 1); 395 396 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 397 xfs_daddr_t dblkno; 398 int dblkcnt; 399 400 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 401 (map[i].br_startblock != HOLESTARTBLOCK)); 402 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 403 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 404 error = xfs_trans_read_buf(mp, args->trans, 405 mp->m_ddev_targp, 406 dblkno, dblkcnt, 0, &bp, 407 &xfs_attr3_rmt_buf_ops); 408 if (error) 409 return error; 410 411 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 412 &offset, &valuelen, 413 &dst); 414 xfs_trans_brelse(args->trans, bp); 415 if (error) 416 return error; 417 418 /* roll attribute extent map forwards */ 419 lblkno += map[i].br_blockcount; 420 blkcnt -= map[i].br_blockcount; 421 } 422 } 423 ASSERT(valuelen == 0); 424 return 0; 425 } 426 427 /* 428 * Write the value associated with an attribute into the out-of-line buffer 429 * that we have defined for it. 430 */ 431 int 432 xfs_attr_rmtval_set( 433 struct xfs_da_args *args) 434 { 435 struct xfs_inode *dp = args->dp; 436 struct xfs_mount *mp = dp->i_mount; 437 struct xfs_bmbt_irec map; 438 xfs_dablk_t lblkno; 439 xfs_fileoff_t lfileoff = 0; 440 uint8_t *src = args->value; 441 int blkcnt; 442 int valuelen; 443 int nmap; 444 int error; 445 int offset = 0; 446 447 trace_xfs_attr_rmtval_set(args); 448 449 /* 450 * Find a "hole" in the attribute address space large enough for 451 * us to drop the new attribute's value into. Because CRC enable 452 * attributes have headers, we can't just do a straight byte to FSB 453 * conversion and have to take the header space into account. 454 */ 455 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 456 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 457 XFS_ATTR_FORK); 458 if (error) 459 return error; 460 461 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 462 args->rmtblkcnt = blkcnt; 463 464 /* 465 * Roll through the "value", allocating blocks on disk as required. 466 */ 467 while (blkcnt > 0) { 468 /* 469 * Allocate a single extent, up to the size of the value. 470 * 471 * Note that we have to consider this a data allocation as we 472 * write the remote attribute without logging the contents. 473 * Hence we must ensure that we aren't using blocks that are on 474 * the busy list so that we don't overwrite blocks which have 475 * recently been freed but their transactions are not yet 476 * committed to disk. If we overwrite the contents of a busy 477 * extent and then crash then the block may not contain the 478 * correct metadata after log recovery occurs. 479 */ 480 nmap = 1; 481 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 482 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 483 &nmap); 484 if (error) 485 return error; 486 error = xfs_defer_finish(&args->trans); 487 if (error) 488 return error; 489 490 ASSERT(nmap == 1); 491 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 492 (map.br_startblock != HOLESTARTBLOCK)); 493 lblkno += map.br_blockcount; 494 blkcnt -= map.br_blockcount; 495 496 /* 497 * Start the next trans in the chain. 498 */ 499 error = xfs_trans_roll_inode(&args->trans, dp); 500 if (error) 501 return error; 502 } 503 504 /* 505 * Roll through the "value", copying the attribute value to the 506 * already-allocated blocks. Blocks are written synchronously 507 * so that we can know they are all on disk before we turn off 508 * the INCOMPLETE flag. 509 */ 510 lblkno = args->rmtblkno; 511 blkcnt = args->rmtblkcnt; 512 valuelen = args->rmtvaluelen; 513 while (valuelen > 0) { 514 struct xfs_buf *bp; 515 xfs_daddr_t dblkno; 516 int dblkcnt; 517 518 ASSERT(blkcnt > 0); 519 520 nmap = 1; 521 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 522 blkcnt, &map, &nmap, 523 XFS_BMAPI_ATTRFORK); 524 if (error) 525 return error; 526 ASSERT(nmap == 1); 527 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 528 (map.br_startblock != HOLESTARTBLOCK)); 529 530 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 531 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 532 533 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); 534 if (!bp) 535 return -ENOMEM; 536 bp->b_ops = &xfs_attr3_rmt_buf_ops; 537 538 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 539 &valuelen, &src); 540 541 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 542 xfs_buf_relse(bp); 543 if (error) 544 return error; 545 546 547 /* roll attribute extent map forwards */ 548 lblkno += map.br_blockcount; 549 blkcnt -= map.br_blockcount; 550 } 551 ASSERT(valuelen == 0); 552 return 0; 553 } 554 555 /* 556 * Remove the value associated with an attribute by deleting the 557 * out-of-line buffer that it is stored on. 558 */ 559 int 560 xfs_attr_rmtval_remove( 561 struct xfs_da_args *args) 562 { 563 struct xfs_mount *mp = args->dp->i_mount; 564 xfs_dablk_t lblkno; 565 int blkcnt; 566 int error; 567 int done; 568 569 trace_xfs_attr_rmtval_remove(args); 570 571 /* 572 * Roll through the "value", invalidating the attribute value's blocks. 573 */ 574 lblkno = args->rmtblkno; 575 blkcnt = args->rmtblkcnt; 576 while (blkcnt > 0) { 577 struct xfs_bmbt_irec map; 578 struct xfs_buf *bp; 579 xfs_daddr_t dblkno; 580 int dblkcnt; 581 int nmap; 582 583 /* 584 * Try to remember where we decided to put the value. 585 */ 586 nmap = 1; 587 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 588 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 589 if (error) 590 return error; 591 ASSERT(nmap == 1); 592 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 593 (map.br_startblock != HOLESTARTBLOCK)); 594 595 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 596 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 597 598 /* 599 * If the "remote" value is in the cache, remove it. 600 */ 601 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); 602 if (bp) { 603 xfs_buf_stale(bp); 604 xfs_buf_relse(bp); 605 bp = NULL; 606 } 607 608 lblkno += map.br_blockcount; 609 blkcnt -= map.br_blockcount; 610 } 611 612 /* 613 * Keep de-allocating extents until the remote-value region is gone. 614 */ 615 lblkno = args->rmtblkno; 616 blkcnt = args->rmtblkcnt; 617 done = 0; 618 while (!done) { 619 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 620 XFS_BMAPI_ATTRFORK, 1, &done); 621 if (error) 622 return error; 623 error = xfs_defer_finish(&args->trans); 624 if (error) 625 return error; 626 627 /* 628 * Close out trans and start the next one in the chain. 629 */ 630 error = xfs_trans_roll_inode(&args->trans, args->dp); 631 if (error) 632 return error; 633 } 634 return 0; 635 } 636