1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_trans.h" 20 #include "xfs_bmap.h" 21 #include "xfs_attr.h" 22 #include "xfs_trace.h" 23 #include "xfs_error.h" 24 25 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 26 27 /* 28 * Each contiguous block has a header, so it is not just a simple attribute 29 * length to FSB conversion. 30 */ 31 int 32 xfs_attr3_rmt_blocks( 33 struct xfs_mount *mp, 34 int attrlen) 35 { 36 if (xfs_sb_version_hascrc(&mp->m_sb)) { 37 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 38 return (attrlen + buflen - 1) / buflen; 39 } 40 return XFS_B_TO_FSB(mp, attrlen); 41 } 42 43 /* 44 * Checking of the remote attribute header is split into two parts. The verifier 45 * does CRC, location and bounds checking, the unpacking function checks the 46 * attribute parameters and owner. 47 */ 48 static xfs_failaddr_t 49 xfs_attr3_rmt_hdr_ok( 50 void *ptr, 51 xfs_ino_t ino, 52 uint32_t offset, 53 uint32_t size, 54 xfs_daddr_t bno) 55 { 56 struct xfs_attr3_rmt_hdr *rmt = ptr; 57 58 if (bno != be64_to_cpu(rmt->rm_blkno)) 59 return __this_address; 60 if (offset != be32_to_cpu(rmt->rm_offset)) 61 return __this_address; 62 if (size != be32_to_cpu(rmt->rm_bytes)) 63 return __this_address; 64 if (ino != be64_to_cpu(rmt->rm_owner)) 65 return __this_address; 66 67 /* ok */ 68 return NULL; 69 } 70 71 static xfs_failaddr_t 72 xfs_attr3_rmt_verify( 73 struct xfs_mount *mp, 74 struct xfs_buf *bp, 75 void *ptr, 76 int fsbsize, 77 xfs_daddr_t bno) 78 { 79 struct xfs_attr3_rmt_hdr *rmt = ptr; 80 81 if (!xfs_sb_version_hascrc(&mp->m_sb)) 82 return __this_address; 83 if (!xfs_verify_magic(bp, rmt->rm_magic)) 84 return __this_address; 85 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 86 return __this_address; 87 if (be64_to_cpu(rmt->rm_blkno) != bno) 88 return __this_address; 89 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 90 return __this_address; 91 if (be32_to_cpu(rmt->rm_offset) + 92 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 93 return __this_address; 94 if (rmt->rm_owner == 0) 95 return __this_address; 96 97 return NULL; 98 } 99 100 static int 101 __xfs_attr3_rmt_read_verify( 102 struct xfs_buf *bp, 103 bool check_crc, 104 xfs_failaddr_t *failaddr) 105 { 106 struct xfs_mount *mp = bp->b_mount; 107 char *ptr; 108 int len; 109 xfs_daddr_t bno; 110 int blksize = mp->m_attr_geo->blksize; 111 112 /* no verification of non-crc buffers */ 113 if (!xfs_sb_version_hascrc(&mp->m_sb)) 114 return 0; 115 116 ptr = bp->b_addr; 117 bno = bp->b_bn; 118 len = BBTOB(bp->b_length); 119 ASSERT(len >= blksize); 120 121 while (len > 0) { 122 if (check_crc && 123 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 124 *failaddr = __this_address; 125 return -EFSBADCRC; 126 } 127 *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 128 if (*failaddr) 129 return -EFSCORRUPTED; 130 len -= blksize; 131 ptr += blksize; 132 bno += BTOBB(blksize); 133 } 134 135 if (len != 0) { 136 *failaddr = __this_address; 137 return -EFSCORRUPTED; 138 } 139 140 return 0; 141 } 142 143 static void 144 xfs_attr3_rmt_read_verify( 145 struct xfs_buf *bp) 146 { 147 xfs_failaddr_t fa; 148 int error; 149 150 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 151 if (error) 152 xfs_verifier_error(bp, error, fa); 153 } 154 155 static xfs_failaddr_t 156 xfs_attr3_rmt_verify_struct( 157 struct xfs_buf *bp) 158 { 159 xfs_failaddr_t fa; 160 int error; 161 162 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 163 return error ? fa : NULL; 164 } 165 166 static void 167 xfs_attr3_rmt_write_verify( 168 struct xfs_buf *bp) 169 { 170 struct xfs_mount *mp = bp->b_mount; 171 xfs_failaddr_t fa; 172 int blksize = mp->m_attr_geo->blksize; 173 char *ptr; 174 int len; 175 xfs_daddr_t bno; 176 177 /* no verification of non-crc buffers */ 178 if (!xfs_sb_version_hascrc(&mp->m_sb)) 179 return; 180 181 ptr = bp->b_addr; 182 bno = bp->b_bn; 183 len = BBTOB(bp->b_length); 184 ASSERT(len >= blksize); 185 186 while (len > 0) { 187 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 188 189 fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 190 if (fa) { 191 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 192 return; 193 } 194 195 /* 196 * Ensure we aren't writing bogus LSNs to disk. See 197 * xfs_attr3_rmt_hdr_set() for the explanation. 198 */ 199 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 200 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 201 return; 202 } 203 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 204 205 len -= blksize; 206 ptr += blksize; 207 bno += BTOBB(blksize); 208 } 209 210 if (len != 0) 211 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 212 } 213 214 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 215 .name = "xfs_attr3_rmt", 216 .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, 217 .verify_read = xfs_attr3_rmt_read_verify, 218 .verify_write = xfs_attr3_rmt_write_verify, 219 .verify_struct = xfs_attr3_rmt_verify_struct, 220 }; 221 222 STATIC int 223 xfs_attr3_rmt_hdr_set( 224 struct xfs_mount *mp, 225 void *ptr, 226 xfs_ino_t ino, 227 uint32_t offset, 228 uint32_t size, 229 xfs_daddr_t bno) 230 { 231 struct xfs_attr3_rmt_hdr *rmt = ptr; 232 233 if (!xfs_sb_version_hascrc(&mp->m_sb)) 234 return 0; 235 236 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 237 rmt->rm_offset = cpu_to_be32(offset); 238 rmt->rm_bytes = cpu_to_be32(size); 239 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 240 rmt->rm_owner = cpu_to_be64(ino); 241 rmt->rm_blkno = cpu_to_be64(bno); 242 243 /* 244 * Remote attribute blocks are written synchronously, so we don't 245 * have an LSN that we can stamp in them that makes any sense to log 246 * recovery. To ensure that log recovery handles overwrites of these 247 * blocks sanely (i.e. once they've been freed and reallocated as some 248 * other type of metadata) we need to ensure that the LSN has a value 249 * that tells log recovery to ignore the LSN and overwrite the buffer 250 * with whatever is in it's log. To do this, we use the magic 251 * NULLCOMMITLSN to indicate that the LSN is invalid. 252 */ 253 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 254 255 return sizeof(struct xfs_attr3_rmt_hdr); 256 } 257 258 /* 259 * Helper functions to copy attribute data in and out of the one disk extents 260 */ 261 STATIC int 262 xfs_attr_rmtval_copyout( 263 struct xfs_mount *mp, 264 struct xfs_buf *bp, 265 xfs_ino_t ino, 266 int *offset, 267 int *valuelen, 268 uint8_t **dst) 269 { 270 char *src = bp->b_addr; 271 xfs_daddr_t bno = bp->b_bn; 272 int len = BBTOB(bp->b_length); 273 int blksize = mp->m_attr_geo->blksize; 274 275 ASSERT(len >= blksize); 276 277 while (len > 0 && *valuelen > 0) { 278 int hdr_size = 0; 279 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 280 281 byte_cnt = min(*valuelen, byte_cnt); 282 283 if (xfs_sb_version_hascrc(&mp->m_sb)) { 284 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 285 byte_cnt, bno)) { 286 xfs_alert(mp, 287 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 288 bno, *offset, byte_cnt, ino); 289 return -EFSCORRUPTED; 290 } 291 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 292 } 293 294 memcpy(*dst, src + hdr_size, byte_cnt); 295 296 /* roll buffer forwards */ 297 len -= blksize; 298 src += blksize; 299 bno += BTOBB(blksize); 300 301 /* roll attribute data forwards */ 302 *valuelen -= byte_cnt; 303 *dst += byte_cnt; 304 *offset += byte_cnt; 305 } 306 return 0; 307 } 308 309 STATIC void 310 xfs_attr_rmtval_copyin( 311 struct xfs_mount *mp, 312 struct xfs_buf *bp, 313 xfs_ino_t ino, 314 int *offset, 315 int *valuelen, 316 uint8_t **src) 317 { 318 char *dst = bp->b_addr; 319 xfs_daddr_t bno = bp->b_bn; 320 int len = BBTOB(bp->b_length); 321 int blksize = mp->m_attr_geo->blksize; 322 323 ASSERT(len >= blksize); 324 325 while (len > 0 && *valuelen > 0) { 326 int hdr_size; 327 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 328 329 byte_cnt = min(*valuelen, byte_cnt); 330 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 331 byte_cnt, bno); 332 333 memcpy(dst + hdr_size, *src, byte_cnt); 334 335 /* 336 * If this is the last block, zero the remainder of it. 337 * Check that we are actually the last block, too. 338 */ 339 if (byte_cnt + hdr_size < blksize) { 340 ASSERT(*valuelen - byte_cnt == 0); 341 ASSERT(len == blksize); 342 memset(dst + hdr_size + byte_cnt, 0, 343 blksize - hdr_size - byte_cnt); 344 } 345 346 /* roll buffer forwards */ 347 len -= blksize; 348 dst += blksize; 349 bno += BTOBB(blksize); 350 351 /* roll attribute data forwards */ 352 *valuelen -= byte_cnt; 353 *src += byte_cnt; 354 *offset += byte_cnt; 355 } 356 } 357 358 /* 359 * Read the value associated with an attribute from the out-of-line buffer 360 * that we stored it in. 361 */ 362 int 363 xfs_attr_rmtval_get( 364 struct xfs_da_args *args) 365 { 366 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 367 struct xfs_mount *mp = args->dp->i_mount; 368 struct xfs_buf *bp; 369 xfs_dablk_t lblkno = args->rmtblkno; 370 uint8_t *dst = args->value; 371 int valuelen; 372 int nmap; 373 int error; 374 int blkcnt = args->rmtblkcnt; 375 int i; 376 int offset = 0; 377 378 trace_xfs_attr_rmtval_get(args); 379 380 ASSERT(!(args->flags & ATTR_KERNOVAL)); 381 ASSERT(args->rmtvaluelen == args->valuelen); 382 383 valuelen = args->rmtvaluelen; 384 while (valuelen > 0) { 385 nmap = ATTR_RMTVALUE_MAPSIZE; 386 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 387 blkcnt, map, &nmap, 388 XFS_BMAPI_ATTRFORK); 389 if (error) 390 return error; 391 ASSERT(nmap >= 1); 392 393 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 394 xfs_daddr_t dblkno; 395 int dblkcnt; 396 397 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 398 (map[i].br_startblock != HOLESTARTBLOCK)); 399 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 400 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 401 error = xfs_trans_read_buf(mp, args->trans, 402 mp->m_ddev_targp, 403 dblkno, dblkcnt, 0, &bp, 404 &xfs_attr3_rmt_buf_ops); 405 if (error) 406 return error; 407 408 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 409 &offset, &valuelen, 410 &dst); 411 xfs_trans_brelse(args->trans, bp); 412 if (error) 413 return error; 414 415 /* roll attribute extent map forwards */ 416 lblkno += map[i].br_blockcount; 417 blkcnt -= map[i].br_blockcount; 418 } 419 } 420 ASSERT(valuelen == 0); 421 return 0; 422 } 423 424 /* 425 * Write the value associated with an attribute into the out-of-line buffer 426 * that we have defined for it. 427 */ 428 int 429 xfs_attr_rmtval_set( 430 struct xfs_da_args *args) 431 { 432 struct xfs_inode *dp = args->dp; 433 struct xfs_mount *mp = dp->i_mount; 434 struct xfs_bmbt_irec map; 435 xfs_dablk_t lblkno; 436 xfs_fileoff_t lfileoff = 0; 437 uint8_t *src = args->value; 438 int blkcnt; 439 int valuelen; 440 int nmap; 441 int error; 442 int offset = 0; 443 444 trace_xfs_attr_rmtval_set(args); 445 446 /* 447 * Find a "hole" in the attribute address space large enough for 448 * us to drop the new attribute's value into. Because CRC enable 449 * attributes have headers, we can't just do a straight byte to FSB 450 * conversion and have to take the header space into account. 451 */ 452 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 453 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 454 XFS_ATTR_FORK); 455 if (error) 456 return error; 457 458 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 459 args->rmtblkcnt = blkcnt; 460 461 /* 462 * Roll through the "value", allocating blocks on disk as required. 463 */ 464 while (blkcnt > 0) { 465 /* 466 * Allocate a single extent, up to the size of the value. 467 * 468 * Note that we have to consider this a data allocation as we 469 * write the remote attribute without logging the contents. 470 * Hence we must ensure that we aren't using blocks that are on 471 * the busy list so that we don't overwrite blocks which have 472 * recently been freed but their transactions are not yet 473 * committed to disk. If we overwrite the contents of a busy 474 * extent and then crash then the block may not contain the 475 * correct metadata after log recovery occurs. 476 */ 477 nmap = 1; 478 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 479 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 480 &nmap); 481 if (error) 482 return error; 483 error = xfs_defer_finish(&args->trans); 484 if (error) 485 return error; 486 487 ASSERT(nmap == 1); 488 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 489 (map.br_startblock != HOLESTARTBLOCK)); 490 lblkno += map.br_blockcount; 491 blkcnt -= map.br_blockcount; 492 493 /* 494 * Start the next trans in the chain. 495 */ 496 error = xfs_trans_roll_inode(&args->trans, dp); 497 if (error) 498 return error; 499 } 500 501 /* 502 * Roll through the "value", copying the attribute value to the 503 * already-allocated blocks. Blocks are written synchronously 504 * so that we can know they are all on disk before we turn off 505 * the INCOMPLETE flag. 506 */ 507 lblkno = args->rmtblkno; 508 blkcnt = args->rmtblkcnt; 509 valuelen = args->rmtvaluelen; 510 while (valuelen > 0) { 511 struct xfs_buf *bp; 512 xfs_daddr_t dblkno; 513 int dblkcnt; 514 515 ASSERT(blkcnt > 0); 516 517 nmap = 1; 518 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 519 blkcnt, &map, &nmap, 520 XFS_BMAPI_ATTRFORK); 521 if (error) 522 return error; 523 ASSERT(nmap == 1); 524 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 525 (map.br_startblock != HOLESTARTBLOCK)); 526 527 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 528 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 529 530 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); 531 if (!bp) 532 return -ENOMEM; 533 bp->b_ops = &xfs_attr3_rmt_buf_ops; 534 535 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 536 &valuelen, &src); 537 538 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 539 xfs_buf_relse(bp); 540 if (error) 541 return error; 542 543 544 /* roll attribute extent map forwards */ 545 lblkno += map.br_blockcount; 546 blkcnt -= map.br_blockcount; 547 } 548 ASSERT(valuelen == 0); 549 return 0; 550 } 551 552 /* 553 * Remove the value associated with an attribute by deleting the 554 * out-of-line buffer that it is stored on. 555 */ 556 int 557 xfs_attr_rmtval_remove( 558 struct xfs_da_args *args) 559 { 560 struct xfs_mount *mp = args->dp->i_mount; 561 xfs_dablk_t lblkno; 562 int blkcnt; 563 int error; 564 int done; 565 566 trace_xfs_attr_rmtval_remove(args); 567 568 /* 569 * Roll through the "value", invalidating the attribute value's blocks. 570 */ 571 lblkno = args->rmtblkno; 572 blkcnt = args->rmtblkcnt; 573 while (blkcnt > 0) { 574 struct xfs_bmbt_irec map; 575 struct xfs_buf *bp; 576 xfs_daddr_t dblkno; 577 int dblkcnt; 578 int nmap; 579 580 /* 581 * Try to remember where we decided to put the value. 582 */ 583 nmap = 1; 584 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 585 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 586 if (error) 587 return error; 588 ASSERT(nmap == 1); 589 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 590 (map.br_startblock != HOLESTARTBLOCK)); 591 592 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 593 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 594 595 /* 596 * If the "remote" value is in the cache, remove it. 597 */ 598 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); 599 if (bp) { 600 xfs_buf_stale(bp); 601 xfs_buf_relse(bp); 602 bp = NULL; 603 } 604 605 lblkno += map.br_blockcount; 606 blkcnt -= map.br_blockcount; 607 } 608 609 /* 610 * Keep de-allocating extents until the remote-value region is gone. 611 */ 612 lblkno = args->rmtblkno; 613 blkcnt = args->rmtblkcnt; 614 done = 0; 615 while (!done) { 616 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 617 XFS_BMAPI_ATTRFORK, 1, &done); 618 if (error) 619 return error; 620 error = xfs_defer_finish(&args->trans); 621 if (error) 622 return error; 623 624 /* 625 * Close out trans and start the next one in the chain. 626 */ 627 error = xfs_trans_roll_inode(&args->trans, args->dp); 628 if (error) 629 return error; 630 } 631 return 0; 632 } 633