// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include <linux/backing-dev.h>

#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_errortag.h"
#include "xfs_error.h"

static kmem_zone_t *xfs_buf_zone;

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)

/*
 * Locking orders
 *
 * xfs_buf_ioacct_inc:
 * xfs_buf_ioacct_dec:
 *	b_sema (caller holds)
 *	  b_lock
 *
 * xfs_buf_stale:
 *	b_sema (caller holds)
 *	  b_lock
 *	    lru_lock
 *
 * xfs_buf_rele:
 *	b_lock
 *	  pag_buf_lock
 *	    lru_lock
 *
 * xfs_buftarg_wait_rele
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 *
 * xfs_buftarg_isolate
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 */

static inline int
xfs_buf_is_vmapped(
	struct xfs_buf	*bp)
{
	/*
	 * Return true if the buffer is vmapped.
	 *
	 * b_addr is null if the buffer is not mapped, but the code is clever
	 * enough to know it doesn't have to map a single page, so the check
	 * has to be both for b_addr and bp->b_page_count > 1.
	 */
	return bp->b_addr && bp->b_page_count > 1;
}

static inline int
xfs_buf_vmap_len(
	struct xfs_buf	*bp)
{
	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
}

/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * tracking adds unnecessary overhead. This is used for synchronization
 * purposes with unmount (see xfs_wait_buftarg()), so all we really need is a
 * count of in-flight buffers.
 *
 * Buffers that are never released (e.g., superblock, iclog buffers) must set
 * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
 * never reaches zero and unmount hangs indefinitely.
 */
static inline void
xfs_buf_ioacct_inc(
	struct xfs_buf	*bp)
{
	if (bp->b_flags & XBF_NO_IOACCT)
		return;

	ASSERT(bp->b_flags & XBF_ASYNC);
	spin_lock(&bp->b_lock);
	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
		percpu_counter_inc(&bp->b_target->bt_io_count);
	}
	spin_unlock(&bp->b_lock);
}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg.
 */
static inline void
__xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	lockdep_assert_held(&bp->b_lock);

	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
		percpu_counter_dec(&bp->b_target->bt_io_count);
	}
}

static inline void
xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);
	spin_unlock(&bp->b_lock);
}
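/*
 * Editorial note on pairing: the in-flight count is only bumped for XBF_ASYNC
 * buffers at submission time (see __xfs_buf_submit() below) and is dropped
 * again when the buffer is released to the LRU, freed or marked stale (see
 * xfs_buf_rele() and xfs_buf_stale()). A buffer therefore contributes at most
 * one count per hold cycle regardless of how many I/Os it performs.
 */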
/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that the LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */
void
xfs_buf_stale(
	struct xfs_buf	*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_STALE;

	/*
	 * Clear the delwri status so that a delwri queue walker will not
	 * flush this buffer to disk now that it is stale. The delwri queue has
	 * a reference to the buffer, so this is safe to do.
	 */
	bp->b_flags &= ~_XBF_DELWRI_Q;

	/*
	 * Once the buffer is marked stale and unlocked, a subsequent lookup
	 * could reset b_flags. There is no guarantee that the buffer is
	 * unaccounted (released to LRU) before that occurs. Drop in-flight
	 * status now to preserve accounting consistency.
	 */
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);

	atomic_set(&bp->b_lru_ref, 0);
	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
		atomic_dec(&bp->b_hold);

	ASSERT(atomic_read(&bp->b_hold) >= 1);
	spin_unlock(&bp->b_lock);
}

static int
xfs_buf_get_maps(
	struct xfs_buf	*bp,
	int		map_count)
{
	ASSERT(bp->b_maps == NULL);
	bp->b_map_count = map_count;

	if (map_count == 1) {
		bp->b_maps = &bp->__b_map;
		return 0;
	}

	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
				 KM_NOFS);
	if (!bp->b_maps)
		return -ENOMEM;
	return 0;
}

/*
 * Frees b_maps if it was allocated.
 */
static void
xfs_buf_free_maps(
	struct xfs_buf	*bp)
{
	if (bp->b_maps != &bp->__b_map) {
		kmem_free(bp->b_maps);
		bp->b_maps = NULL;
	}
}
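/*
 * Illustrative sketch (editorial, not part of this file): a discontiguous
 * buffer is described by an array of struct xfs_buf_map entries, one per
 * extent of the buffer. Callers that only need a single contiguous range
 * normally use the DEFINE_SINGLE_BUF_MAP() helper instead. The daddr/len
 * values and verifier below are hypothetical.
 *
 *	struct xfs_buf_map map[2] = {
 *		{ .bm_bn = daddr1, .bm_len = len1 },
 *		{ .bm_bn = daddr2, .bm_len = len2 },
 *	};
 *	struct xfs_buf *bp;
 *	int error;
 *
 *	error = xfs_buf_read_map(target, map, 2, 0, &bp, ops, __this_address);
 *	if (!error)
 *		xfs_buf_relse(bp);
 */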
static int
_xfs_buf_alloc(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp)
{
	struct xfs_buf		*bp;
	int			error;
	int			i;

	*bpp = NULL;
	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
	if (unlikely(!bp))
		return -ENOMEM;

	/*
	 * We don't want certain flags to appear in b_flags unless they are
	 * specifically set by later operations on the buffer.
	 */
	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);

	atomic_set(&bp->b_hold, 1);
	atomic_set(&bp->b_lru_ref, 1);
	init_completion(&bp->b_iowait);
	INIT_LIST_HEAD(&bp->b_lru);
	INIT_LIST_HEAD(&bp->b_list);
	INIT_LIST_HEAD(&bp->b_li_list);
	sema_init(&bp->b_sema, 0); /* held, no waiters */
	spin_lock_init(&bp->b_lock);
	bp->b_target = target;
	bp->b_mount = target->bt_mount;
	bp->b_flags = flags;

	/*
	 * Set length and io_length to the same value initially.
	 * I/O routines should use io_length, which will be the same in
	 * most cases but may be reset (e.g. XFS recovery).
	 */
	error = xfs_buf_get_maps(bp, nmaps);
	if (error) {
		kmem_cache_free(xfs_buf_zone, bp);
		return error;
	}

	bp->b_bn = map[0].bm_bn;
	bp->b_length = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bp->b_length += map[i].bm_len;
	}

	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(bp->b_mount, xb_create);
	trace_xfs_buf_init(bp, _RET_IP_);

	*bpp = bp;
	return 0;
}

/*
 * Allocate a page array capable of holding a specified number
 * of pages, and point the page buf at it.
 */
STATIC int
_xfs_buf_get_pages(
	xfs_buf_t	*bp,
	int		page_count)
{
	/* Make sure that we have a page list */
	if (bp->b_pages == NULL) {
		bp->b_page_count = page_count;
		if (page_count <= XB_PAGES) {
			bp->b_pages = bp->b_page_array;
		} else {
			bp->b_pages = kmem_alloc(sizeof(struct page *) *
						 page_count, KM_NOFS);
			if (bp->b_pages == NULL)
				return -ENOMEM;
		}
		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}

/*
 * Frees b_pages if it was allocated.
 */
STATIC void
_xfs_buf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->b_pages != bp->b_page_array) {
		kmem_free(bp->b_pages);
		bp->b_pages = NULL;
	}
}

/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use xfs_buf_rele instead for
 * hashed and refcounted buffers
 */
static void
xfs_buf_free(
	xfs_buf_t	*bp)
{
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_lru));

	if (bp->b_flags & _XBF_PAGES) {
		uint		i;

		if (xfs_buf_is_vmapped(bp))
			vm_unmap_ram(bp->b_addr - bp->b_offset,
					bp->b_page_count);

		for (i = 0; i < bp->b_page_count; i++) {
			struct page	*page = bp->b_pages[i];

			__free_page(page);
		}
	} else if (bp->b_flags & _XBF_KMEM)
		kmem_free(bp->b_addr);
	_xfs_buf_free_pages(bp);
	xfs_buf_free_maps(bp);
	kmem_cache_free(xfs_buf_zone, bp);
}
/*
 * Allocates all the pages for buffer in question and builds its page list.
 */
STATIC int
xfs_buf_allocate_memory(
	xfs_buf_t	*bp,
	uint		flags)
{
	size_t		size;
	size_t		nbytes, offset;
	gfp_t		gfp_mask = xb_to_gfp(flags);
	unsigned short	page_count, i;
	xfs_off_t	start, end;
	int		error;
	xfs_km_flags_t	kmflag_mask = 0;

	/*
	 * assure zeroed buffer for non-read cases.
	 */
	if (!(flags & XBF_READ)) {
		kmflag_mask |= KM_ZERO;
		gfp_mask |= __GFP_ZERO;
	}

	/*
	 * for buffers that are contained within a single page, just allocate
	 * the memory from the heap - there's no need for the complexity of
	 * page arrays to keep allocation down to order 0.
	 */
	size = BBTOB(bp->b_length);
	if (size < PAGE_SIZE) {
		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
		bp->b_addr = kmem_alloc_io(size, align_mask,
					   KM_NOFS | kmflag_mask);
		if (!bp->b_addr) {
			/* low memory - use alloc_page loop instead */
			goto use_alloc_page;
		}

		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
			/* b_addr spans two pages - use alloc_page instead */
			kmem_free(bp->b_addr);
			bp->b_addr = NULL;
			goto use_alloc_page;
		}
		bp->b_offset = offset_in_page(bp->b_addr);
		bp->b_pages = bp->b_page_array;
		bp->b_pages[0] = kmem_to_page(bp->b_addr);
		bp->b_page_count = 1;
		bp->b_flags |= _XBF_KMEM;
		return 0;
	}

use_alloc_page:
	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
								>> PAGE_SHIFT;
	page_count = end - start;
	error = _xfs_buf_get_pages(bp, page_count);
	if (unlikely(error))
		return error;

	offset = bp->b_offset;
	bp->b_flags |= _XBF_PAGES;

	for (i = 0; i < bp->b_page_count; i++) {
		struct page	*page;
		uint		retries = 0;
retry:
		page = alloc_page(gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & XBF_READ_AHEAD) {
				bp->b_page_count = i;
				error = -ENOMEM;
				goto out_free_pages;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				xfs_err(NULL,
		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
					current->comm, current->pid,
					__func__, gfp_mask);

			XFS_STATS_INC(bp->b_mount, xb_page_retries);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(bp->b_mount, xb_page_found);

		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
		size -= nbytes;
		bp->b_pages[i] = page;
		offset = 0;
	}
	return 0;

out_free_pages:
	for (i = 0; i < bp->b_page_count; i++)
		__free_page(bp->b_pages[i]);
	bp->b_flags &= ~_XBF_PAGES;
	return error;
}
/*
 * Map buffer into kernel address-space if necessary.
 */
STATIC int
_xfs_buf_map_pages(
	xfs_buf_t	*bp,
	uint		flags)
{
	ASSERT(bp->b_flags & _XBF_PAGES);
	if (bp->b_page_count == 1) {
		/* A single page buffer is always mappable */
		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
	} else if (flags & XBF_UNMAPPED) {
		bp->b_addr = NULL;
	} else {
		int retried = 0;
		unsigned nofs_flag;

		/*
		 * vm_map_ram() will allocate auxiliary structures (e.g.
		 * pagetables) with GFP_KERNEL, yet we are likely to be under
		 * GFP_NOFS context here. Hence we need to tell memory reclaim
		 * that we are in such a context via PF_MEMALLOC_NOFS to
		 * prevent memory reclaim re-entering the filesystem here and
		 * potentially deadlocking.
		 */
		nofs_flag = memalloc_nofs_save();
		do {
			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
						-1, PAGE_KERNEL);
			if (bp->b_addr)
				break;
			vm_unmap_aliases();
		} while (retried++ <= 1);
		memalloc_nofs_restore(nofs_flag);

		if (!bp->b_addr)
			return -ENOMEM;
		bp->b_addr += bp->b_offset;
	}

	return 0;
}

/*
 * Finding and Reading Buffers
 */
static int
_xfs_buf_obj_cmp(
	struct rhashtable_compare_arg	*arg,
	const void			*obj)
{
	const struct xfs_buf_map	*map = arg->key;
	const struct xfs_buf		*bp = obj;

	/*
	 * The key hashing in the lookup path depends on the key being the
	 * first element of the compare_arg, make sure to assert this.
	 */
	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);

	if (bp->b_bn != map->bm_bn)
		return 1;

	if (unlikely(bp->b_length != map->bm_len)) {
		/*
		 * found a block number match. If the range doesn't
		 * match, the only way this is allowed is if the buffer
		 * in the cache is stale and the transaction that made
		 * it stale has not yet committed. i.e. we are
		 * reallocating a busy extent. Skip this buffer and
		 * continue searching for an exact match.
		 */
		ASSERT(bp->b_flags & XBF_STALE);
		return 1;
	}
	return 0;
}

static const struct rhashtable_params xfs_buf_hash_params = {
	.min_size		= 32,	/* empty AGs have minimal footprint */
	.nelem_hint		= 16,
	.key_len		= sizeof(xfs_daddr_t),
	.key_offset		= offsetof(struct xfs_buf, b_bn),
	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
	.automatic_shrinking	= true,
	.obj_cmpfn		= _xfs_buf_obj_cmp,
};

int
xfs_buf_hash_init(
	struct xfs_perag	*pag)
{
	spin_lock_init(&pag->pag_buf_lock);
	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}

void
xfs_buf_hash_destroy(
	struct xfs_perag	*pag)
{
	rhashtable_destroy(&pag->pag_buf_hash);
}
/*
 * Look up a buffer in the buffer cache and return it referenced and locked
 * in @found_bp.
 *
 * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
 * cache.
 *
 * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
 * -EAGAIN if we fail to lock it.
 *
 * Return values are:
 *	-EFSCORRUPTED if we have been supplied with an invalid address
 *	-EAGAIN on trylock failure
 *	-ENOENT if we fail to find a match and @new_bp was NULL
 *	0, with @found_bp:
 *		- @new_bp if we inserted it into the cache
 *		- the buffer we found and locked.
 */
static int
xfs_buf_find(
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		*new_bp,
	struct xfs_buf		**found_bp)
{
	struct xfs_perag	*pag;
	xfs_buf_t		*bp;
	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
	xfs_daddr_t		eofs;
	int			i;

	*found_bp = NULL;

	for (i = 0; i < nmaps; i++)
		cmap.bm_len += map[i].bm_len;

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
	ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));

	/*
	 * Corrupted block numbers can get through to here, unfortunately, so
	 * we have to check that the buffer falls within the filesystem bounds.
	 */
	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
		xfs_alert(btp->bt_mount,
			  "%s: daddr 0x%llx out of range, EOFS 0x%llx",
			  __func__, cmap.bm_bn, eofs);
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	pag = xfs_perag_get(btp->bt_mount,
			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));

	spin_lock(&pag->pag_buf_lock);
	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
				    xfs_buf_hash_params);
	if (bp) {
		atomic_inc(&bp->b_hold);
		goto found;
	}

	/* No match found */
	if (!new_bp) {
		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		return -ENOENT;
	}

	/* the buffer keeps the perag reference until it is freed */
	new_bp->b_pag = pag;
	rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
			       xfs_buf_hash_params);
	spin_unlock(&pag->pag_buf_lock);
	*found_bp = new_bp;
	return 0;

found:
	spin_unlock(&pag->pag_buf_lock);
	xfs_perag_put(pag);

	if (!xfs_buf_trylock(bp)) {
		if (flags & XBF_TRYLOCK) {
			xfs_buf_rele(bp);
			XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
			return -EAGAIN;
		}
		xfs_buf_lock(bp);
		XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
	}

	/*
	 * if the buffer is stale, clear all the external state associated with
	 * it. We need to keep flags such as how we allocated the buffer memory
	 * intact here.
	 */
	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		ASSERT(bp->b_iodone == NULL);
		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
		bp->b_ops = NULL;
	}

	trace_xfs_buf_find(bp, flags, _RET_IP_);
	XFS_STATS_INC(btp->bt_mount, xb_get_locked);
	*found_bp = bp;
	return 0;
}

struct xfs_buf *
xfs_buf_incore(
	struct xfs_buftarg	*target,
	xfs_daddr_t		blkno,
	size_t			numblks,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	int			error;
	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);

	error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
	if (error)
		return NULL;
	return bp;
}
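/*
 * Illustrative sketch (editorial, not part of this file): peek at the cache
 * without blocking on a busy buffer. The daddr/numblks values are
 * hypothetical.
 *
 *	struct xfs_buf *bp;
 *
 *	bp = xfs_buf_incore(target, daddr, numblks, XBF_TRYLOCK);
 *	if (bp) {
 *		(the buffer was cached; it is returned locked and held)
 *		xfs_buf_relse(bp);
 *	}
 */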
/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads will see 3 orders of magnitude
 * more hits than misses.
 */
int
xfs_buf_get_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp)
{
	struct xfs_buf		*bp;
	struct xfs_buf		*new_bp;
	int			error = 0;

	*bpp = NULL;
	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
	if (!error)
		goto found;
	if (error != -ENOENT)
		return error;

	error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
	if (error)
		return error;

	error = xfs_buf_allocate_memory(new_bp, flags);
	if (error) {
		xfs_buf_free(new_bp);
		return error;
	}

	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
	if (error) {
		xfs_buf_free(new_bp);
		return error;
	}

	if (bp != new_bp)
		xfs_buf_free(new_bp);

found:
	if (!bp->b_addr) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			xfs_warn(target->bt_mount,
				"%s: failed to map pages", __func__);
			xfs_buf_relse(bp);
			return error;
		}
	}

	/*
	 * Clear b_error if this is a lookup from a caller that doesn't expect
	 * valid data to be found in the buffer.
	 */
	if (!(flags & XBF_READ))
		xfs_buf_ioerror(bp, 0);

	XFS_STATS_INC(target->bt_mount, xb_get);
	trace_xfs_buf_get(bp, flags, _RET_IP_);
	*bpp = bp;
	return 0;
}

STATIC int
_xfs_buf_read(
	xfs_buf_t		*bp,
	xfs_buf_flags_t		flags)
{
	ASSERT(!(flags & XBF_WRITE));
	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

	return xfs_buf_submit(bp);
}
/*
 * Reverify a buffer found in cache without an attached ->b_ops.
 *
 * If the caller passed an ops structure and the buffer doesn't have ops
 * assigned, set the ops and use it to verify the contents. If verification
 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
 * already in XBF_DONE state on entry.
 *
 * Under normal operations, every in-core buffer is verified on read I/O
 * completion. There are two scenarios that can lead to in-core buffers without
 * an assigned ->b_ops. The first is during log recovery of buffers on a V4
 * filesystem, though these buffers are purged at the end of recovery. The
 * other is online repair, which intentionally reads with a NULL buffer ops to
 * run several verifiers across an in-core buffer in order to establish buffer
 * type. If repair can't establish that, the buffer will be left in memory
 * with NULL buffer ops.
 */
int
xfs_buf_reverify(
	struct xfs_buf		*bp,
	const struct xfs_buf_ops *ops)
{
	ASSERT(bp->b_flags & XBF_DONE);
	ASSERT(bp->b_error == 0);

	if (!ops || bp->b_ops)
		return 0;

	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	if (bp->b_error)
		bp->b_flags &= ~XBF_DONE;
	return bp->b_error;
}

int
xfs_buf_read_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops,
	xfs_failaddr_t		fa)
{
	struct xfs_buf		*bp;
	int			error;

	flags |= XBF_READ;
	*bpp = NULL;

	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
	if (error)
		return error;

	trace_xfs_buf_read(bp, flags, _RET_IP_);

	if (!(bp->b_flags & XBF_DONE)) {
		/* Initiate the buffer read and wait. */
		XFS_STATS_INC(target->bt_mount, xb_get_read);
		bp->b_ops = ops;
		error = _xfs_buf_read(bp, flags);

		/* Readahead iodone already dropped the buffer, so exit. */
		if (flags & XBF_ASYNC)
			return 0;
	} else {
		/* Buffer already read; all we need to do is check it. */
		error = xfs_buf_reverify(bp, ops);

		/* Readahead already finished; drop the buffer and exit. */
		if (flags & XBF_ASYNC) {
			xfs_buf_relse(bp);
			return 0;
		}

		/* We do not want read in the flags */
		bp->b_flags &= ~XBF_READ;
		ASSERT(bp->b_ops != NULL || ops == NULL);
	}

	/*
	 * If we've had a read error, then the contents of the buffer are
	 * invalid and should not be used. To ensure that a followup read tries
	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
	 * mark the buffer stale. This ensures that anyone who has a current
	 * reference to the buffer will interpret its contents correctly and
	 * future cache lookups will also treat it as an empty, uninitialised
	 * buffer.
	 */
	if (error) {
		if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
			xfs_buf_ioerror_alert(bp, fa);

		bp->b_flags &= ~XBF_DONE;
		xfs_buf_stale(bp);
		xfs_buf_relse(bp);

		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	*bpp = bp;
	return 0;
}

/*
 * If we are not low on memory then do the readahead in a deadlock
 * safe manner.
 */
void
xfs_buf_readahead_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;

	if (bdi_read_congested(target->bt_bdev->bd_bdi))
		return;

	xfs_buf_read_map(target, map, nmaps,
		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
		     __this_address);
}
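/*
 * Illustrative sketch (editorial, not part of this file): a typical cached
 * metadata read. The daddr/numblks values and verifier are hypothetical; real
 * callers normally go through higher level wrappers such as
 * xfs_trans_read_buf().
 *
 *	DEFINE_SINGLE_BUF_MAP(map, daddr, numblks);
 *	struct xfs_buf *bp;
 *	int error;
 *
 *	error = xfs_buf_read_map(target, &map, 1, 0, &bp, ops,
 *				 __this_address);
 *	if (error)
 *		return error;
 *	(use bp->b_addr while the buffer is locked and held)
 *	xfs_buf_relse(bp);
 */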
/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */
int
xfs_buf_read_uncached(
	struct xfs_buftarg	*target,
	xfs_daddr_t		daddr,
	size_t			numblks,
	int			flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	*bpp = NULL;

	error = xfs_buf_get_uncached(target, numblks, flags, &bp);
	if (error)
		return error;

	/* set up the buffer for a read IO */
	ASSERT(bp->b_map_count == 1);
	bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
	bp->b_maps[0].bm_bn = daddr;
	bp->b_flags |= XBF_READ;
	bp->b_ops = ops;

	xfs_buf_submit(bp);
	if (bp->b_error) {
		error = bp->b_error;
		xfs_buf_relse(bp);
		return error;
	}

	*bpp = bp;
	return 0;
}

int
xfs_buf_get_uncached(
	struct xfs_buftarg	*target,
	size_t			numblks,
	int			flags,
	struct xfs_buf		**bpp)
{
	unsigned long		page_count;
	int			error, i;
	struct xfs_buf		*bp;
	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

	*bpp = NULL;

	/* flags might contain irrelevant bits, pass only what we care about */
	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
	if (error)
		goto fail;

	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
	error = _xfs_buf_get_pages(bp, page_count);
	if (error)
		goto fail_free_buf;

	for (i = 0; i < page_count; i++) {
		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
		if (!bp->b_pages[i]) {
			error = -ENOMEM;
			goto fail_free_mem;
		}
	}
	bp->b_flags |= _XBF_PAGES;

	error = _xfs_buf_map_pages(bp, 0);
	if (unlikely(error)) {
		xfs_warn(target->bt_mount,
			"%s: failed to map pages", __func__);
		goto fail_free_mem;
	}

	trace_xfs_buf_get_uncached(bp, _RET_IP_);
	*bpp = bp;
	return 0;

fail_free_mem:
	while (--i >= 0)
		__free_page(bp->b_pages[i]);
	_xfs_buf_free_pages(bp);
fail_free_buf:
	xfs_buf_free_maps(bp);
	kmem_cache_free(xfs_buf_zone, bp);
fail:
	return error;
}

/*
 * Increment reference count on buffer, to hold the buffer concurrently
 * with another thread which may release (free) the buffer asynchronously.
 * Must hold the buffer already to call this function.
 */
void
xfs_buf_hold(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_hold(bp, _RET_IP_);
	atomic_inc(&bp->b_hold);
}
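/*
 * Editorial note: every xfs_buf_hold() must be balanced by an xfs_buf_rele()
 * (or xfs_buf_relse(), which also unlocks). The lookup paths above return
 * buffers that are already held, so callers only take an extra hold when the
 * buffer must outlive that initial reference, e.g. across asynchronous I/O
 * completion as __xfs_buf_submit() does below.
 */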
/*
 * Release a hold on the specified buffer. If the hold count is 1, the buffer
 * is placed on the LRU or freed (depending on b_lru_ref).
 */
void
xfs_buf_rele(
	xfs_buf_t		*bp)
{
	struct xfs_perag	*pag = bp->b_pag;
	bool			release;
	bool			freebuf = false;

	trace_xfs_buf_rele(bp, _RET_IP_);

	if (!pag) {
		ASSERT(list_empty(&bp->b_lru));
		if (atomic_dec_and_test(&bp->b_hold)) {
			xfs_buf_ioacct_dec(bp);
			xfs_buf_free(bp);
		}
		return;
	}

	ASSERT(atomic_read(&bp->b_hold) > 0);

	/*
	 * We grab the b_lock here first to serialise racing xfs_buf_rele()
	 * calls. The pag_buf_lock being taken on the last reference only
	 * serialises against racing lookups in xfs_buf_find(). IOWs, the
	 * second to last reference we drop here is not serialised against the
	 * last reference until we take bp->b_lock. Hence if we don't grab
	 * b_lock first, the last "release" reference can win the race to the
	 * lock and free the buffer before the second-to-last reference is
	 * processed, leading to a use-after-free scenario.
	 */
	spin_lock(&bp->b_lock);
	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
	if (!release) {
		/*
		 * Drop the in-flight state if the buffer is already on the LRU
		 * and it holds the only reference. This is racy because we
		 * haven't acquired the pag lock, but the use of
		 * XFS_BSTATE_IN_FLIGHT ensures the decrement occurs only once
		 * per-buf.
		 */
		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
			__xfs_buf_ioacct_dec(bp);
		goto out_unlock;
	}

	/* the last reference has been dropped ... */
	__xfs_buf_ioacct_dec(bp);
	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
		/*
		 * If the buffer is added to the LRU take a new reference to the
		 * buffer for the LRU and clear the (now stale) dispose list
		 * state flag
		 */
		if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
			bp->b_state &= ~XFS_BSTATE_DISPOSE;
			atomic_inc(&bp->b_hold);
		}
		spin_unlock(&pag->pag_buf_lock);
	} else {
		/*
		 * most of the time buffers will already be removed from the
		 * LRU, so optimise that case by checking for the
		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
		 * was on was the disposal list
		 */
		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
			list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
		} else {
			ASSERT(list_empty(&bp->b_lru));
		}

		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
				       xfs_buf_hash_params);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		freebuf = true;
	}

out_unlock:
	spin_unlock(&bp->b_lock);

	if (freebuf)
		xfs_buf_free(bp);
}

/*
 * Lock a buffer object, if it is not already locked.
 *
 * If we come across a stale, pinned, locked buffer, we know that we are
 * being asked to lock a buffer that has been reallocated. Because it is
 * pinned, we know that the log has not been pushed to disk and hence it
 * will still be locked. Rather than continuing to have trylock attempts
 * fail until someone else pushes the log, push it ourselves before
 * returning. This means that the xfsaild will not get stuck trying
 * to push on stale inode buffers.
 */
int
xfs_buf_trylock(
	struct xfs_buf		*bp)
{
	int			locked;

	locked = down_trylock(&bp->b_sema) == 0;
	if (locked)
		trace_xfs_buf_trylock(bp, _RET_IP_);
	else
		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
	return locked;
}
/*
 * Lock a buffer object.
 *
 * If we come across a stale, pinned, locked buffer, we know that we
 * are being asked to lock a buffer that has been reallocated. Because
 * it is pinned, we know that the log has not been pushed to disk and
 * hence it will still be locked. Rather than sleeping until someone
 * else pushes the log, push it ourselves before trying to get the lock.
 */
void
xfs_buf_lock(
	struct xfs_buf		*bp)
{
	trace_xfs_buf_lock(bp, _RET_IP_);

	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
		xfs_log_force(bp->b_mount, 0);
	down(&bp->b_sema);

	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

void
xfs_buf_unlock(
	struct xfs_buf		*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	up(&bp->b_sema);
	trace_xfs_buf_unlock(bp, _RET_IP_);
}

STATIC void
xfs_buf_wait_unpin(
	xfs_buf_t		*bp)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&bp->b_pin_count) == 0)
			break;
		io_schedule();
	}
	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}

/*
 * Buffer Utility Routines
 */

void
xfs_buf_ioend(
	struct xfs_buf	*bp)
{
	bool		read = bp->b_flags & XBF_READ;

	trace_xfs_buf_iodone(bp, _RET_IP_);

	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
	if (!bp->b_error && bp->b_io_error)
		xfs_buf_ioerror(bp, bp->b_io_error);

	/* Only validate buffers that were read without errors */
	if (read && !bp->b_error && bp->b_ops) {
		ASSERT(!bp->b_iodone);
		bp->b_ops->verify_read(bp);
	}

	if (!bp->b_error)
		bp->b_flags |= XBF_DONE;

	if (bp->b_iodone)
		(*(bp->b_iodone))(bp);
	else if (bp->b_flags & XBF_ASYNC)
		xfs_buf_relse(bp);
	else
		complete(&bp->b_iowait);
}

static void
xfs_buf_ioend_work(
	struct work_struct	*work)
{
	struct xfs_buf		*bp =
		container_of(work, xfs_buf_t, b_ioend_work);

	xfs_buf_ioend(bp);
}

static void
xfs_buf_ioend_async(
	struct xfs_buf	*bp)
{
	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}

void
__xfs_buf_ioerror(
	xfs_buf_t	*bp,
	int		error,
	xfs_failaddr_t	failaddr)
{
	ASSERT(error <= 0 && error >= -1000);
	bp->b_error = error;
	trace_xfs_buf_ioerror(bp, error, failaddr);
}

void
xfs_buf_ioerror_alert(
	struct xfs_buf		*bp,
	xfs_failaddr_t		func)
{
	xfs_alert(bp->b_mount,
"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
			func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
			-bp->b_error);
}

int
xfs_bwrite(
	struct xfs_buf		*bp)
{
	int			error;

	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
			 XBF_WRITE_FAIL | XBF_DONE);

	error = xfs_buf_submit(bp);
	if (error)
		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
	return error;
}
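/*
 * Illustrative sketch (editorial, not part of this file): a synchronous write
 * of a buffer the caller already owns. xfs_bwrite() requires the buffer to be
 * locked and, on the sync path, leaves it locked and held on return, so the
 * caller is still responsible for releasing it.
 *
 *	xfs_buf_lock(bp);
 *	(modify the buffer contents via bp->b_addr here)
 *	error = xfs_bwrite(bp);
 *	xfs_buf_relse(bp);
 */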
static void
xfs_buf_bio_end_io(
	struct bio		*bio)
{
	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;

	/*
	 * don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
	if (bio->bi_status) {
		int error = blk_status_to_errno(bio->bi_status);

		cmpxchg(&bp->b_io_error, 0, error);
	}

	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
		xfs_buf_ioend_async(bp);
	bio_put(bio);
}

static void
xfs_buf_ioapply_map(
	struct xfs_buf	*bp,
	int		map,
	int		*buf_offset,
	int		*count,
	int		op)
{
	int		page_index;
	int		total_nr_pages = bp->b_page_count;
	int		nr_pages;
	struct bio	*bio;
	sector_t	sector = bp->b_maps[map].bm_bn;
	int		size;
	int		offset;

	/* skip the pages in the buffer before the start offset */
	page_index = 0;
	offset = *buf_offset;
	while (offset >= PAGE_SIZE) {
		page_index++;
		offset -= PAGE_SIZE;
	}

	/*
	 * Limit the IO size to the length of the current vector, and update
	 * the remaining IO count for the next time around.
	 */
	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
	*count -= size;
	*buf_offset += size;

next_chunk:
	atomic_inc(&bp->b_io_remaining);
	nr_pages = min(total_nr_pages, BIO_MAX_PAGES);

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio_set_dev(bio, bp->b_target->bt_bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = xfs_buf_bio_end_io;
	bio->bi_private = bp;
	bio->bi_opf = op;

	for (; size && nr_pages; nr_pages--, page_index++) {
		int	rbytes, nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
				      offset);
		if (rbytes < nbytes)
			break;

		offset = 0;
		sector += BTOBB(nbytes);
		size -= nbytes;
		total_nr_pages--;
	}

	if (likely(bio->bi_iter.bi_size)) {
		if (xfs_buf_is_vmapped(bp)) {
			flush_kernel_vmap_range(bp->b_addr,
						xfs_buf_vmap_len(bp));
		}
		submit_bio(bio);
		if (size)
			goto next_chunk;
	} else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
		atomic_dec(&bp->b_io_remaining);
		xfs_buf_ioerror(bp, -EIO);
		bio_put(bio);
	}

}
STATIC void
_xfs_buf_ioapply(
	struct xfs_buf	*bp)
{
	struct blk_plug	plug;
	int		op;
	int		offset;
	int		size;
	int		i;

	/*
	 * Make sure we capture only current IO errors rather than stale errors
	 * left over from previous use of the buffer (e.g. failed readahead).
	 */
	bp->b_error = 0;

	if (bp->b_flags & XBF_WRITE) {
		op = REQ_OP_WRITE;

		/*
		 * Run the write verifier callback function if it exists. If
		 * this function fails it will mark the buffer with an error
		 * and the IO should not be dispatched.
		 */
		if (bp->b_ops) {
			bp->b_ops->verify_write(bp);
			if (bp->b_error) {
				xfs_force_shutdown(bp->b_mount,
						   SHUTDOWN_CORRUPT_INCORE);
				return;
			}
		} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
			struct xfs_mount *mp = bp->b_mount;

			/*
			 * non-crc filesystems don't attach verifiers during
			 * log recovery, so don't warn for such filesystems.
			 */
			if (xfs_sb_version_hascrc(&mp->m_sb)) {
				xfs_warn(mp,
					"%s: no buf ops on daddr 0x%llx len %d",
					__func__, bp->b_bn, bp->b_length);
				xfs_hex_dump(bp->b_addr,
						XFS_CORRUPTION_DUMP_LEN);
				dump_stack();
			}
		}
	} else {
		op = REQ_OP_READ;
		if (bp->b_flags & XBF_READ_AHEAD)
			op |= REQ_RAHEAD;
	}

	/* we only use the buffer cache for meta-data */
	op |= REQ_META;

	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * xfs_buf_ioapply_map() will modify them appropriately for each
	 * subsequent call.
	 */
	offset = bp->b_offset;
	size = BBTOB(bp->b_length);
	blk_start_plug(&plug);
	for (i = 0; i < bp->b_map_count; i++) {
		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
		if (bp->b_error)
			break;
		if (size <= 0)
			break;	/* all done */
	}
	blk_finish_plug(&plug);
}

/*
 * Wait for I/O completion of a sync buffer and return the I/O error code.
 */
static int
xfs_buf_iowait(
	struct xfs_buf	*bp)
{
	ASSERT(!(bp->b_flags & XBF_ASYNC));

	trace_xfs_buf_iowait(bp, _RET_IP_);
	wait_for_completion(&bp->b_iowait);
	trace_xfs_buf_iowait_done(bp, _RET_IP_);

	return bp->b_error;
}
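/*
 * Editorial note: xfs_buf_iowait() is kept separate from submission so that
 * callers which batch I/O, such as xfs_buf_delwri_submit() below, can issue
 * each buffer with __xfs_buf_submit(bp, false) under a plug and only wait for
 * completion once every buffer has been queued to the block layer.
 */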
/*
 * Buffer I/O submission path, read or write. Asynchronous submission transfers
 * the buffer lock ownership and the current reference to the IO. It is not
 * safe to reference the buffer after a call to this function unless the caller
 * holds an additional reference itself.
 */
int
__xfs_buf_submit(
	struct xfs_buf	*bp,
	bool		wait)
{
	int		error = 0;

	trace_xfs_buf_submit(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

	/* on shutdown we stale and complete the buffer immediately */
	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
		xfs_buf_ioerror(bp, -EIO);
		bp->b_flags &= ~XBF_DONE;
		xfs_buf_stale(bp);
		xfs_buf_ioend(bp);
		return -EIO;
	}

	/*
	 * Grab a reference so the buffer does not go away underneath us. For
	 * async buffers, I/O completion drops the callers reference, which
	 * could occur before submission returns.
	 */
	xfs_buf_hold(bp);

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);

	/* clear the internal error state to avoid spurious errors */
	bp->b_io_error = 0;

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from
	 * calling xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	if (bp->b_flags & XBF_ASYNC)
		xfs_buf_ioacct_inc(bp);
	_xfs_buf_ioapply(bp);

	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion so
	 * that we don't return to the caller with completion still pending.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
			xfs_buf_ioend(bp);
		else
			xfs_buf_ioend_async(bp);
	}

	if (wait)
		error = xfs_buf_iowait(bp);

	/*
	 * Release the hold that keeps the buffer referenced for the entire
	 * I/O. Note that if the buffer is async, it is not safe to reference
	 * after this release.
	 */
	xfs_buf_rele(bp);
	return error;
}

void *
xfs_buf_offset(
	struct xfs_buf		*bp,
	size_t			offset)
{
	struct page		*page;

	if (bp->b_addr)
		return bp->b_addr + offset;

	offset += bp->b_offset;
	page = bp->b_pages[offset >> PAGE_SHIFT];
	return page_address(page) + (offset & (PAGE_SIZE-1));
}

void
xfs_buf_zero(
	struct xfs_buf		*bp,
	size_t			boff,
	size_t			bsize)
{
	size_t			bend;

	bend = boff + bsize;
	while (boff < bend) {
		struct page	*page;
		int		page_index, page_offset, csize;

		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
		page = bp->b_pages[page_index];
		csize = min_t(size_t, PAGE_SIZE - page_offset,
				      BBTOB(bp->b_length) - boff);

		ASSERT((csize + page_offset) <= PAGE_SIZE);

		memset(page_address(page) + page_offset, 0, csize);

		boff += csize;
	}
}

/*
 * Handling of buffer targets (buftargs).
 */

/*
 * Wait for any bufs with callbacks that have been submitted but have not yet
 * returned. These buffers will have an elevated hold count, so wait on those
 * while freeing all the buffers only held by the LRU.
 */
static enum lru_status
xfs_buftarg_wait_rele(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)

{
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head	*dispose = arg;

	if (atomic_read(&bp->b_hold) > 1) {
		/* need to wait, so skip it this pass */
		trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
		return LRU_SKIP;
	}
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/*
	 * clear the LRU reference count so the buffer doesn't get
	 * ignored in xfs_buf_rele().
	 */
	atomic_set(&bp->b_lru_ref, 0);
	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}
void
xfs_wait_buftarg(
	struct xfs_buftarg	*btp)
{
	LIST_HEAD(dispose);
	int			loop = 0;

	/*
	 * First wait on the buftarg I/O count for all in-flight buffers to be
	 * released. This is critical as new buffers do not make the LRU until
	 * they are released.
	 *
	 * Next, flush the buffer workqueue to ensure all completion processing
	 * has finished. Just waiting on buffer locks is not sufficient for
	 * async IO as the reference count held over IO is not released until
	 * after the buffer lock is dropped. Hence we need to ensure here that
	 * all reference counts have been dropped before we start walking the
	 * LRU list.
	 */
	while (percpu_counter_sum(&btp->bt_io_count))
		delay(100);
	flush_workqueue(btp->bt_mount->m_buf_workqueue);

	/* loop until there is nothing left on the lru list. */
	while (list_lru_count(&btp->bt_lru)) {
		list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
			      &dispose, LONG_MAX);

		while (!list_empty(&dispose)) {
			struct xfs_buf *bp;
			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
			list_del_init(&bp->b_lru);
			if (bp->b_flags & XBF_WRITE_FAIL) {
				xfs_alert(btp->bt_mount,
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
					(long long)bp->b_bn);
				xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
			}
			xfs_buf_rele(bp);
		}
		if (loop++ != 0)
			delay(100);
	}
}

static enum lru_status
xfs_buftarg_isolate(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)
{
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head	*dispose = arg;

	/*
	 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
	 * If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;
	/*
	 * Decrement the b_lru_ref count unless the value is already
	 * zero. If the value is already zero, we need to reclaim the
	 * buffer, otherwise it gets another trip through the LRU.
	 */
	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
		spin_unlock(&bp->b_lock);
		return LRU_ROTATE;
	}

	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

static unsigned long
xfs_buftarg_shrink_scan(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_buftarg	*btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	LIST_HEAD(dispose);
	unsigned long		freed;

	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
				     xfs_buftarg_isolate, &dispose);

	while (!list_empty(&dispose)) {
		struct xfs_buf *bp;
		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
		list_del_init(&bp->b_lru);
		xfs_buf_rele(bp);
	}

	return freed;
}

static unsigned long
xfs_buftarg_shrink_count(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_buftarg	*btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	return list_lru_shrink_count(&btp->bt_lru, sc);
}

void
xfs_free_buftarg(
	struct xfs_buftarg	*btp)
{
	unregister_shrinker(&btp->bt_shrinker);
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
	percpu_counter_destroy(&btp->bt_io_count);
	list_lru_destroy(&btp->bt_lru);

	xfs_blkdev_issue_flush(btp);

	kmem_free(btp);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		sectorsize)
{
	/* Set up metadata sector size info */
	btp->bt_meta_sectorsize = sectorsize;
	btp->bt_meta_sectormask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize)) {
		xfs_warn(btp->bt_mount,
			"Cannot set_blocksize to %u on device %pg",
			sectorsize, btp->bt_bdev);
		return -EINVAL;
	}

	/* Set up device logical sector size mask */
	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;

	return 0;
}
/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage. Play safe.
 */
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct xfs_mount	*mp,
	struct block_device	*bdev,
	struct dax_device	*dax_dev)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);

	btp->bt_mount = mp;
	btp->bt_dev = bdev->bd_dev;
	btp->bt_bdev = bdev;
	btp->bt_daxdev = dax_dev;

	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error_free;

	if (list_lru_init(&btp->bt_lru))
		goto error_free;

	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
		goto error_lru;

	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
	if (register_shrinker(&btp->bt_shrinker))
		goto error_pcpu;
	return btp;

error_pcpu:
	percpu_counter_destroy(&btp->bt_io_count);
error_lru:
	list_lru_destroy(&btp->bt_lru);
error_free:
	kmem_free(btp);
	return NULL;
}

/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */
void
xfs_buf_delwri_cancel(
	struct list_head	*list)
{
	struct xfs_buf		*bp;

	while (!list_empty(list)) {
		bp = list_first_entry(list, struct xfs_buf, b_list);

		xfs_buf_lock(bp);
		bp->b_flags &= ~_XBF_DELWRI_Q;
		list_del_init(&bp->b_list);
		xfs_buf_relse(bp);
	}
}
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been. Note that
 * neither this routine nor the buffer list submission functions perform
 * any internal synchronization. It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */
bool
xfs_buf_delwri_queue(
	struct xfs_buf		*bp,
	struct list_head	*list)
{
	ASSERT(xfs_buf_islocked(bp));
	ASSERT(!(bp->b_flags & XBF_READ));

	/*
	 * If the buffer is already marked delwri it already is queued up
	 * by someone else for immediate writeout. Just ignore it in that
	 * case.
	 */
	if (bp->b_flags & _XBF_DELWRI_Q) {
		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
		return false;
	}

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
	 * If a buffer gets written out synchronously or marked stale while it
	 * is on a delwri list we lazily remove it. To do this, the other party
	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
	 * It remains referenced and on the list. In a rare corner case it
	 * might get readded to a delwri list after the synchronous writeout,
	 * in which case we just need to re-add the flag here.
	 */
	bp->b_flags |= _XBF_DELWRI_Q;
	if (list_empty(&bp->b_list)) {
		atomic_inc(&bp->b_hold);
		list_add_tail(&bp->b_list, list);
	}

	return true;
}

/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */
static int
xfs_buf_cmp(
	void		*priv,
	struct list_head *a,
	struct list_head *b)
{
	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
	xfs_daddr_t	diff;

	diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}

/*
 * Submit buffers for write. If wait_list is specified, the buffers are
 * submitted using sync I/O and placed on the wait list such that the caller
 * can iowait each buffer. Otherwise async I/O is used and the buffers are
 * released at I/O completion time. In either case, buffers remain locked
 * until I/O completes and the buffer is released from the queue.
 */
static int
xfs_buf_delwri_submit_buffers(
	struct list_head	*buffer_list,
	struct list_head	*wait_list)
{
	struct xfs_buf		*bp, *n;
	int			pinned = 0;
	struct blk_plug		plug;

	list_sort(NULL, buffer_list, xfs_buf_cmp);

	blk_start_plug(&plug);
	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait_list) {
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}

		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime. In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * reference and remove it from the list here.
		 */
		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
			list_del_init(&bp->b_list);
			xfs_buf_relse(bp);
			continue;
		}

		trace_xfs_buf_delwri_split(bp, _RET_IP_);

		/*
		 * If we have a wait list, each buffer (and associated delwri
		 * queue reference) transfers to it and is submitted
		 * synchronously. Otherwise, drop the buffer from the delwri
		 * queue and submit async.
		 */
		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
		bp->b_flags |= XBF_WRITE;
		if (wait_list) {
			bp->b_flags &= ~XBF_ASYNC;
			list_move_tail(&bp->b_list, wait_list);
		} else {
			bp->b_flags |= XBF_ASYNC;
			list_del_init(&bp->b_list);
		}
		__xfs_buf_submit(bp, false);
	}
	blk_finish_plug(&plug);

	return pinned;
}
/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers. This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 *
 * Note: this function will skip buffers it would block on, and in doing so
 * leaves them on @buffer_list so they can be retried on a later pass. As such,
 * it is up to the caller to ensure that the buffer list is fully submitted or
 * cancelled appropriately when they are finished with the list. Failure to
 * cancel or resubmit the list until it is empty will result in leaked buffers
 * at unmount time.
 */
int
xfs_buf_delwri_submit_nowait(
	struct list_head	*buffer_list)
{
	return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
}

/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */
int
xfs_buf_delwri_submit(
	struct list_head	*buffer_list)
{
	LIST_HEAD		(wait_list);
	int			error = 0, error2;
	struct xfs_buf		*bp;

	xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

	/* Wait for IO to complete. */
	while (!list_empty(&wait_list)) {
		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);

		/*
		 * Wait on the locked buffer, check for errors and unlock and
		 * release the delwri queue reference.
		 */
		error2 = xfs_buf_iowait(bp);
		xfs_buf_relse(bp);
		if (!error)
			error = error2;
	}

	return error;
}
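/*
 * Illustrative sketch (editorial, not part of this file): typical delwri queue
 * usage. Buffers are queued while locked, released, and written out later in
 * one batch; the list name used here is hypothetical.
 *
 *	LIST_HEAD(buffer_list);
 *
 *	(for each locked buffer to write back:)
 *		xfs_buf_delwri_queue(bp, &buffer_list);
 *		xfs_buf_relse(bp);
 *
 *	error = xfs_buf_delwri_submit(&buffer_list);
 */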
/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and wait
 * listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantee that the buffer cannot be queued to another list
 * before returning.
 */
int
xfs_buf_delwri_pushbuf(
	struct xfs_buf		*bp,
	struct list_head	*buffer_list)
{
	LIST_HEAD		(submit_list);
	int			error;

	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);

	/*
	 * Isolate the buffer to a new local list so we can submit it for I/O
	 * independently from the rest of the original list.
	 */
	xfs_buf_lock(bp);
	list_move(&bp->b_list, &submit_list);
	xfs_buf_unlock(bp);

	/*
	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
	 * the buffer on the wait list with the original reference. Rather than
	 * bounce the buffer from a local wait list back to the original list
	 * after I/O completion, reuse the original list as the wait list.
	 */
	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);

	/*
	 * The buffer is now locked, under I/O and wait listed on the original
	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
	 * return with the buffer unlocked and on the original queue.
	 */
	error = xfs_buf_iowait(bp);
	bp->b_flags |= _XBF_DELWRI_Q;
	xfs_buf_unlock(bp);

	return error;
}

int __init
xfs_buf_init(void)
{
	xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
					 SLAB_HWCACHE_ALIGN, NULL);
	if (!xfs_buf_zone)
		goto out;

	return 0;

out:
	return -ENOMEM;
}

void
xfs_buf_terminate(void)
{
	kmem_cache_destroy(xfs_buf_zone);
}

void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
	/*
	 * Set the lru reference count to 0 based on the error injection tag.
	 * This allows userspace to disrupt buffer caching for debug/testing
	 * purposes.
	 */
	if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
		lru_ref = 0;

	atomic_set(&bp->b_lru_ref, lru_ref);
}

/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */
bool
xfs_verify_magic(
	struct xfs_buf		*bp,
	__be32			dmagic)
{
	struct xfs_mount	*mp = bp->b_mount;
	int			idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
		return false;
	return dmagic == bp->b_ops->magic[idx];
}

/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */
bool
xfs_verify_magic16(
	struct xfs_buf		*bp,
	__be16			dmagic)
{
	struct xfs_mount	*mp = bp->b_mount;
	int			idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
		return false;
	return dmagic == bp->b_ops->magic16[idx];
}
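/*
 * Illustrative sketch (editorial, not part of this file): how a read verifier
 * might use xfs_verify_magic(). The xfs_foo_hdr structure, its magic field,
 * the FOO_MAGIC values and the xfs_foo_buf_ops name are all hypothetical; real
 * verifiers live with the code for each on-disk structure and usually also
 * check CRCs and other fields before flagging corruption.
 *
 *	static void
 *	xfs_foo_read_verify(
 *		struct xfs_buf	*bp)
 *	{
 *		struct xfs_foo_hdr *hdr = bp->b_addr;
 *
 *		if (!xfs_verify_magic(bp, hdr->magic))
 *			xfs_buf_ioerror(bp, -EFSCORRUPTED);
 *	}
 *
 *	const struct xfs_buf_ops xfs_foo_buf_ops = {
 *		.name = "xfs_foo",
 *		.magic = { cpu_to_be32(FOO_MAGIC), cpu_to_be32(FOO_CRC_MAGIC) },
 *		.verify_read = xfs_foo_read_verify,
 *	};
 */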