// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * localalloc.c
 *
 * Node local data allocation
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "blockcheck.h"
#include "dlmglue.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
#include "suballoc.h"
#include "super.h"
#include "sysfile.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/* Address of the local alloc area (id2.i_lab) embedded in a dinode. */
#define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))

/* Forward declarations for the file-local helpers defined further down. */
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);

static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
					     struct ocfs2_dinode *alloc,
					     u32 *numbits,
					     struct ocfs2_alloc_reservation *resv);

static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);

static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh);

static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh);

static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
					handle_t *handle,
					struct ocfs2_alloc_context *ac);

static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode);

/*
 * ocfs2_la_default_mb() - determine a default size, in megabytes of
 * the local alloc.
 *
 * Generally, we'd like to pick as large a local alloc as
 * possible. Performance on large workloads tends to scale
 * proportionally to la size.
In addition to that, the reservations 71 * code functions more efficiently as it can reserve more windows for 72 * write. 73 * 74 * Some things work against us when trying to choose a large local alloc: 75 * 76 * - We need to ensure our sizing is picked to leave enough space in 77 * group descriptors for other allocations (such as block groups, 78 * etc). Picking default sizes which are a multiple of 4 could help 79 * - block groups are allocated in 2mb and 4mb chunks. 80 * 81 * - Likewise, we don't want to starve other nodes of bits on small 82 * file systems. This can easily be taken care of by limiting our 83 * default to a reasonable size (256M) on larger cluster sizes. 84 * 85 * - Some file systems can't support very large sizes - 4k and 8k in 86 * particular are limited to less than 128 and 256 megabytes respectively. 87 * 88 * The following reference table shows group descriptor and local 89 * alloc maximums at various cluster sizes (4k blocksize) 90 * 91 * csize: 4K group: 126M la: 121M 92 * csize: 8K group: 252M la: 243M 93 * csize: 16K group: 504M la: 486M 94 * csize: 32K group: 1008M la: 972M 95 * csize: 64K group: 2016M la: 1944M 96 * csize: 128K group: 4032M la: 3888M 97 * csize: 256K group: 8064M la: 7776M 98 * csize: 512K group: 16128M la: 15552M 99 * csize: 1024K group: 32256M la: 31104M 100 */ 101 #define OCFS2_LA_MAX_DEFAULT_MB 256 102 #define OCFS2_LA_OLD_DEFAULT 8 103 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 104 { 105 unsigned int la_mb; 106 unsigned int gd_mb; 107 unsigned int la_max_mb; 108 unsigned int megs_per_slot; 109 struct super_block *sb = osb->sb; 110 111 gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 112 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 113 114 /* 115 * This takes care of files systems with very small group 116 * descriptors - 512 byte blocksize at cluster sizes lower 117 * than 16K and also 1k blocksize with 4k cluster size. 
118 */ 119 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 120 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 121 return OCFS2_LA_OLD_DEFAULT; 122 123 /* 124 * Leave enough room for some block groups and make the final 125 * value we work from a multiple of 4. 126 */ 127 gd_mb -= 16; 128 gd_mb &= 0xFFFFFFFB; 129 130 la_mb = gd_mb; 131 132 /* 133 * Keep window sizes down to a reasonable default 134 */ 135 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 136 /* 137 * Some clustersize / blocksize combinations will have 138 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 139 * default size, but get poor distribution when 140 * limited to exactly 256 megabytes. 141 * 142 * As an example, 16K clustersize at 4K blocksize 143 * gives us a cluster group size of 504M. Paring the 144 * local alloc size down to 256 however, would give us 145 * only one window and around 200MB left in the 146 * cluster group. Instead, find the first size below 147 * 256 which would give us an even distribution. 148 * 149 * Larger cluster group sizes actually work out pretty 150 * well when pared to 256, so we don't have to do this 151 * for any group that fits more than two 152 * OCFS2_LA_MAX_DEFAULT_MB windows. 153 */ 154 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 155 la_mb = 256; 156 else { 157 unsigned int gd_mult = gd_mb; 158 159 while (gd_mult > 256) 160 gd_mult = gd_mult >> 1; 161 162 la_mb = gd_mult; 163 } 164 } 165 166 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 167 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 168 /* Too many nodes, too few disk clusters. */ 169 if (megs_per_slot < la_mb) 170 la_mb = megs_per_slot; 171 172 /* We can't store more bits than we can in a block. 
*/ 173 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 174 ocfs2_local_alloc_size(sb) * 8); 175 if (la_mb > la_max_mb) 176 la_mb = la_max_mb; 177 178 return la_mb; 179 } 180 181 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 182 { 183 struct super_block *sb = osb->sb; 184 unsigned int la_default_mb = ocfs2_la_default_mb(osb); 185 unsigned int la_max_mb; 186 187 la_max_mb = ocfs2_clusters_to_megabytes(sb, 188 ocfs2_local_alloc_size(sb) * 8); 189 190 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 191 192 if (requested_mb == -1) { 193 /* No user request - use defaults */ 194 osb->local_alloc_default_bits = 195 ocfs2_megabytes_to_clusters(sb, la_default_mb); 196 } else if (requested_mb > la_max_mb) { 197 /* Request is too big, we give the maximum available */ 198 osb->local_alloc_default_bits = 199 ocfs2_megabytes_to_clusters(sb, la_max_mb); 200 } else { 201 osb->local_alloc_default_bits = 202 ocfs2_megabytes_to_clusters(sb, requested_mb); 203 } 204 205 osb->local_alloc_bits = osb->local_alloc_default_bits; 206 } 207 208 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 209 { 210 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 211 osb->local_alloc_state == OCFS2_LA_ENABLED); 212 } 213 214 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 215 unsigned int num_clusters) 216 { 217 spin_lock(&osb->osb_lock); 218 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 219 osb->local_alloc_state == OCFS2_LA_THROTTLED) 220 if (num_clusters >= osb->local_alloc_default_bits) { 221 cancel_delayed_work(&osb->la_enable_wq); 222 osb->local_alloc_state = OCFS2_LA_ENABLED; 223 } 224 spin_unlock(&osb->osb_lock); 225 } 226 227 void ocfs2_la_enable_worker(struct work_struct *work) 228 { 229 struct ocfs2_super *osb = 230 container_of(work, struct ocfs2_super, 231 la_enable_wq.work); 232 spin_lock(&osb->osb_lock); 233 osb->local_alloc_state = OCFS2_LA_ENABLED; 234 spin_unlock(&osb->osb_lock); 235 } 236 237 /* 238 * 
Tell us whether a given allocation should use the local alloc 239 * file. Otherwise, it has to go to the main bitmap. 240 * 241 * This function does semi-dirty reads of local alloc size and state! 242 * This is ok however, as the values are re-checked once under mutex. 243 */ 244 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 245 { 246 int ret = 0; 247 int la_bits; 248 249 spin_lock(&osb->osb_lock); 250 la_bits = osb->local_alloc_bits; 251 252 if (!ocfs2_la_state_enabled(osb)) 253 goto bail; 254 255 /* la_bits should be at least twice the size (in clusters) of 256 * a new block group. We want to be sure block group 257 * allocations go through the local alloc, so allow an 258 * allocation to take up to half the bitmap. */ 259 if (bits > (la_bits / 2)) 260 goto bail; 261 262 ret = 1; 263 bail: 264 trace_ocfs2_alloc_should_use_local( 265 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); 266 spin_unlock(&osb->osb_lock); 267 return ret; 268 } 269 270 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 271 { 272 int status = 0; 273 struct ocfs2_dinode *alloc = NULL; 274 struct buffer_head *alloc_bh = NULL; 275 u32 num_used; 276 struct inode *inode = NULL; 277 struct ocfs2_local_alloc *la; 278 279 if (osb->local_alloc_bits == 0) 280 goto bail; 281 282 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 283 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 284 "than max possible %u. 
Using defaults.\n", 285 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 286 osb->local_alloc_bits = 287 ocfs2_megabytes_to_clusters(osb->sb, 288 ocfs2_la_default_mb(osb)); 289 } 290 291 /* read the alloc off disk */ 292 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 293 osb->slot_num); 294 if (!inode) { 295 status = -EINVAL; 296 mlog_errno(status); 297 goto bail; 298 } 299 300 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 301 OCFS2_BH_IGNORE_CACHE); 302 if (status < 0) { 303 mlog_errno(status); 304 goto bail; 305 } 306 307 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 308 la = OCFS2_LOCAL_ALLOC(alloc); 309 310 if (!(le32_to_cpu(alloc->i_flags) & 311 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 312 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 313 (unsigned long long)OCFS2_I(inode)->ip_blkno); 314 status = -EINVAL; 315 goto bail; 316 } 317 318 if ((la->la_size == 0) || 319 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 320 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 321 le16_to_cpu(la->la_size)); 322 status = -EINVAL; 323 goto bail; 324 } 325 326 /* do a little verification. */ 327 num_used = ocfs2_local_alloc_count_bits(alloc); 328 329 /* hopefully the local alloc has always been recovered before 330 * we load it. 
*/ 331 if (num_used 332 || alloc->id1.bitmap1.i_used 333 || alloc->id1.bitmap1.i_total 334 || la->la_bm_off) { 335 mlog(ML_ERROR, "inconsistent detected, clean journal with" 336 " unrecovered local alloc, please run fsck.ocfs2!\n" 337 "found = %u, set = %u, taken = %u, off = %u\n", 338 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 339 le32_to_cpu(alloc->id1.bitmap1.i_total), 340 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 341 342 status = -EINVAL; 343 goto bail; 344 } 345 346 osb->local_alloc_bh = alloc_bh; 347 osb->local_alloc_state = OCFS2_LA_ENABLED; 348 349 bail: 350 if (status < 0) 351 brelse(alloc_bh); 352 iput(inode); 353 354 trace_ocfs2_load_local_alloc(osb->local_alloc_bits); 355 356 if (status) 357 mlog_errno(status); 358 return status; 359 } 360 361 /* 362 * return any unused bits to the bitmap and write out a clean 363 * local_alloc. 364 * 365 * local_alloc_bh is optional. If not passed, we will simply use the 366 * one off osb. If you do pass it however, be warned that it *will* be 367 * returned brelse'd and NULL'd out.*/ 368 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 369 { 370 int status; 371 handle_t *handle; 372 struct inode *local_alloc_inode = NULL; 373 struct buffer_head *bh = NULL; 374 struct buffer_head *main_bm_bh = NULL; 375 struct inode *main_bm_inode = NULL; 376 struct ocfs2_dinode *alloc_copy = NULL; 377 struct ocfs2_dinode *alloc = NULL; 378 379 cancel_delayed_work(&osb->la_enable_wq); 380 if (osb->ocfs2_wq) 381 flush_workqueue(osb->ocfs2_wq); 382 383 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 384 goto out; 385 386 local_alloc_inode = 387 ocfs2_get_system_file_inode(osb, 388 LOCAL_ALLOC_SYSTEM_INODE, 389 osb->slot_num); 390 if (!local_alloc_inode) { 391 status = -ENOENT; 392 mlog_errno(status); 393 goto out; 394 } 395 396 osb->local_alloc_state = OCFS2_LA_DISABLED; 397 398 ocfs2_resmap_uninit(&osb->osb_la_resmap); 399 400 main_bm_inode = ocfs2_get_system_file_inode(osb, 401 GLOBAL_BITMAP_SYSTEM_INODE, 402 
OCFS2_INVALID_SLOT); 403 if (!main_bm_inode) { 404 status = -EINVAL; 405 mlog_errno(status); 406 goto out; 407 } 408 409 inode_lock(main_bm_inode); 410 411 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 412 if (status < 0) { 413 mlog_errno(status); 414 goto out_mutex; 415 } 416 417 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 418 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 419 if (IS_ERR(handle)) { 420 mlog_errno(PTR_ERR(handle)); 421 handle = NULL; 422 goto out_unlock; 423 } 424 425 bh = osb->local_alloc_bh; 426 alloc = (struct ocfs2_dinode *) bh->b_data; 427 428 alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); 429 if (!alloc_copy) { 430 status = -ENOMEM; 431 goto out_commit; 432 } 433 434 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 435 bh, OCFS2_JOURNAL_ACCESS_WRITE); 436 if (status < 0) { 437 mlog_errno(status); 438 goto out_commit; 439 } 440 441 ocfs2_clear_local_alloc(alloc); 442 ocfs2_journal_dirty(handle, bh); 443 444 brelse(bh); 445 osb->local_alloc_bh = NULL; 446 osb->local_alloc_state = OCFS2_LA_UNUSED; 447 448 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 449 main_bm_inode, main_bm_bh); 450 if (status < 0) 451 mlog_errno(status); 452 453 out_commit: 454 ocfs2_commit_trans(osb, handle); 455 456 out_unlock: 457 brelse(main_bm_bh); 458 459 ocfs2_inode_unlock(main_bm_inode, 1); 460 461 out_mutex: 462 inode_unlock(main_bm_inode); 463 iput(main_bm_inode); 464 465 out: 466 iput(local_alloc_inode); 467 468 kfree(alloc_copy); 469 } 470 471 /* 472 * We want to free the bitmap bits outside of any recovery context as 473 * we'll need a cluster lock to do so, but we must clear the local 474 * alloc before giving up the recovered nodes journal. 
To solve this, 475 * we kmalloc a copy of the local alloc before it's change for the 476 * caller to process with ocfs2_complete_local_alloc_recovery 477 */ 478 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 479 int slot_num, 480 struct ocfs2_dinode **alloc_copy) 481 { 482 int status = 0; 483 struct buffer_head *alloc_bh = NULL; 484 struct inode *inode = NULL; 485 struct ocfs2_dinode *alloc; 486 487 trace_ocfs2_begin_local_alloc_recovery(slot_num); 488 489 *alloc_copy = NULL; 490 491 inode = ocfs2_get_system_file_inode(osb, 492 LOCAL_ALLOC_SYSTEM_INODE, 493 slot_num); 494 if (!inode) { 495 status = -EINVAL; 496 mlog_errno(status); 497 goto bail; 498 } 499 500 inode_lock(inode); 501 502 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 503 OCFS2_BH_IGNORE_CACHE); 504 if (status < 0) { 505 mlog_errno(status); 506 goto bail; 507 } 508 509 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 510 if (!(*alloc_copy)) { 511 status = -ENOMEM; 512 goto bail; 513 } 514 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 515 516 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 517 ocfs2_clear_local_alloc(alloc); 518 519 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 520 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 521 if (status < 0) 522 mlog_errno(status); 523 524 bail: 525 if (status < 0) { 526 kfree(*alloc_copy); 527 *alloc_copy = NULL; 528 } 529 530 brelse(alloc_bh); 531 532 if (inode) { 533 inode_unlock(inode); 534 iput(inode); 535 } 536 537 if (status) 538 mlog_errno(status); 539 return status; 540 } 541 542 /* 543 * Step 2: By now, we've completed the journal recovery, we've stamped 544 * a clean local alloc on disk and dropped the node out of the 545 * recovery map. Dlm locks will no longer stall, so lets clear out the 546 * main bitmap. 
 */
int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
					struct ocfs2_dinode *alloc)
{
	int status;
	handle_t *handle;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode;

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	/* Lock order: inode lock first, then the ocfs2 inode lock. */
	inode_lock(main_bm_inode);

	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto out_unlock;
	}

	/* we want the bitmap change to be recorded on disk asap */
	handle->h_sync = 1;

	/* Give the unused bits of the saved window back to the main bitmap. */
	status = ocfs2_sync_local_to_main(osb, handle, alloc,
					  main_bm_inode, main_bm_bh);
	if (status < 0)
		mlog_errno(status);

	/* The transaction is committed even if the sync failed. */
	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	inode_unlock(main_bm_inode);

	brelse(main_bm_bh);

	iput(main_bm_inode);

out:
	/* Only on full success do we call ocfs2_init_steal_slots(). */
	if (!status)
		ocfs2_init_steal_slots(osb);
	if (status)
		mlog_errno(status);
	return status;
}

/*
 * make sure we've got at least bits_wanted contiguous bits in the
 * local alloc. You lose them when you drop i_mutex.
 *
 * We will add ourselves to the transaction passed in, but may start
 * our own in order to shift windows.
615 */ 616 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 617 u32 bits_wanted, 618 struct ocfs2_alloc_context *ac) 619 { 620 int status; 621 struct ocfs2_dinode *alloc; 622 struct inode *local_alloc_inode; 623 unsigned int free_bits; 624 625 BUG_ON(!ac); 626 627 local_alloc_inode = 628 ocfs2_get_system_file_inode(osb, 629 LOCAL_ALLOC_SYSTEM_INODE, 630 osb->slot_num); 631 if (!local_alloc_inode) { 632 status = -ENOENT; 633 mlog_errno(status); 634 goto bail; 635 } 636 637 inode_lock(local_alloc_inode); 638 639 /* 640 * We must double check state and allocator bits because 641 * another process may have changed them while holding i_mutex. 642 */ 643 spin_lock(&osb->osb_lock); 644 if (!ocfs2_la_state_enabled(osb) || 645 (bits_wanted > osb->local_alloc_bits)) { 646 spin_unlock(&osb->osb_lock); 647 status = -ENOSPC; 648 goto bail; 649 } 650 spin_unlock(&osb->osb_lock); 651 652 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 653 654 #ifdef CONFIG_OCFS2_DEBUG_FS 655 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 656 ocfs2_local_alloc_count_bits(alloc)) { 657 status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", 658 (unsigned long long)le64_to_cpu(alloc->i_blkno), 659 le32_to_cpu(alloc->id1.bitmap1.i_used), 660 ocfs2_local_alloc_count_bits(alloc)); 661 goto bail; 662 } 663 #endif 664 665 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 666 le32_to_cpu(alloc->id1.bitmap1.i_used); 667 if (bits_wanted > free_bits) { 668 /* uhoh, window change time. */ 669 status = 670 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 671 if (status < 0) { 672 if (status != -ENOSPC) 673 mlog_errno(status); 674 goto bail; 675 } 676 677 /* 678 * Under certain conditions, the window slide code 679 * might have reduced the number of bits available or 680 * disabled the local alloc entirely. Re-check 681 * here and return -ENOSPC if necessary. 
682 */ 683 status = -ENOSPC; 684 if (!ocfs2_la_state_enabled(osb)) 685 goto bail; 686 687 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 688 le32_to_cpu(alloc->id1.bitmap1.i_used); 689 if (bits_wanted > free_bits) 690 goto bail; 691 } 692 693 ac->ac_inode = local_alloc_inode; 694 /* We should never use localalloc from another slot */ 695 ac->ac_alloc_slot = osb->slot_num; 696 ac->ac_which = OCFS2_AC_USE_LOCAL; 697 get_bh(osb->local_alloc_bh); 698 ac->ac_bh = osb->local_alloc_bh; 699 status = 0; 700 bail: 701 if (status < 0 && local_alloc_inode) { 702 inode_unlock(local_alloc_inode); 703 iput(local_alloc_inode); 704 } 705 706 trace_ocfs2_reserve_local_alloc_bits( 707 (unsigned long long)ac->ac_max_block, 708 bits_wanted, osb->slot_num, status); 709 710 if (status) 711 mlog_errno(status); 712 return status; 713 } 714 715 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 716 handle_t *handle, 717 struct ocfs2_alloc_context *ac, 718 u32 bits_wanted, 719 u32 *bit_off, 720 u32 *num_bits) 721 { 722 int status, start; 723 struct inode *local_alloc_inode; 724 void *bitmap; 725 struct ocfs2_dinode *alloc; 726 struct ocfs2_local_alloc *la; 727 728 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 729 730 local_alloc_inode = ac->ac_inode; 731 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 732 la = OCFS2_LOCAL_ALLOC(alloc); 733 734 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, 735 ac->ac_resv); 736 if (start == -1) { 737 /* TODO: Shouldn't we just BUG here? 
*/ 738 status = -ENOSPC; 739 mlog_errno(status); 740 goto bail; 741 } 742 743 bitmap = la->la_bitmap; 744 *bit_off = le32_to_cpu(la->la_bm_off) + start; 745 *num_bits = bits_wanted; 746 747 status = ocfs2_journal_access_di(handle, 748 INODE_CACHE(local_alloc_inode), 749 osb->local_alloc_bh, 750 OCFS2_JOURNAL_ACCESS_WRITE); 751 if (status < 0) { 752 mlog_errno(status); 753 goto bail; 754 } 755 756 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, 757 bits_wanted); 758 759 while(bits_wanted--) 760 ocfs2_set_bit(start++, bitmap); 761 762 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 763 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 764 765 bail: 766 if (status) 767 mlog_errno(status); 768 return status; 769 } 770 771 int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, 772 handle_t *handle, 773 struct ocfs2_alloc_context *ac, 774 u32 bit_off, 775 u32 num_bits) 776 { 777 int status, start; 778 u32 clear_bits; 779 struct inode *local_alloc_inode; 780 void *bitmap; 781 struct ocfs2_dinode *alloc; 782 struct ocfs2_local_alloc *la; 783 784 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 785 786 local_alloc_inode = ac->ac_inode; 787 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 788 la = OCFS2_LOCAL_ALLOC(alloc); 789 790 bitmap = la->la_bitmap; 791 start = bit_off - le32_to_cpu(la->la_bm_off); 792 clear_bits = num_bits; 793 794 status = ocfs2_journal_access_di(handle, 795 INODE_CACHE(local_alloc_inode), 796 osb->local_alloc_bh, 797 OCFS2_JOURNAL_ACCESS_WRITE); 798 if (status < 0) { 799 mlog_errno(status); 800 goto bail; 801 } 802 803 while (clear_bits--) 804 ocfs2_clear_bit(start++, bitmap); 805 806 le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); 807 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 808 809 bail: 810 return status; 811 } 812 813 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 814 { 815 u32 count; 816 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 817 818 count = 
memweight(la->la_bitmap, le16_to_cpu(la->la_size)); 819 820 trace_ocfs2_local_alloc_count_bits(count); 821 return count; 822 } 823 824 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 825 struct ocfs2_dinode *alloc, 826 u32 *numbits, 827 struct ocfs2_alloc_reservation *resv) 828 { 829 int numfound = 0, bitoff, left, startoff; 830 int local_resv = 0; 831 struct ocfs2_alloc_reservation r; 832 void *bitmap = NULL; 833 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 834 835 if (!alloc->id1.bitmap1.i_total) { 836 bitoff = -1; 837 goto bail; 838 } 839 840 if (!resv) { 841 local_resv = 1; 842 ocfs2_resv_init_once(&r); 843 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); 844 resv = &r; 845 } 846 847 numfound = *numbits; 848 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { 849 if (numfound < *numbits) 850 *numbits = numfound; 851 goto bail; 852 } 853 854 /* 855 * Code error. While reservations are enabled, local 856 * allocation should _always_ go through them. 857 */ 858 BUG_ON(osb->osb_resv_level != 0); 859 860 /* 861 * Reservations are disabled. Handle this the old way. 862 */ 863 864 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 865 866 numfound = bitoff = startoff = 0; 867 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 868 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 869 if (bitoff == left) { 870 /* mlog(0, "bitoff (%d) == left", bitoff); */ 871 break; 872 } 873 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 874 "numfound = %d\n", bitoff, startoff, numfound);*/ 875 876 /* Ok, we found a zero bit... is it contig. 
or do we 877 * start over?*/ 878 if (bitoff == startoff) { 879 /* we found a zero */ 880 numfound++; 881 startoff++; 882 } else { 883 /* got a zero after some ones */ 884 numfound = 1; 885 startoff = bitoff+1; 886 } 887 /* we got everything we needed */ 888 if (numfound == *numbits) { 889 /* mlog(0, "Found it all!\n"); */ 890 break; 891 } 892 } 893 894 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); 895 896 if (numfound == *numbits) 897 bitoff = startoff - numfound; 898 else 899 bitoff = -1; 900 901 bail: 902 if (local_resv) 903 ocfs2_resv_discard(resmap, resv); 904 905 trace_ocfs2_local_alloc_find_clear_bits(*numbits, 906 le32_to_cpu(alloc->id1.bitmap1.i_total), 907 bitoff, numfound); 908 909 return bitoff; 910 } 911 912 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 913 { 914 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 915 int i; 916 917 alloc->id1.bitmap1.i_total = 0; 918 alloc->id1.bitmap1.i_used = 0; 919 la->la_bm_off = 0; 920 for(i = 0; i < le16_to_cpu(la->la_size); i++) 921 la->la_bitmap[i] = 0; 922 } 923 924 #if 0 925 /* turn this on and uncomment below to aid debugging window shifts. */ 926 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 927 unsigned int start, 928 unsigned int count) 929 { 930 unsigned int tmp = count; 931 while(tmp--) { 932 if (ocfs2_test_bit(start + tmp, bitmap)) { 933 printk("ocfs2_verify_zero_bits: start = %u, count = " 934 "%u\n", start, count); 935 printk("ocfs2_verify_zero_bits: bit %u is set!", 936 start + tmp); 937 BUG(); 938 } 939 } 940 } 941 #endif 942 943 /* 944 * sync the local alloc to main bitmap. 945 * 946 * assumes you've already locked the main bitmap -- the bitmap inode 947 * passed is used for caching. 
 */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh)
{
	int status = 0;
	int bit_off, left, count, start;
	u64 la_start_blk;
	u64 blkno;
	void *bitmap;
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);

	trace_ocfs2_sync_local_to_main(
	     le32_to_cpu(alloc->id1.bitmap1.i_total),
	     le32_to_cpu(alloc->id1.bitmap1.i_used));

	/* No window, nothing to give back. */
	if (!alloc->id1.bitmap1.i_total) {
		goto bail;
	}

	/* Every bit of the window is in use, nothing to give back. */
	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
		goto bail;
	}

	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
						le32_to_cpu(la->la_bm_off));
	bitmap = la->la_bitmap;
	start = count = bit_off = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);

	/*
	 * Walk the runs of zero (unused) bits.  'count' is the length
	 * of the run currently being collected and 'start' is one past
	 * its last bit, so the run begins at start - count.
	 *
	 * NOTE(review): the loop appears to terminate only through the
	 * 'bit_off >= left' break below, and the final run is released
	 * on that same iteration -- confirm that
	 * ocfs2_find_next_zero_bit() returns 'left' (not -1) when no
	 * zero bit remains before changing the loop condition.
	 */
	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
	       != -1) {
		/* The next zero bit directly extends the current run. */
		if ((bit_off < left) && (bit_off == start)) {
			count++;
			start++;
			continue;
		}
		/* The run ended; release it back to the main bitmap. */
		if (count) {
			blkno = la_start_blk +
				ocfs2_clusters_to_blocks(osb->sb,
							 start - count);

			trace_ocfs2_sync_local_to_main_free(
			     count, start - count,
			     (unsigned long long)la_start_blk,
			     (unsigned long long)blkno);

			status = ocfs2_release_clusters(handle,
							main_bm_inode,
							main_bm_bh, blkno,
							count);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		/* Ran off the end of the window. */
		if (bit_off >= left)
			break;
		/* Start a new run at the zero bit just found. */
		count = 1;
		start = bit_off + 1;
	}

bail:
	if (status)
		mlog_errno(status);
	return status;
}

/* Reasons for recomputing the window size in ocfs2_recalc_la_window(). */
enum ocfs2_la_event {
	OCFS2_LA_EVENT_SLIDE,		/* Normal window slide. */
	OCFS2_LA_EVENT_FRAGMENTED,	/* The global bitmap has
					 * enough bits theoretically
					 * free, but a contiguous
					 * allocation could not be
					 * found. */
	OCFS2_LA_EVENT_ENOSPC,		/* Global bitmap doesn't have
					 * enough bits free to satisfy
					 * our request. */
};
/* Delay passed to queue_delayed_work() for osb->la_enable_wq. */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
/*
 * Given an event, calculate the size of our next local alloc window.
 *
 * This should always be called under i_mutex of the local alloc inode
 * so that local alloc disabling doesn't race with processes trying to
 * use the allocator.
 *
 * Returns the state which the local alloc was left in. This value can
 * be ignored by some paths.
 */
static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
				  enum ocfs2_la_event event)
{
	unsigned int bits;
	int state;

	spin_lock(&osb->osb_lock);
	/* Being called while already disabled is unexpected: warn once. */
	if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
		WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
		goto out_unlock;
	}

	/*
	 * ENOSPC and fragmentation are treated similarly for now.
	 */
	if (event == OCFS2_LA_EVENT_ENOSPC ||
	    event == OCFS2_LA_EVENT_FRAGMENTED) {
		/*
		 * We ran out of contiguous space in the primary
		 * bitmap. Drastically reduce the number of bits used
		 * by local alloc until we have to disable it.
		 */
		bits = osb->local_alloc_bits >> 1;
		if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
			/*
			 * By setting state to THROTTLED, we'll keep
			 * the number of local alloc bits used down
			 * until an event occurs which would give us
			 * reason to assume the bitmap situation might
			 * have changed.
			 */
			osb->local_alloc_state = OCFS2_LA_THROTTLED;
			osb->local_alloc_bits = bits;
		} else {
			osb->local_alloc_state = OCFS2_LA_DISABLED;
		}
		/* In both cases schedule the worker that re-enables us. */
		queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
				   OCFS2_LA_ENABLE_INTERVAL);
		goto out_unlock;
	}

	/*
	 * Don't increase the size of the local alloc window until we
	 * know we might be able to fulfill the request. Otherwise, we
	 * risk bouncing around the global bitmap during periods of
	 * low space.
	 */
	if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
		osb->local_alloc_bits = osb->local_alloc_default_bits;

out_unlock:
	state = osb->local_alloc_state;
	spin_unlock(&osb->osb_lock);

	return state;
}

/*
 * Allocate an alloc context and reserve osb->local_alloc_bits bits
 * in the cluster bitmap for a new window.
 *
 * On -ENOSPC the window is shrunk through ocfs2_recalc_la_window()
 * and the reservation is retried, until it either succeeds or the
 * local alloc ends up disabled.
 *
 * On success *ac holds the context, and *bitmap_inode / *bitmap_bh
 * carry extra references (igrab() / get_bh()) on the context's inode
 * and buffer head.  On failure the context is freed and *ac is NULL.
 */
static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
						struct ocfs2_alloc_context **ac,
						struct inode **bitmap_inode,
						struct buffer_head **bitmap_bh)
{
	int status;

	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
	if (!(*ac)) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

retry_enospc:
	(*ac)->ac_bits_wanted = osb->local_alloc_bits;
	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
	if (status == -ENOSPC) {
		/* Shrink the window; give up once the local alloc is disabled. */
		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
		    OCFS2_LA_DISABLED)
			goto bail;

		/* Drop what the failed attempt left behind and start clean. */
		ocfs2_free_ac_resource(*ac);
		memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
		goto retry_enospc;
	}
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	*bitmap_inode = (*ac)->ac_inode;
	igrab(*bitmap_inode);
	*bitmap_bh = (*ac)->ac_bh;
	get_bh(*bitmap_bh);
	status = 0;
bail:
	if ((status < 0) && *ac) {
		ocfs2_free_alloc_context(*ac);
		*ac = NULL;
	}

	if (status)
		mlog_errno(status);
	return status;
}

/*
 * pass it the bitmap lock in lock_bh if you have it.
1147 */ 1148 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1149 handle_t *handle, 1150 struct ocfs2_alloc_context *ac) 1151 { 1152 int status = 0; 1153 u32 cluster_off, cluster_count; 1154 struct ocfs2_dinode *alloc = NULL; 1155 struct ocfs2_local_alloc *la; 1156 1157 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1158 la = OCFS2_LOCAL_ALLOC(alloc); 1159 1160 trace_ocfs2_local_alloc_new_window( 1161 le32_to_cpu(alloc->id1.bitmap1.i_total), 1162 osb->local_alloc_bits); 1163 1164 /* Instruct the allocation code to try the most recently used 1165 * cluster group. We'll re-record the group used this pass 1166 * below. */ 1167 ac->ac_last_group = osb->la_last_gd; 1168 1169 /* we used the generic suballoc reserve function, but we set 1170 * everything up nicely, so there's no reason why we can't use 1171 * the more specific cluster api to claim bits. */ 1172 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, 1173 &cluster_off, &cluster_count); 1174 if (status == -ENOSPC) { 1175 retry_enospc: 1176 /* 1177 * Note: We could also try syncing the journal here to 1178 * allow use of any free bits which the current 1179 * transaction can't give us access to. --Mark 1180 */ 1181 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1182 OCFS2_LA_DISABLED) 1183 goto bail; 1184 1185 ac->ac_bits_wanted = osb->local_alloc_bits; 1186 status = ocfs2_claim_clusters(handle, ac, 1187 osb->local_alloc_bits, 1188 &cluster_off, 1189 &cluster_count); 1190 if (status == -ENOSPC) 1191 goto retry_enospc; 1192 /* 1193 * We only shrunk the *minimum* number of in our 1194 * request - it's entirely possible that the allocator 1195 * might give us more than we asked for. 
1196 */ 1197 if (status == 0) { 1198 spin_lock(&osb->osb_lock); 1199 osb->local_alloc_bits = cluster_count; 1200 spin_unlock(&osb->osb_lock); 1201 } 1202 } 1203 if (status < 0) { 1204 if (status != -ENOSPC) 1205 mlog_errno(status); 1206 goto bail; 1207 } 1208 1209 osb->la_last_gd = ac->ac_last_group; 1210 1211 la->la_bm_off = cpu_to_le32(cluster_off); 1212 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1213 /* just in case... In the future when we find space ourselves, 1214 * we don't have to get all contiguous -- but we'll have to 1215 * set all previously used bits in bitmap and update 1216 * la_bits_set before setting the bits in the main bitmap. */ 1217 alloc->id1.bitmap1.i_used = 0; 1218 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1219 le16_to_cpu(la->la_size)); 1220 1221 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1222 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1223 1224 trace_ocfs2_local_alloc_new_window_result( 1225 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off, 1226 le32_to_cpu(alloc->id1.bitmap1.i_total)); 1227 1228 bail: 1229 if (status) 1230 mlog_errno(status); 1231 return status; 1232 } 1233 1234 /* Note that we do *NOT* lock the local alloc inode here as 1235 * it's been locked already for us. */ 1236 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1237 struct inode *local_alloc_inode) 1238 { 1239 int status = 0; 1240 struct buffer_head *main_bm_bh = NULL; 1241 struct inode *main_bm_inode = NULL; 1242 handle_t *handle = NULL; 1243 struct ocfs2_dinode *alloc; 1244 struct ocfs2_dinode *alloc_copy = NULL; 1245 struct ocfs2_alloc_context *ac = NULL; 1246 1247 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1248 1249 /* This will lock the main bitmap for us. 
*/ 1250 status = ocfs2_local_alloc_reserve_for_window(osb, 1251 &ac, 1252 &main_bm_inode, 1253 &main_bm_bh); 1254 if (status < 0) { 1255 if (status != -ENOSPC) 1256 mlog_errno(status); 1257 goto bail; 1258 } 1259 1260 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1261 if (IS_ERR(handle)) { 1262 status = PTR_ERR(handle); 1263 handle = NULL; 1264 mlog_errno(status); 1265 goto bail; 1266 } 1267 1268 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1269 1270 /* We want to clear the local alloc before doing anything 1271 * else, so that if we error later during this operation, 1272 * local alloc shutdown won't try to double free main bitmap 1273 * bits. Make a copy so the sync function knows which bits to 1274 * free. */ 1275 alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); 1276 if (!alloc_copy) { 1277 status = -ENOMEM; 1278 mlog_errno(status); 1279 goto bail; 1280 } 1281 1282 status = ocfs2_journal_access_di(handle, 1283 INODE_CACHE(local_alloc_inode), 1284 osb->local_alloc_bh, 1285 OCFS2_JOURNAL_ACCESS_WRITE); 1286 if (status < 0) { 1287 mlog_errno(status); 1288 goto bail; 1289 } 1290 1291 ocfs2_clear_local_alloc(alloc); 1292 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1293 1294 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1295 main_bm_inode, main_bm_bh); 1296 if (status < 0) { 1297 mlog_errno(status); 1298 goto bail; 1299 } 1300 1301 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1302 if (status < 0) { 1303 if (status != -ENOSPC) 1304 mlog_errno(status); 1305 goto bail; 1306 } 1307 1308 atomic_inc(&osb->alloc_stats.moves); 1309 1310 bail: 1311 if (handle) 1312 ocfs2_commit_trans(osb, handle); 1313 1314 brelse(main_bm_bh); 1315 1316 iput(main_bm_inode); 1317 kfree(alloc_copy); 1318 1319 if (ac) 1320 ocfs2_free_alloc_context(ac); 1321 1322 if (status) 1323 mlog_errno(status); 1324 return status; 1325 } 1326 1327