1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * localalloc.c 6 * 7 * Node local data allocation 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 */ 11 12 #include <linux/fs.h> 13 #include <linux/types.h> 14 #include <linux/slab.h> 15 #include <linux/highmem.h> 16 #include <linux/bitops.h> 17 18 #include <cluster/masklog.h> 19 20 #include "ocfs2.h" 21 22 #include "alloc.h" 23 #include "blockcheck.h" 24 #include "dlmglue.h" 25 #include "inode.h" 26 #include "journal.h" 27 #include "localalloc.h" 28 #include "suballoc.h" 29 #include "super.h" 30 #include "sysfile.h" 31 #include "ocfs2_trace.h" 32 33 #include "buffer_head_io.h" 34 35 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 36 37 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 38 39 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 40 struct ocfs2_dinode *alloc, 41 u32 *numbits, 42 struct ocfs2_alloc_reservation *resv); 43 44 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 45 46 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 47 handle_t *handle, 48 struct ocfs2_dinode *alloc, 49 struct inode *main_bm_inode, 50 struct buffer_head *main_bm_bh); 51 52 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 53 struct ocfs2_alloc_context **ac, 54 struct inode **bitmap_inode, 55 struct buffer_head **bitmap_bh); 56 57 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 58 handle_t *handle, 59 struct ocfs2_alloc_context *ac); 60 61 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 62 struct inode *local_alloc_inode); 63 64 /* 65 * ocfs2_la_default_mb() - determine a default size, in megabytes of 66 * the local alloc. 67 * 68 * Generally, we'd like to pick as large a local alloc as 69 * possible. Performance on large workloads tends to scale 70 * proportionally to la size. In addition to that, the reservations 71 * code functions more efficiently as it can reserve more windows for 72 * write. 73 * 74 * Some things work against us when trying to choose a large local alloc: 75 * 76 * - We need to ensure our sizing is picked to leave enough space in 77 * group descriptors for other allocations (such as block groups, 78 * etc). Picking default sizes which are a multiple of 4 could help 79 * - block groups are allocated in 2mb and 4mb chunks. 80 * 81 * - Likewise, we don't want to starve other nodes of bits on small 82 * file systems. This can easily be taken care of by limiting our 83 * default to a reasonable size (256M) on larger cluster sizes. 84 * 85 * - Some file systems can't support very large sizes - 4k and 8k in 86 * particular are limited to less than 128 and 256 megabytes respectively. 87 * 88 * The following reference table shows group descriptor and local 89 * alloc maximums at various cluster sizes (4k blocksize) 90 * 91 * csize: 4K group: 126M la: 121M 92 * csize: 8K group: 252M la: 243M 93 * csize: 16K group: 504M la: 486M 94 * csize: 32K group: 1008M la: 972M 95 * csize: 64K group: 2016M la: 1944M 96 * csize: 128K group: 4032M la: 3888M 97 * csize: 256K group: 8064M la: 7776M 98 * csize: 512K group: 16128M la: 15552M 99 * csize: 1024K group: 32256M la: 31104M 100 */ 101 #define OCFS2_LA_MAX_DEFAULT_MB 256 102 #define OCFS2_LA_OLD_DEFAULT 8 103 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 104 { 105 unsigned int la_mb; 106 unsigned int gd_mb; 107 unsigned int la_max_mb; 108 unsigned int megs_per_slot; 109 struct super_block *sb = osb->sb; 110 111 gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 112 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 113 114 /* 115 * This takes care of files systems with very small group 116 * descriptors - 512 byte blocksize at cluster sizes lower 117 * than 16K and also 1k blocksize with 4k cluster size. 118 */ 119 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 120 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 121 return OCFS2_LA_OLD_DEFAULT; 122 123 /* 124 * Leave enough room for some block groups and make the final 125 * value we work from a multiple of 4. 126 */ 127 gd_mb -= 16; 128 gd_mb &= 0xFFFFFFFB; 129 130 la_mb = gd_mb; 131 132 /* 133 * Keep window sizes down to a reasonable default 134 */ 135 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 136 /* 137 * Some clustersize / blocksize combinations will have 138 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 139 * default size, but get poor distribution when 140 * limited to exactly 256 megabytes. 141 * 142 * As an example, 16K clustersize at 4K blocksize 143 * gives us a cluster group size of 504M. Paring the 144 * local alloc size down to 256 however, would give us 145 * only one window and around 200MB left in the 146 * cluster group. Instead, find the first size below 147 * 256 which would give us an even distribution. 148 * 149 * Larger cluster group sizes actually work out pretty 150 * well when pared to 256, so we don't have to do this 151 * for any group that fits more than two 152 * OCFS2_LA_MAX_DEFAULT_MB windows. 153 */ 154 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 155 la_mb = 256; 156 else { 157 unsigned int gd_mult = gd_mb; 158 159 while (gd_mult > 256) 160 gd_mult = gd_mult >> 1; 161 162 la_mb = gd_mult; 163 } 164 } 165 166 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 167 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 168 /* Too many nodes, too few disk clusters. */ 169 if (megs_per_slot < la_mb) 170 la_mb = megs_per_slot; 171 172 /* We can't store more bits than we can in a block. */ 173 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 174 ocfs2_local_alloc_size(sb) * 8); 175 if (la_mb > la_max_mb) 176 la_mb = la_max_mb; 177 178 return la_mb; 179 } 180 181 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 182 { 183 struct super_block *sb = osb->sb; 184 unsigned int la_default_mb = ocfs2_la_default_mb(osb); 185 unsigned int la_max_mb; 186 187 la_max_mb = ocfs2_clusters_to_megabytes(sb, 188 ocfs2_local_alloc_size(sb) * 8); 189 190 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 191 192 if (requested_mb == -1) { 193 /* No user request - use defaults */ 194 osb->local_alloc_default_bits = 195 ocfs2_megabytes_to_clusters(sb, la_default_mb); 196 } else if (requested_mb > la_max_mb) { 197 /* Request is too big, we give the maximum available */ 198 osb->local_alloc_default_bits = 199 ocfs2_megabytes_to_clusters(sb, la_max_mb); 200 } else { 201 osb->local_alloc_default_bits = 202 ocfs2_megabytes_to_clusters(sb, requested_mb); 203 } 204 205 osb->local_alloc_bits = osb->local_alloc_default_bits; 206 } 207 208 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 209 { 210 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 211 osb->local_alloc_state == OCFS2_LA_ENABLED); 212 } 213 214 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 215 unsigned int num_clusters) 216 { 217 spin_lock(&osb->osb_lock); 218 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 219 osb->local_alloc_state == OCFS2_LA_THROTTLED) 220 if (num_clusters >= osb->local_alloc_default_bits) { 221 cancel_delayed_work(&osb->la_enable_wq); 222 osb->local_alloc_state = OCFS2_LA_ENABLED; 223 } 224 spin_unlock(&osb->osb_lock); 225 } 226 227 void ocfs2_la_enable_worker(struct work_struct *work) 228 { 229 struct ocfs2_super *osb = 230 container_of(work, struct ocfs2_super, 231 la_enable_wq.work); 232 spin_lock(&osb->osb_lock); 233 osb->local_alloc_state = OCFS2_LA_ENABLED; 234 spin_unlock(&osb->osb_lock); 235 } 236 237 /* 238 * Tell us whether a given allocation should use the local alloc 239 * file. Otherwise, it has to go to the main bitmap. 240 * 241 * This function does semi-dirty reads of local alloc size and state! 242 * This is ok however, as the values are re-checked once under mutex. 243 */ 244 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 245 { 246 int ret = 0; 247 int la_bits; 248 249 spin_lock(&osb->osb_lock); 250 la_bits = osb->local_alloc_bits; 251 252 if (!ocfs2_la_state_enabled(osb)) 253 goto bail; 254 255 /* la_bits should be at least twice the size (in clusters) of 256 * a new block group. We want to be sure block group 257 * allocations go through the local alloc, so allow an 258 * allocation to take up to half the bitmap. */ 259 if (bits > (la_bits / 2)) 260 goto bail; 261 262 ret = 1; 263 bail: 264 trace_ocfs2_alloc_should_use_local( 265 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); 266 spin_unlock(&osb->osb_lock); 267 return ret; 268 } 269 270 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 271 { 272 int status = 0; 273 struct ocfs2_dinode *alloc = NULL; 274 struct buffer_head *alloc_bh = NULL; 275 u32 num_used; 276 struct inode *inode = NULL; 277 struct ocfs2_local_alloc *la; 278 279 if (osb->local_alloc_bits == 0) 280 goto bail; 281 282 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 283 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 284 "than max possible %u. Using defaults.\n", 285 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 286 osb->local_alloc_bits = 287 ocfs2_megabytes_to_clusters(osb->sb, 288 ocfs2_la_default_mb(osb)); 289 } 290 291 /* read the alloc off disk */ 292 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 293 osb->slot_num); 294 if (!inode) { 295 status = -EINVAL; 296 mlog_errno(status); 297 goto bail; 298 } 299 300 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 301 OCFS2_BH_IGNORE_CACHE); 302 if (status < 0) { 303 mlog_errno(status); 304 goto bail; 305 } 306 307 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 308 la = OCFS2_LOCAL_ALLOC(alloc); 309 310 if (!(le32_to_cpu(alloc->i_flags) & 311 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 312 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 313 (unsigned long long)OCFS2_I(inode)->ip_blkno); 314 status = -EINVAL; 315 goto bail; 316 } 317 318 if ((la->la_size == 0) || 319 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 320 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 321 le16_to_cpu(la->la_size)); 322 status = -EINVAL; 323 goto bail; 324 } 325 326 /* do a little verification. */ 327 num_used = ocfs2_local_alloc_count_bits(alloc); 328 329 /* hopefully the local alloc has always been recovered before 330 * we load it. */ 331 if (num_used 332 || alloc->id1.bitmap1.i_used 333 || alloc->id1.bitmap1.i_total 334 || la->la_bm_off) { 335 mlog(ML_ERROR, "inconsistent detected, clean journal with" 336 " unrecovered local alloc, please run fsck.ocfs2!\n" 337 "found = %u, set = %u, taken = %u, off = %u\n", 338 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 339 le32_to_cpu(alloc->id1.bitmap1.i_total), 340 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 341 342 status = -EINVAL; 343 goto bail; 344 } 345 346 osb->local_alloc_bh = alloc_bh; 347 osb->local_alloc_state = OCFS2_LA_ENABLED; 348 349 bail: 350 if (status < 0) 351 brelse(alloc_bh); 352 iput(inode); 353 354 trace_ocfs2_load_local_alloc(osb->local_alloc_bits); 355 356 if (status) 357 mlog_errno(status); 358 return status; 359 } 360 361 /* 362 * return any unused bits to the bitmap and write out a clean 363 * local_alloc. 364 * 365 * local_alloc_bh is optional. If not passed, we will simply use the 366 * one off osb. If you do pass it however, be warned that it *will* be 367 * returned brelse'd and NULL'd out.*/ 368 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 369 { 370 int status; 371 handle_t *handle; 372 struct inode *local_alloc_inode = NULL; 373 struct buffer_head *bh = NULL; 374 struct buffer_head *main_bm_bh = NULL; 375 struct inode *main_bm_inode = NULL; 376 struct ocfs2_dinode *alloc_copy = NULL; 377 struct ocfs2_dinode *alloc = NULL; 378 379 cancel_delayed_work(&osb->la_enable_wq); 380 flush_workqueue(osb->ocfs2_wq); 381 382 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 383 goto out; 384 385 local_alloc_inode = 386 ocfs2_get_system_file_inode(osb, 387 LOCAL_ALLOC_SYSTEM_INODE, 388 osb->slot_num); 389 if (!local_alloc_inode) { 390 status = -ENOENT; 391 mlog_errno(status); 392 goto out; 393 } 394 395 osb->local_alloc_state = OCFS2_LA_DISABLED; 396 397 ocfs2_resmap_uninit(&osb->osb_la_resmap); 398 399 main_bm_inode = ocfs2_get_system_file_inode(osb, 400 GLOBAL_BITMAP_SYSTEM_INODE, 401 OCFS2_INVALID_SLOT); 402 if (!main_bm_inode) { 403 status = -EINVAL; 404 mlog_errno(status); 405 goto out; 406 } 407 408 inode_lock(main_bm_inode); 409 410 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 411 if (status < 0) { 412 mlog_errno(status); 413 goto out_mutex; 414 } 415 416 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 417 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 418 if (IS_ERR(handle)) { 419 mlog_errno(PTR_ERR(handle)); 420 handle = NULL; 421 goto out_unlock; 422 } 423 424 bh = osb->local_alloc_bh; 425 alloc = (struct ocfs2_dinode *) bh->b_data; 426 427 alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); 428 if (!alloc_copy) { 429 status = -ENOMEM; 430 goto out_commit; 431 } 432 433 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 434 bh, OCFS2_JOURNAL_ACCESS_WRITE); 435 if (status < 0) { 436 mlog_errno(status); 437 goto out_commit; 438 } 439 440 ocfs2_clear_local_alloc(alloc); 441 ocfs2_journal_dirty(handle, bh); 442 443 brelse(bh); 444 osb->local_alloc_bh = NULL; 445 osb->local_alloc_state = OCFS2_LA_UNUSED; 446 447 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 448 main_bm_inode, main_bm_bh); 449 if (status < 0) 450 mlog_errno(status); 451 452 out_commit: 453 ocfs2_commit_trans(osb, handle); 454 455 out_unlock: 456 brelse(main_bm_bh); 457 458 ocfs2_inode_unlock(main_bm_inode, 1); 459 460 out_mutex: 461 inode_unlock(main_bm_inode); 462 iput(main_bm_inode); 463 464 out: 465 iput(local_alloc_inode); 466 467 kfree(alloc_copy); 468 } 469 470 /* 471 * We want to free the bitmap bits outside of any recovery context as 472 * we'll need a cluster lock to do so, but we must clear the local 473 * alloc before giving up the recovered nodes journal. To solve this, 474 * we kmalloc a copy of the local alloc before it's change for the 475 * caller to process with ocfs2_complete_local_alloc_recovery 476 */ 477 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 478 int slot_num, 479 struct ocfs2_dinode **alloc_copy) 480 { 481 int status = 0; 482 struct buffer_head *alloc_bh = NULL; 483 struct inode *inode = NULL; 484 struct ocfs2_dinode *alloc; 485 486 trace_ocfs2_begin_local_alloc_recovery(slot_num); 487 488 *alloc_copy = NULL; 489 490 inode = ocfs2_get_system_file_inode(osb, 491 LOCAL_ALLOC_SYSTEM_INODE, 492 slot_num); 493 if (!inode) { 494 status = -EINVAL; 495 mlog_errno(status); 496 goto bail; 497 } 498 499 inode_lock(inode); 500 501 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 502 OCFS2_BH_IGNORE_CACHE); 503 if (status < 0) { 504 mlog_errno(status); 505 goto bail; 506 } 507 508 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 509 if (!(*alloc_copy)) { 510 status = -ENOMEM; 511 goto bail; 512 } 513 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 514 515 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 516 ocfs2_clear_local_alloc(alloc); 517 518 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 519 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 520 if (status < 0) 521 mlog_errno(status); 522 523 bail: 524 if (status < 0) { 525 kfree(*alloc_copy); 526 *alloc_copy = NULL; 527 } 528 529 brelse(alloc_bh); 530 531 if (inode) { 532 inode_unlock(inode); 533 iput(inode); 534 } 535 536 if (status) 537 mlog_errno(status); 538 return status; 539 } 540 541 /* 542 * Step 2: By now, we've completed the journal recovery, we've stamped 543 * a clean local alloc on disk and dropped the node out of the 544 * recovery map. Dlm locks will no longer stall, so lets clear out the 545 * main bitmap. 546 */ 547 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 548 struct ocfs2_dinode *alloc) 549 { 550 int status; 551 handle_t *handle; 552 struct buffer_head *main_bm_bh = NULL; 553 struct inode *main_bm_inode; 554 555 main_bm_inode = ocfs2_get_system_file_inode(osb, 556 GLOBAL_BITMAP_SYSTEM_INODE, 557 OCFS2_INVALID_SLOT); 558 if (!main_bm_inode) { 559 status = -EINVAL; 560 mlog_errno(status); 561 goto out; 562 } 563 564 inode_lock(main_bm_inode); 565 566 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 567 if (status < 0) { 568 mlog_errno(status); 569 goto out_mutex; 570 } 571 572 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 573 if (IS_ERR(handle)) { 574 status = PTR_ERR(handle); 575 handle = NULL; 576 mlog_errno(status); 577 goto out_unlock; 578 } 579 580 /* we want the bitmap change to be recorded on disk asap */ 581 handle->h_sync = 1; 582 583 status = ocfs2_sync_local_to_main(osb, handle, alloc, 584 main_bm_inode, main_bm_bh); 585 if (status < 0) 586 mlog_errno(status); 587 588 ocfs2_commit_trans(osb, handle); 589 590 out_unlock: 591 ocfs2_inode_unlock(main_bm_inode, 1); 592 593 out_mutex: 594 inode_unlock(main_bm_inode); 595 596 brelse(main_bm_bh); 597 598 iput(main_bm_inode); 599 600 out: 601 if (!status) 602 ocfs2_init_steal_slots(osb); 603 if (status) 604 mlog_errno(status); 605 return status; 606 } 607 608 /* 609 * make sure we've got at least bits_wanted contiguous bits in the 610 * local alloc. You lose them when you drop i_mutex. 611 * 612 * We will add ourselves to the transaction passed in, but may start 613 * our own in order to shift windows. 614 */ 615 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 616 u32 bits_wanted, 617 struct ocfs2_alloc_context *ac) 618 { 619 int status; 620 struct ocfs2_dinode *alloc; 621 struct inode *local_alloc_inode; 622 unsigned int free_bits; 623 624 BUG_ON(!ac); 625 626 local_alloc_inode = 627 ocfs2_get_system_file_inode(osb, 628 LOCAL_ALLOC_SYSTEM_INODE, 629 osb->slot_num); 630 if (!local_alloc_inode) { 631 status = -ENOENT; 632 mlog_errno(status); 633 goto bail; 634 } 635 636 inode_lock(local_alloc_inode); 637 638 /* 639 * We must double check state and allocator bits because 640 * another process may have changed them while holding i_mutex. 641 */ 642 spin_lock(&osb->osb_lock); 643 if (!ocfs2_la_state_enabled(osb) || 644 (bits_wanted > osb->local_alloc_bits)) { 645 spin_unlock(&osb->osb_lock); 646 status = -ENOSPC; 647 goto bail; 648 } 649 spin_unlock(&osb->osb_lock); 650 651 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 652 653 #ifdef CONFIG_OCFS2_DEBUG_FS 654 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 655 ocfs2_local_alloc_count_bits(alloc)) { 656 status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", 657 (unsigned long long)le64_to_cpu(alloc->i_blkno), 658 le32_to_cpu(alloc->id1.bitmap1.i_used), 659 ocfs2_local_alloc_count_bits(alloc)); 660 goto bail; 661 } 662 #endif 663 664 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 665 le32_to_cpu(alloc->id1.bitmap1.i_used); 666 if (bits_wanted > free_bits) { 667 /* uhoh, window change time. */ 668 status = 669 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 670 if (status < 0) { 671 if (status != -ENOSPC) 672 mlog_errno(status); 673 goto bail; 674 } 675 676 /* 677 * Under certain conditions, the window slide code 678 * might have reduced the number of bits available or 679 * disabled the the local alloc entirely. Re-check 680 * here and return -ENOSPC if necessary. 681 */ 682 status = -ENOSPC; 683 if (!ocfs2_la_state_enabled(osb)) 684 goto bail; 685 686 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 687 le32_to_cpu(alloc->id1.bitmap1.i_used); 688 if (bits_wanted > free_bits) 689 goto bail; 690 } 691 692 ac->ac_inode = local_alloc_inode; 693 /* We should never use localalloc from another slot */ 694 ac->ac_alloc_slot = osb->slot_num; 695 ac->ac_which = OCFS2_AC_USE_LOCAL; 696 get_bh(osb->local_alloc_bh); 697 ac->ac_bh = osb->local_alloc_bh; 698 status = 0; 699 bail: 700 if (status < 0 && local_alloc_inode) { 701 inode_unlock(local_alloc_inode); 702 iput(local_alloc_inode); 703 } 704 705 trace_ocfs2_reserve_local_alloc_bits( 706 (unsigned long long)ac->ac_max_block, 707 bits_wanted, osb->slot_num, status); 708 709 if (status) 710 mlog_errno(status); 711 return status; 712 } 713 714 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 715 handle_t *handle, 716 struct ocfs2_alloc_context *ac, 717 u32 bits_wanted, 718 u32 *bit_off, 719 u32 *num_bits) 720 { 721 int status, start; 722 struct inode *local_alloc_inode; 723 void *bitmap; 724 struct ocfs2_dinode *alloc; 725 struct ocfs2_local_alloc *la; 726 727 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 728 729 local_alloc_inode = ac->ac_inode; 730 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 731 la = OCFS2_LOCAL_ALLOC(alloc); 732 733 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, 734 ac->ac_resv); 735 if (start == -1) { 736 /* TODO: Shouldn't we just BUG here? */ 737 status = -ENOSPC; 738 mlog_errno(status); 739 goto bail; 740 } 741 742 bitmap = la->la_bitmap; 743 *bit_off = le32_to_cpu(la->la_bm_off) + start; 744 *num_bits = bits_wanted; 745 746 status = ocfs2_journal_access_di(handle, 747 INODE_CACHE(local_alloc_inode), 748 osb->local_alloc_bh, 749 OCFS2_JOURNAL_ACCESS_WRITE); 750 if (status < 0) { 751 mlog_errno(status); 752 goto bail; 753 } 754 755 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, 756 bits_wanted); 757 758 while(bits_wanted--) 759 ocfs2_set_bit(start++, bitmap); 760 761 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 762 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 763 764 bail: 765 if (status) 766 mlog_errno(status); 767 return status; 768 } 769 770 int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, 771 handle_t *handle, 772 struct ocfs2_alloc_context *ac, 773 u32 bit_off, 774 u32 num_bits) 775 { 776 int status, start; 777 u32 clear_bits; 778 struct inode *local_alloc_inode; 779 void *bitmap; 780 struct ocfs2_dinode *alloc; 781 struct ocfs2_local_alloc *la; 782 783 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 784 785 local_alloc_inode = ac->ac_inode; 786 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 787 la = OCFS2_LOCAL_ALLOC(alloc); 788 789 bitmap = la->la_bitmap; 790 start = bit_off - le32_to_cpu(la->la_bm_off); 791 clear_bits = num_bits; 792 793 status = ocfs2_journal_access_di(handle, 794 INODE_CACHE(local_alloc_inode), 795 osb->local_alloc_bh, 796 OCFS2_JOURNAL_ACCESS_WRITE); 797 if (status < 0) { 798 mlog_errno(status); 799 goto bail; 800 } 801 802 while (clear_bits--) 803 ocfs2_clear_bit(start++, bitmap); 804 805 le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); 806 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 807 808 bail: 809 return status; 810 } 811 812 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 813 { 814 u32 count; 815 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 816 817 count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); 818 819 trace_ocfs2_local_alloc_count_bits(count); 820 return count; 821 } 822 823 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 824 struct ocfs2_dinode *alloc, 825 u32 *numbits, 826 struct ocfs2_alloc_reservation *resv) 827 { 828 int numfound = 0, bitoff, left, startoff; 829 int local_resv = 0; 830 struct ocfs2_alloc_reservation r; 831 void *bitmap = NULL; 832 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 833 834 if (!alloc->id1.bitmap1.i_total) { 835 bitoff = -1; 836 goto bail; 837 } 838 839 if (!resv) { 840 local_resv = 1; 841 ocfs2_resv_init_once(&r); 842 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); 843 resv = &r; 844 } 845 846 numfound = *numbits; 847 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { 848 if (numfound < *numbits) 849 *numbits = numfound; 850 goto bail; 851 } 852 853 /* 854 * Code error. While reservations are enabled, local 855 * allocation should _always_ go through them. 856 */ 857 BUG_ON(osb->osb_resv_level != 0); 858 859 /* 860 * Reservations are disabled. Handle this the old way. 861 */ 862 863 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 864 865 numfound = bitoff = startoff = 0; 866 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 867 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 868 if (bitoff == left) { 869 /* mlog(0, "bitoff (%d) == left", bitoff); */ 870 break; 871 } 872 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 873 "numfound = %d\n", bitoff, startoff, numfound);*/ 874 875 /* Ok, we found a zero bit... is it contig. or do we 876 * start over?*/ 877 if (bitoff == startoff) { 878 /* we found a zero */ 879 numfound++; 880 startoff++; 881 } else { 882 /* got a zero after some ones */ 883 numfound = 1; 884 startoff = bitoff+1; 885 } 886 /* we got everything we needed */ 887 if (numfound == *numbits) { 888 /* mlog(0, "Found it all!\n"); */ 889 break; 890 } 891 } 892 893 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); 894 895 if (numfound == *numbits) 896 bitoff = startoff - numfound; 897 else 898 bitoff = -1; 899 900 bail: 901 if (local_resv) 902 ocfs2_resv_discard(resmap, resv); 903 904 trace_ocfs2_local_alloc_find_clear_bits(*numbits, 905 le32_to_cpu(alloc->id1.bitmap1.i_total), 906 bitoff, numfound); 907 908 return bitoff; 909 } 910 911 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 912 { 913 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 914 int i; 915 916 alloc->id1.bitmap1.i_total = 0; 917 alloc->id1.bitmap1.i_used = 0; 918 la->la_bm_off = 0; 919 for(i = 0; i < le16_to_cpu(la->la_size); i++) 920 la->la_bitmap[i] = 0; 921 } 922 923 #if 0 924 /* turn this on and uncomment below to aid debugging window shifts. */ 925 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 926 unsigned int start, 927 unsigned int count) 928 { 929 unsigned int tmp = count; 930 while(tmp--) { 931 if (ocfs2_test_bit(start + tmp, bitmap)) { 932 printk("ocfs2_verify_zero_bits: start = %u, count = " 933 "%u\n", start, count); 934 printk("ocfs2_verify_zero_bits: bit %u is set!", 935 start + tmp); 936 BUG(); 937 } 938 } 939 } 940 #endif 941 942 /* 943 * sync the local alloc to main bitmap. 944 * 945 * assumes you've already locked the main bitmap -- the bitmap inode 946 * passed is used for caching. 947 */ 948 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 949 handle_t *handle, 950 struct ocfs2_dinode *alloc, 951 struct inode *main_bm_inode, 952 struct buffer_head *main_bm_bh) 953 { 954 int status = 0; 955 int bit_off, left, count, start; 956 u64 la_start_blk; 957 u64 blkno; 958 void *bitmap; 959 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 960 961 trace_ocfs2_sync_local_to_main( 962 le32_to_cpu(alloc->id1.bitmap1.i_total), 963 le32_to_cpu(alloc->id1.bitmap1.i_used)); 964 965 if (!alloc->id1.bitmap1.i_total) { 966 goto bail; 967 } 968 969 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 970 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 971 goto bail; 972 } 973 974 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 975 le32_to_cpu(la->la_bm_off)); 976 bitmap = la->la_bitmap; 977 start = count = bit_off = 0; 978 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 979 980 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 981 != -1) { 982 if ((bit_off < left) && (bit_off == start)) { 983 count++; 984 start++; 985 continue; 986 } 987 if (count) { 988 blkno = la_start_blk + 989 ocfs2_clusters_to_blocks(osb->sb, 990 start - count); 991 992 trace_ocfs2_sync_local_to_main_free( 993 count, start - count, 994 (unsigned long long)la_start_blk, 995 (unsigned long long)blkno); 996 997 status = ocfs2_release_clusters(handle, 998 main_bm_inode, 999 main_bm_bh, blkno, 1000 count); 1001 if (status < 0) { 1002 mlog_errno(status); 1003 goto bail; 1004 } 1005 } 1006 if (bit_off >= left) 1007 break; 1008 count = 1; 1009 start = bit_off + 1; 1010 } 1011 1012 bail: 1013 if (status) 1014 mlog_errno(status); 1015 return status; 1016 } 1017 1018 enum ocfs2_la_event { 1019 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 1020 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 1021 * enough bits theoretically 1022 * free, but a contiguous 1023 * allocation could not be 1024 * found. */ 1025 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 1026 * enough bits free to satisfy 1027 * our request. */ 1028 }; 1029 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 1030 /* 1031 * Given an event, calculate the size of our next local alloc window. 1032 * 1033 * This should always be called under i_mutex of the local alloc inode 1034 * so that local alloc disabling doesn't race with processes trying to 1035 * use the allocator. 1036 * 1037 * Returns the state which the local alloc was left in. This value can 1038 * be ignored by some paths. 1039 */ 1040 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 1041 enum ocfs2_la_event event) 1042 { 1043 unsigned int bits; 1044 int state; 1045 1046 spin_lock(&osb->osb_lock); 1047 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 1048 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 1049 goto out_unlock; 1050 } 1051 1052 /* 1053 * ENOSPC and fragmentation are treated similarly for now. 1054 */ 1055 if (event == OCFS2_LA_EVENT_ENOSPC || 1056 event == OCFS2_LA_EVENT_FRAGMENTED) { 1057 /* 1058 * We ran out of contiguous space in the primary 1059 * bitmap. Drastically reduce the number of bits used 1060 * by local alloc until we have to disable it. 1061 */ 1062 bits = osb->local_alloc_bits >> 1; 1063 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 1064 /* 1065 * By setting state to THROTTLED, we'll keep 1066 * the number of local alloc bits used down 1067 * until an event occurs which would give us 1068 * reason to assume the bitmap situation might 1069 * have changed. 1070 */ 1071 osb->local_alloc_state = OCFS2_LA_THROTTLED; 1072 osb->local_alloc_bits = bits; 1073 } else { 1074 osb->local_alloc_state = OCFS2_LA_DISABLED; 1075 } 1076 queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, 1077 OCFS2_LA_ENABLE_INTERVAL); 1078 goto out_unlock; 1079 } 1080 1081 /* 1082 * Don't increase the size of the local alloc window until we 1083 * know we might be able to fulfill the request. Otherwise, we 1084 * risk bouncing around the global bitmap during periods of 1085 * low space. 1086 */ 1087 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 1088 osb->local_alloc_bits = osb->local_alloc_default_bits; 1089 1090 out_unlock: 1091 state = osb->local_alloc_state; 1092 spin_unlock(&osb->osb_lock); 1093 1094 return state; 1095 } 1096 1097 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1098 struct ocfs2_alloc_context **ac, 1099 struct inode **bitmap_inode, 1100 struct buffer_head **bitmap_bh) 1101 { 1102 int status; 1103 1104 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1105 if (!(*ac)) { 1106 status = -ENOMEM; 1107 mlog_errno(status); 1108 goto bail; 1109 } 1110 1111 retry_enospc: 1112 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 1113 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1114 if (status == -ENOSPC) { 1115 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1116 OCFS2_LA_DISABLED) 1117 goto bail; 1118 1119 ocfs2_free_ac_resource(*ac); 1120 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 1121 goto retry_enospc; 1122 } 1123 if (status < 0) { 1124 mlog_errno(status); 1125 goto bail; 1126 } 1127 1128 *bitmap_inode = (*ac)->ac_inode; 1129 igrab(*bitmap_inode); 1130 *bitmap_bh = (*ac)->ac_bh; 1131 get_bh(*bitmap_bh); 1132 status = 0; 1133 bail: 1134 if ((status < 0) && *ac) { 1135 ocfs2_free_alloc_context(*ac); 1136 *ac = NULL; 1137 } 1138 1139 if (status) 1140 mlog_errno(status); 1141 return status; 1142 } 1143 1144 /* 1145 * pass it the bitmap lock in lock_bh if you have it. 1146 */ 1147 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1148 handle_t *handle, 1149 struct ocfs2_alloc_context *ac) 1150 { 1151 int status = 0; 1152 u32 cluster_off, cluster_count; 1153 struct ocfs2_dinode *alloc = NULL; 1154 struct ocfs2_local_alloc *la; 1155 1156 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1157 la = OCFS2_LOCAL_ALLOC(alloc); 1158 1159 trace_ocfs2_local_alloc_new_window( 1160 le32_to_cpu(alloc->id1.bitmap1.i_total), 1161 osb->local_alloc_bits); 1162 1163 /* Instruct the allocation code to try the most recently used 1164 * cluster group. We'll re-record the group used this pass 1165 * below. */ 1166 ac->ac_last_group = osb->la_last_gd; 1167 1168 /* we used the generic suballoc reserve function, but we set 1169 * everything up nicely, so there's no reason why we can't use 1170 * the more specific cluster api to claim bits. */ 1171 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, 1172 &cluster_off, &cluster_count); 1173 if (status == -ENOSPC) { 1174 retry_enospc: 1175 /* 1176 * Note: We could also try syncing the journal here to 1177 * allow use of any free bits which the current 1178 * transaction can't give us access to. --Mark 1179 */ 1180 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1181 OCFS2_LA_DISABLED) 1182 goto bail; 1183 1184 ac->ac_bits_wanted = osb->local_alloc_bits; 1185 status = ocfs2_claim_clusters(handle, ac, 1186 osb->local_alloc_bits, 1187 &cluster_off, 1188 &cluster_count); 1189 if (status == -ENOSPC) 1190 goto retry_enospc; 1191 /* 1192 * We only shrunk the *minimum* number of in our 1193 * request - it's entirely possible that the allocator 1194 * might give us more than we asked for. 1195 */ 1196 if (status == 0) { 1197 spin_lock(&osb->osb_lock); 1198 osb->local_alloc_bits = cluster_count; 1199 spin_unlock(&osb->osb_lock); 1200 } 1201 } 1202 if (status < 0) { 1203 if (status != -ENOSPC) 1204 mlog_errno(status); 1205 goto bail; 1206 } 1207 1208 osb->la_last_gd = ac->ac_last_group; 1209 1210 la->la_bm_off = cpu_to_le32(cluster_off); 1211 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1212 /* just in case... In the future when we find space ourselves, 1213 * we don't have to get all contiguous -- but we'll have to 1214 * set all previously used bits in bitmap and update 1215 * la_bits_set before setting the bits in the main bitmap. */ 1216 alloc->id1.bitmap1.i_used = 0; 1217 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1218 le16_to_cpu(la->la_size)); 1219 1220 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1221 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1222 1223 trace_ocfs2_local_alloc_new_window_result( 1224 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off, 1225 le32_to_cpu(alloc->id1.bitmap1.i_total)); 1226 1227 bail: 1228 if (status) 1229 mlog_errno(status); 1230 return status; 1231 } 1232 1233 /* Note that we do *NOT* lock the local alloc inode here as 1234 * it's been locked already for us. */ 1235 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1236 struct inode *local_alloc_inode) 1237 { 1238 int status = 0; 1239 struct buffer_head *main_bm_bh = NULL; 1240 struct inode *main_bm_inode = NULL; 1241 handle_t *handle = NULL; 1242 struct ocfs2_dinode *alloc; 1243 struct ocfs2_dinode *alloc_copy = NULL; 1244 struct ocfs2_alloc_context *ac = NULL; 1245 1246 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1247 1248 /* This will lock the main bitmap for us. */ 1249 status = ocfs2_local_alloc_reserve_for_window(osb, 1250 &ac, 1251 &main_bm_inode, 1252 &main_bm_bh); 1253 if (status < 0) { 1254 if (status != -ENOSPC) 1255 mlog_errno(status); 1256 goto bail; 1257 } 1258 1259 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1260 if (IS_ERR(handle)) { 1261 status = PTR_ERR(handle); 1262 handle = NULL; 1263 mlog_errno(status); 1264 goto bail; 1265 } 1266 1267 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1268 1269 /* We want to clear the local alloc before doing anything 1270 * else, so that if we error later during this operation, 1271 * local alloc shutdown won't try to double free main bitmap 1272 * bits. Make a copy so the sync function knows which bits to 1273 * free. */ 1274 alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); 1275 if (!alloc_copy) { 1276 status = -ENOMEM; 1277 mlog_errno(status); 1278 goto bail; 1279 } 1280 1281 status = ocfs2_journal_access_di(handle, 1282 INODE_CACHE(local_alloc_inode), 1283 osb->local_alloc_bh, 1284 OCFS2_JOURNAL_ACCESS_WRITE); 1285 if (status < 0) { 1286 mlog_errno(status); 1287 goto bail; 1288 } 1289 1290 ocfs2_clear_local_alloc(alloc); 1291 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1292 1293 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1294 main_bm_inode, main_bm_bh); 1295 if (status < 0) { 1296 mlog_errno(status); 1297 goto bail; 1298 } 1299 1300 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1301 if (status < 0) { 1302 if (status != -ENOSPC) 1303 mlog_errno(status); 1304 goto bail; 1305 } 1306 1307 atomic_inc(&osb->alloc_stats.moves); 1308 1309 bail: 1310 if (handle) 1311 ocfs2_commit_trans(osb, handle); 1312 1313 brelse(main_bm_bh); 1314 1315 iput(main_bm_inode); 1316 kfree(alloc_copy); 1317 1318 if (ac) 1319 ocfs2_free_alloc_context(ac); 1320 1321 if (status) 1322 mlog_errno(status); 1323 return status; 1324 } 1325 1326