// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free cluster extents.
 * The trees are sorted by the start of an extent and by the length of an extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in the trees.
 * In the extreme case, the code reads the on-disk bitmap to find free clusters.
 *
 */

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/kernel.h>

#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * Maximum number of extents in the trees.
 */
#define NTFS_MAX_WND_EXTENTS (32u * 1024u)

struct rb_node_key {
	struct rb_node node;
	size_t key;
};

struct e_node {
	struct rb_node_key start; /* Tree sorted by start. */
	struct rb_node_key count; /* Tree sorted by len. */
};

static int wnd_rescan(struct wnd_bitmap *wnd);
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);

static struct kmem_cache *ntfs_enode_cachep;

int __init ntfs3_init_bitmap(void)
{
	ntfs_enode_cachep =
		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	return ntfs_enode_cachep ? 0 : -ENOMEM;
}

void ntfs3_exit_bitmap(void)
{
	kmem_cache_destroy(ntfs_enode_cachep);
}

/*
 * wnd_scan - Scan the range [wpos, wend) of window @buf.
 *
 * *b_pos / *b_len describe the biggest free fragment found so far;
 * *prev_tail carries the length of a free run that reached the end of
 * the previous window.
 *
 * Return: Position of a fragment of at least @to_alloc free bits, or
 * MINUS_ONE_T if none was found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
		       size_t *b_len)
{
	while (wpos < wend) {
		size_t free_len;
		u32 free_bits, end;
		u32 used = find_next_zero_bit(buf, wend, wpos);

		if (used >= wend) {
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
			return -1;
		}

		if (used > wpos) {
			wpos = used;
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
		}

		/*
		 * Now we have a fragment [wpos, wend) starting with 0.
		 */
		end = wpos + to_alloc - *prev_tail;
		free_bits = find_next_bit(buf, min(end, wend), wpos);

		free_len = *prev_tail + free_bits - wpos;

		if (*b_len < free_len) {
			*b_pos = wbit + wpos - *prev_tail;
			*b_len = free_len;
		}

		if (free_len >= to_alloc)
			return wbit + wpos - *prev_tail;

		if (free_bits >= wend) {
			*prev_tail += free_bits - wpos;
			return -1;
		}

		wpos = free_bits + 1;

		*prev_tail = 0;
	}

	return -1;
}
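
/*
 * Note on wnd_scan(): a free run that touches the end of a window may
 * continue into the next one, so its length is carried between calls in
 * *prev_tail. For example, with to_alloc == 8, a window ending in five
 * free bits returns MINUS_ONE_T with *prev_tail == 5; if the next window
 * starts with three or more free bits, wnd_scan() reports the combined
 * eight-bit fragment starting *prev_tail bits before that window.
 */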

/*
 * wnd_close - Free all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
	struct rb_node *node, *next;

	kfree(wnd->free_bits);
	run_close(&wnd->run);

	node = rb_first(&wnd->start_tree);

	while (node) {
		next = rb_next(node);
		rb_erase(node, &wnd->start_tree);
		kmem_cache_free(ntfs_enode_cachep,
				rb_entry(node, struct e_node, start.node));
		node = next;
	}
}

static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *r = NULL;

	while (*p) {
		struct rb_node_key *k;

		k = rb_entry(*p, struct rb_node_key, node);
		if (v < k->key) {
			p = &(*p)->rb_left;
		} else if (v > k->key) {
			r = &k->node;
			p = &(*p)->rb_right;
		} else {
			return &k->node;
		}
	}

	return r;
}

/*
 * rb_insert_count - Helper function to insert into the special 'count' tree.
 */
static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_ckey = e->count.key;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k =
			rb_entry(parent = *p, struct e_node, count.node);

		if (e_ckey > k->count.key) {
			p = &(*p)->rb_left;
		} else if (e_ckey < k->count.key) {
			p = &(*p)->rb_right;
		} else if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->count.node, parent, p);
	rb_insert_color(&e->count.node, root);
	return true;
}

/*
 * rb_insert_start - Helper function to insert into the special 'start' tree.
 */
static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k;

		parent = *p;

		k = rb_entry(parent, struct e_node, start.node);
		if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->start.node, parent, p);
	rb_insert_color(&e->start.node, root);
	return true;
}
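
/*
 * Note: rb_lookup() returns the entry with the greatest key that does
 * not exceed @v (or NULL if every key is greater), which is why callers
 * use it to find the cached extent covering or preceding a given bit.
 * The 'count' tree is kept in descending order of length (rb_first() is
 * the biggest extent) with the start key as a tie-breaker, so extents of
 * equal length remain distinct keys.
 */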

/*
 * wnd_add_free_ext - Add a new extent of free space.
 * @build: true when building the tree.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
			     bool build)
{
	struct e_node *e, *e0 = NULL;
	size_t ib, end_in = bit + len;
	struct rb_node *n;

	if (build) {
		/* Use extent_min to filter out too short extents. */
		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
		    len <= wnd->extent_min) {
			wnd->uptodated = -1;
			return;
		}
	} else {
		/* Try to find an extent before 'bit'. */
		n = rb_lookup(&wnd->start_tree, bit);

		if (!n) {
			n = rb_first(&wnd->start_tree);
		} else {
			e = rb_entry(n, struct e_node, start.node);
			n = rb_next(n);
			if (e->start.key + e->count.key == bit) {
				/* Remove left. */
				bit = e->start.key;
				len += e->count.key;
				rb_erase(&e->start.node, &wnd->start_tree);
				rb_erase(&e->count.node, &wnd->count_tree);
				wnd->count -= 1;
				e0 = e;
			}
		}

		while (n) {
			size_t next_end;

			e = rb_entry(n, struct e_node, start.node);
			next_end = e->start.key + e->count.key;
			if (e->start.key > end_in)
				break;

			/* Remove right. */
			n = rb_next(n);
			len += next_end - end_in;
			end_in = next_end;
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;

			if (!e0)
				e0 = e;
			else
				kmem_cache_free(ntfs_enode_cachep, e);
		}

		if (wnd->uptodated != 1) {
			/* Check bits before 'bit'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     bit < wnd->zone_end
				     ? 0
				     : wnd->zone_end;

			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
				bit -= 1;
				len += 1;
			}

			/* Check bits after 'end_in'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     end_in > wnd->zone_bit
				     ? wnd->nbits
				     : wnd->zone_bit;

			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
				end_in += 1;
				len += 1;
			}
		}
	}
	/* Insert the new fragment. */
	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		if (e0)
			kmem_cache_free(ntfs_enode_cachep, e0);

		wnd->uptodated = -1;

		/* Compare with the smallest fragment. */
		n = rb_last(&wnd->count_tree);
		e = rb_entry(n, struct e_node, count.node);
		if (len <= e->count.key)
			goto out; /* Do not insert small fragments. */

		if (build) {
			struct e_node *e2;

			n = rb_prev(n);
			e2 = rb_entry(n, struct e_node, count.node);
			/* The smallest fragment will be 'e2->count.key'. */
			wnd->extent_min = e2->count.key;
		}

		/* Replace the smallest fragment by the new one. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e) {
			wnd->uptodated = -1;
			goto out;
		}

		if (build && len <= wnd->extent_min)
			wnd->extent_min = len;
	}
	e->start.key = bit;
	e->count.key = len;
	if (len > wnd->extent_max)
		wnd->extent_max = len;

	rb_insert_start(&wnd->start_tree, e);
	rb_insert_count(&wnd->count_tree, e);
	wnd->count += 1;

out:;
}
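
/*
 * Note: when the trees are full (NTFS_MAX_WND_EXTENTS entries),
 * wnd_add_free_ext() evicts the smallest cached extent in favour of a
 * larger one and marks the cache stale (uptodated = -1); lookups then
 * fall back to the on-disk bitmap for extents that are no longer cached.
 */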

/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
	struct rb_node *n, *n3;
	struct e_node *e, *e3;
	size_t end_in = bit + len;
	size_t end3, end, new_key, new_len, max_new_len;

	/* Try to find an extent before 'bit'. */
	n = rb_lookup(&wnd->start_tree, bit);

	if (!n)
		return;

	e = rb_entry(n, struct e_node, start.node);
	end = e->start.key + e->count.key;

	new_key = new_len = 0;
	len = e->count.key;

	/* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
	if (e->start.key > bit)
		;
	else if (end_in <= end) {
		/* Range [bit,end_in) inside 'e'. */
		new_key = end_in;
		new_len = end - end_in;
		len = bit - e->start.key;
	} else if (bit > end) {
		bool bmax = false;

		n3 = rb_next(n);

		while (n3) {
			e3 = rb_entry(n3, struct e_node, start.node);
			if (e3->start.key >= end_in)
				break;

			if (e3->count.key == wnd->extent_max)
				bmax = true;

			end3 = e3->start.key + e3->count.key;
			if (end3 > end_in) {
				e3->start.key = end_in;
				rb_erase(&e3->count.node, &wnd->count_tree);
				e3->count.key = end3 - end_in;
				rb_insert_count(&wnd->count_tree, e3);
				break;
			}

			n3 = rb_next(n3);
			rb_erase(&e3->start.node, &wnd->start_tree);
			rb_erase(&e3->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e3);
		}
		if (!bmax)
			return;
		n3 = rb_first(&wnd->count_tree);
		wnd->extent_max =
			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
			   : 0;
		return;
	}

	if (e->count.key != wnd->extent_max) {
		;
	} else if (rb_prev(&e->count.node)) {
		;
	} else {
		n3 = rb_next(&e->count.node);
		max_new_len = max(len, new_len);
		if (!n3) {
			wnd->extent_max = max_new_len;
		} else {
			e3 = rb_entry(n3, struct e_node, count.node);
			wnd->extent_max = max(e3->count.key, max_new_len);
		}
	}

	if (!len) {
		if (new_len) {
			e->start.key = new_key;
			rb_erase(&e->count.node, &wnd->count_tree);
			e->count.key = new_len;
			rb_insert_count(&wnd->count_tree, e);
		} else {
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e);
		}
		goto out;
	}
	rb_erase(&e->count.node, &wnd->count_tree);
	e->count.key = len;
	rb_insert_count(&wnd->count_tree, e);

	if (!new_len)
		goto out;

	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		wnd->uptodated = -1;

		/* Get the minimal extent. */
		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
			     count.node);
		if (e->count.key > new_len)
			goto out;

		/* Replace the minimum. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e)
			wnd->uptodated = -1;
	}

	if (e) {
		e->start.key = new_key;
		e->count.key = new_len;
		rb_insert_start(&wnd->start_tree, e);
		rb_insert_count(&wnd->count_tree, e);
		wnd->count += 1;
	}

out:
	if (!wnd->count && wnd->uptodated != 1)
		wnd_rescan(wnd);
}

/*
 * wnd_rescan - Scan the whole bitmap. Used during initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
	int err = 0;
	size_t prev_tail = 0;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u64 lbo, len = 0;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 wbits = 8 * sb->s_blocksize;
	u32 used, frb;
	const ulong *buf;
	size_t wpos, wbit, iw, vbo;
	struct buffer_head *bh = NULL;
	CLST lcn, clen;

	wnd->uptodated = 0;
	wnd->extent_max = 0;
	wnd->extent_min = MINUS_ONE_T;
	wnd->total_zeroes = 0;

	vbo = 0;

	for (iw = 0; iw < wnd->nwnd; iw++) {
		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (wnd->inited) {
			if (!wnd->free_bits[iw]) {
				/* All ones. */
				if (prev_tail) {
					wnd_add_free_ext(wnd,
							 vbo * 8 - prev_tail,
							 prev_tail, true);
					prev_tail = 0;
				}
				goto next_wnd;
			}
			if (wbits == wnd->free_bits[iw]) {
				/* All zeroes. */
				prev_tail += wbits;
				wnd->total_zeroes += wbits;
				goto next_wnd;
			}
		}

		if (!len) {
			u32 off = vbo & sbi->cluster_mask;

			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
					      &lcn, &clen, NULL)) {
				err = -ENOENT;
				goto out;
			}

			lbo = ((u64)lcn << cluster_bits) + off;
			len = ((u64)clen << cluster_bits) - off;
		}

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh) {
			err = -EIO;
			goto out;
		}

		buf = (ulong *)bh->b_data;

		used = bitmap_weight(buf, wbits);
		if (used < wbits) {
			frb = wbits - used;
			wnd->free_bits[iw] = frb;
			wnd->total_zeroes += frb;
		}

		wpos = 0;
		wbit = vbo * 8;

		if (wbit + wbits > wnd->nbits)
			wbits = wnd->nbits - wbit;

		do {
			used = find_next_zero_bit(buf, wbits, wpos);

			if (used > wpos && prev_tail) {
				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
						 prev_tail, true);
				prev_tail = 0;
			}

			wpos = used;

			if (wpos >= wbits) {
				/* No free blocks. */
				prev_tail = 0;
				break;
			}

			frb = find_next_bit(buf, wbits, wpos);
			if (frb >= wbits) {
				/* Keep the last free block. */
				prev_tail += frb - wpos;
				break;
			}

			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
					 frb + prev_tail - wpos, true);

			/* Skip the free block and the first '1'. */
			wpos = frb + 1;
			/* Reset the previous tail. */
			prev_tail = 0;
		} while (wpos < wbits);

next_wnd:

		if (bh)
			put_bh(bh);
		bh = NULL;

		vbo += blocksize;
		if (len) {
			len -= blocksize;
			lbo += blocksize;
		}
	}

	/* Add the last block. */
	if (prev_tail)
		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

	/*
	 * Before the init cycle wnd->uptodated was 0.
	 * If any errors or limits occur during initialization then
	 * wnd->uptodated will be -1.
	 * If 'uptodated' is still 0 then the tree is really up to date.
	 */
	if (!wnd->uptodated)
		wnd->uptodated = 1;

	if (wnd->zone_bit != wnd->zone_end) {
		size_t zlen = wnd->zone_end - wnd->zone_bit;

		wnd->zone_end = wnd->zone_bit;
		wnd_zone_set(wnd, wnd->zone_bit, zlen);
	}

out:
	return err;
}

int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
{
	int err;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;

	init_rwsem(&wnd->rw_lock);

	wnd->sb = sb;
	wnd->nbits = nbits;
	wnd->total_zeroes = nbits;
	wnd->extent_max = MINUS_ONE_T;
	wnd->zone_bit = wnd->zone_end = 0;
	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
	wnd->bits_last = nbits & (wbits - 1);
	if (!wnd->bits_last)
		wnd->bits_last = wbits;

	wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
	if (!wnd->free_bits)
		return -ENOMEM;

	err = wnd_rescan(wnd);
	if (err)
		return err;

	wnd->inited = true;

	return 0;
}

/*
 * wnd_map - Call sb_bread for the requested window.
 */
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
{
	size_t vbo;
	CLST lcn, clen;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi;
	struct buffer_head *bh;
	u64 lbo;

	sbi = sb->s_fs_info;
	vbo = (u64)iw << sb->s_blocksize_bits;

	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
			      NULL)) {
		return ERR_PTR(-ENOENT);
	}

	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);

	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
	if (!bh)
		return ERR_PTR(-EIO);

	return bh;
}

/*
 * wnd_set_free - Mark the bits range from bit to bit + bits as free.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	u32 wbits = 8 * sb->s_blocksize;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_clear(buf, wbit, op);

		wnd->free_bits[iw] += op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes += op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	wnd_add_free_ext(wnd, bit, bits0, false);

	return err;
}

/*
 * wnd_set_used - Mark the bits range from bit to bit + bits as used.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}
		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_set(buf, wbit, op);
		wnd->free_bits[iw] -= op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes -= op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	if (!RB_EMPTY_ROOT(&wnd->start_tree))
		wnd_remove_free_ext(wnd, bit, bits0);

	return err;
}

/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wbits != wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				return false;

			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

			put_bh(bh);
			if (!ret)
				return false;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}

	return true;
}

/*
 * wnd_is_free
 *
 * Return: True if all clusters [bit, bit+bits) are free.
 */
bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret;
	struct rb_node *n;
	size_t end;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	n = rb_lookup(&wnd->start_tree, bit);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);

	end = e->start.key + e->count.key;

	if (bit < end && bit + bits <= end)
		return true;

use_wnd:
	ret = wnd_is_free_hlp(wnd, bit, bits);

	return ret;
}

/*
 * wnd_is_used
 *
 * Return: True if all clusters [bit, bit+bits) are used.
 */
bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret = false;
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	size_t end;
	struct rb_node *n;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	end = bit + bits;
	n = rb_lookup(&wnd->start_tree, end - 1);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);
	if (e->start.key + e->count.key > bit)
		return false;

use_wnd:
	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				goto out;

			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
			put_bh(bh);
			if (!ret)
				goto out;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}
	ret = true;

out:
	return ret;
}

/*
 * wnd_find - Look for free space.
 *
 * - flags - BITMAP_FIND_XXX flags.
 *
 * Return: 0 if not found.
 */
size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
		size_t flags, size_t *allocated)
{
	struct super_block *sb;
	u32 wbits, wpos, wzbit, wzend;
	size_t fnd, max_alloc, b_len, b_pos;
	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
	size_t to_alloc0 = to_alloc;
	const ulong *buf;
	const struct e_node *e;
	const struct rb_node *pr, *cr;
	u8 log2_bits;
	bool fbits_valid;
	struct buffer_head *bh;

	/* Fast check for available free space. */
	if (flags & BITMAP_FIND_FULL) {
		size_t zeroes = wnd_zeroes(wnd);

		zeroes -= wnd->zone_end - wnd->zone_bit;
		if (zeroes < to_alloc0)
			goto no_space;

		if (to_alloc0 > wnd->extent_max)
			goto no_space;
	} else {
		if (to_alloc > wnd->extent_max)
			to_alloc = wnd->extent_max;
	}

	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
		hint = wnd->zone_end;

	max_alloc = wnd->nbits;
	b_len = b_pos = 0;

	if (hint >= max_alloc)
		hint = 0;

	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
		if (wnd->uptodated == 1) {
			/* Extents tree is updated -> No free space. */
			goto no_space;
		}
		goto scan_bitmap;
	}

	e = NULL;
	if (!hint)
		goto allocate_biggest;

	/* Use hint: Enumerate extents by start >= hint. */
	pr = NULL;
	cr = wnd->start_tree.rb_node;

	for (;;) {
		e = rb_entry(cr, struct e_node, start.node);

		if (e->start.key == hint)
			break;

		if (e->start.key < hint) {
			pr = cr;
			cr = cr->rb_right;
			if (!cr)
				break;
			continue;
		}

		cr = cr->rb_left;
		if (!cr) {
			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
			break;
		}
	}

	if (!e)
		goto allocate_biggest;

	if (e->start.key + e->count.key > hint) {
		/* We have found an extent with 'hint' inside. */
		size_t len = e->start.key + e->count.key - hint;

		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
			fnd = hint;
			goto found;
		}

		if (!(flags & BITMAP_FIND_FULL)) {
			if (len > to_alloc)
				len = to_alloc;

			if (hint + len <= max_alloc) {
				fnd = hint;
				to_alloc = len;
				goto found;
			}
		}
	}

allocate_biggest:
	/* Allocate from the biggest free extent. */
	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
	if (e->count.key != wnd->extent_max)
		wnd->extent_max = e->count.key;

	if (e->count.key < max_alloc) {
		if (e->count.key >= to_alloc) {
			;
		} else if (flags & BITMAP_FIND_FULL) {
			if (e->count.key < to_alloc0) {
				/* The biggest free block is less than requested. */
				goto no_space;
			}
			to_alloc = e->count.key;
		} else if (wnd->uptodated != -1) {
			to_alloc = e->count.key;
		} else {
			/* Check if we can use more bits. */
			size_t op, max_check;
			struct rb_root start_tree;

			memcpy(&start_tree, &wnd->start_tree,
			       sizeof(struct rb_root));
			memset(&wnd->start_tree, 0, sizeof(struct rb_root));

			max_check = e->start.key + to_alloc;
			if (max_check > max_alloc)
				max_check = max_alloc;
			for (op = e->start.key + e->count.key; op < max_check;
			     op++) {
				if (!wnd_is_free(wnd, op, 1))
					break;
			}
			memcpy(&wnd->start_tree, &start_tree,
			       sizeof(struct rb_root));
			to_alloc = op - e->start.key;
		}

		/* Prepare to return. */
		fnd = e->start.key;
		if (e->start.key + to_alloc > max_alloc)
			to_alloc = max_alloc - e->start.key;
		goto found;
	}

	if (wnd->uptodated == 1) {
		/* Extents tree is updated -> no free space. */
		goto no_space;
	}

	b_len = e->count.key;
	b_pos = e->start.key;

scan_bitmap:
	sb = wnd->sb;
	log2_bits = sb->s_blocksize_bits + 3;

	/* At most two ranges [hint, max_alloc) + [0, hint). */
Again:

	/* TODO: Optimize request for case nbits > wbits. */

	iw = hint >> log2_bits;
	wbits = sb->s_blocksize * 8;
	wpos = hint & (wbits - 1);
	prev_tail = 0;
	fbits_valid = true;

	if (max_alloc == wnd->nbits) {
		nwnd = wnd->nwnd;
	} else {
		size_t t = max_alloc + wbits - 1;

		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
	}

	/* Enumerate all windows. */
	for (; iw < nwnd; iw++) {
		wbit = iw << log2_bits;

		if (!wnd->free_bits[iw]) {
			if (prev_tail > b_len) {
				b_pos = wbit - prev_tail;
				b_len = prev_tail;
			}

			/* Skip fully used windows. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		if (unlikely(iw + 1 == nwnd)) {
			if (max_alloc == wnd->nbits) {
				wbits = wnd->bits_last;
			} else {
				size_t t = max_alloc & (wbits - 1);

				if (t) {
					wbits = t;
					fbits_valid = false;
				}
			}
		}

		if (wnd->zone_end > wnd->zone_bit) {
			ebit = wbit + wbits;
			zbit = max(wnd->zone_bit, wbit);
			zend = min(wnd->zone_end, ebit);

			/* Here we have a window [wbit, ebit) and a zone [zbit, zend). */
			if (zend <= zbit) {
				/* Zone does not overlap window. */
			} else {
				wzbit = zbit - wbit;
				wzend = zend - wbit;

				/* Zone overlaps window. */
				if (wnd->free_bits[iw] == wzend - wzbit) {
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				/* Scan two ranges of the window: [wbit, zbit) and [zend, ebit). */
				bh = wnd_map(wnd, iw);

				if (IS_ERR(bh)) {
					/* TODO: Error. */
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				buf = (ulong *)bh->b_data;

				/* Scan range [wbit, zbit). */
				if (wpos < wzbit) {
					/* Scan range [wpos, zbit). */
					fnd = wnd_scan(buf, wbit, wpos, wzbit,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				prev_tail = 0;

				/* Scan range [zend, ebit). */
				if (wzend < wbits) {
					fnd = wnd_scan(buf, wbit,
						       max(wzend, wpos), wbits,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				wpos = 0;
				put_bh(bh);
				continue;
			}
		}

		/* Current window does not overlap the zone. */
		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
			/* Window is empty. */
			if (prev_tail + wbits >= to_alloc) {
				fnd = wbit + wpos - prev_tail;
				goto found;
			}

			/* Increase 'prev_tail' and process the next window. */
			prev_tail += wbits;
			wpos = 0;
			continue;
		}

		/* Read window. */
		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			/* TODO: Error. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		buf = (ulong *)bh->b_data;

		/* Scan range [wpos, wbits). */
		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
			       &b_pos, &b_len);
		put_bh(bh);
		if (fnd != MINUS_ONE_T)
			goto found;
	}

	if (b_len < prev_tail) {
		/* The last fragment. */
		b_len = prev_tail;
		b_pos = max_alloc - prev_tail;
	}

	if (hint) {
		/*
		 * We have scanned the range [hint, max_alloc).
		 * Prepare to scan the range [0, hint + to_alloc).
		 */
		size_t nextmax = hint + to_alloc;

		if (likely(nextmax >= hint) && nextmax < max_alloc)
			max_alloc = nextmax;
		hint = 0;
		goto Again;
	}

	if (!b_len)
		goto no_space;

	wnd->extent_max = b_len;

	if (flags & BITMAP_FIND_FULL)
		goto no_space;

	fnd = b_pos;
	to_alloc = b_len;

found:
	if (flags & BITMAP_FIND_MARK_AS_USED) {
		/* TODO: Optimize remove extent (pass 'e'?). */
		if (wnd_set_used(wnd, fnd, to_alloc))
			goto no_space;
	} else if (wnd->extent_max != MINUS_ONE_T &&
		   to_alloc > wnd->extent_max) {
		wnd->extent_max = to_alloc;
	}

	*allocated = fnd;
	return to_alloc;

no_space:
	return 0;
}

/*
 * wnd_extend - Extend the bitmap ($MFT bitmap).
 */
int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
{
	int err;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;
	u32 b0, new_last;
	size_t bits, iw, new_wnd;
	size_t old_bits = wnd->nbits;
	u16 *new_free;

	if (new_bits <= old_bits)
		return -EINVAL;

	/* Align to 8 byte boundary. */
	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
	new_last = new_bits & (wbits - 1);
	if (!new_last)
		new_last = wbits;

	if (new_wnd != wnd->nwnd) {
		new_free = kmalloc_array(new_wnd, sizeof(u16), GFP_NOFS);
		if (!new_free)
			return -ENOMEM;

		memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(u16));
		memset(new_free + wnd->nwnd, 0,
		       (new_wnd - wnd->nwnd) * sizeof(u16));
		kfree(wnd->free_bits);
		wnd->free_bits = new_free;
	}

	/* Zero bits [old_bits, new_bits). */
	bits = new_bits - old_bits;
	b0 = old_bits & (wbits - 1);
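
	/*
	 * Only the bits in [old_bits, new_bits) need to be cleared on
	 * disk; the loop below walks the affected bitmap blocks, clears
	 * from the old tail (b0) to the end of each block, and refreshes
	 * the cached free_bits[] counter from the actual block contents.
	 */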

	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
		u32 op;
		size_t frb;
		u64 vbo, lbo, bytes;
		struct buffer_head *bh;
		ulong *buf;

		if (iw + 1 == new_wnd)
			wbits = new_last;

		op = b0 + bits > wbits ? wbits - b0 : bits;
		vbo = (u64)iw * blocksize;

		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
		if (err)
			break;

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh)
			return -EIO;

		lock_buffer(bh);
		buf = (ulong *)bh->b_data;

		__bitmap_clear(buf, b0, blocksize * 8 - b0);
		frb = wbits - bitmap_weight(buf, wbits);
		wnd->total_zeroes += frb - wnd->free_bits[iw];
		wnd->free_bits[iw] = frb;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		/* err = sync_dirty_buffer(bh); */

		b0 = 0;
		bits -= op;
	}

	wnd->nbits = new_bits;
	wnd->nwnd = new_wnd;
	wnd->bits_last = new_last;

	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);

	return 0;
}

void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
	size_t zlen = wnd->zone_end - wnd->zone_bit;

	if (zlen)
		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);

	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
		wnd_remove_free_ext(wnd, lcn, len);

	wnd->zone_bit = lcn;
	wnd->zone_end = lcn + len;
}

int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	u32 wbits = 8 * sb->s_blocksize;
	CLST len = 0, lcn = 0, done = 0;
	CLST minlen = bytes_to_cluster(sbi, range->minlen);
	CLST lcn_from = bytes_to_cluster(sbi, range->start);
	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
	u32 wbit = lcn_from & (wbits - 1);
	const ulong *buf;
	CLST lcn_to;

	if (!minlen)
		minlen = 1;

	if (range->len == (u64)-1)
		lcn_to = wnd->nbits;
	else
		lcn_to = bytes_to_cluster(sbi, range->start + range->len);

	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);

	for (; iw < wnd->nwnd; iw++, wbit = 0) {
		CLST lcn_wnd = iw * wbits;
		struct buffer_head *bh;

		if (lcn_wnd > lcn_to)
			break;

		if (!wnd->free_bits[iw])
			continue;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (lcn_wnd + wbits > lcn_to)
			wbits = lcn_to - lcn_wnd;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		for (; wbit < wbits; wbit++) {
			if (!test_bit(wbit, buf)) {
				if (!len)
					lcn = lcn_wnd + wbit;
				len += 1;
				continue;
			}
			if (len >= minlen) {
				err = ntfs_discard(sbi, lcn, len);
				if (err) {
					put_bh(bh);
					goto out;
				}
				done += len;
			}
			len = 0;
		}
		put_bh(bh);
	}

	/* Process the last fragment. */
	if (len >= minlen) {
		err = ntfs_discard(sbi, lcn, len);
		if (err)
			goto out;
		done += len;
	}

out:
	range->len = (u64)done << sbi->cluster_bits;

	up_read(&wnd->rw_lock);

	return err;
}