// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free cluster extents.
 * The trees are sorted by start of extent and by length of extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in the trees.
 * In the extreme case, the code reads the on-disk bitmap to find free clusters.
 *
 */

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/nls.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * Maximum number of extents in each tree.
 */
#define NTFS_MAX_WND_EXTENTS (32u * 1024u)

struct rb_node_key {
	struct rb_node node;
	size_t key;
};

/* Tree is sorted by start (key). */
struct e_node {
	struct rb_node_key start; /* Tree sorted by start. */
	struct rb_node_key count; /* Tree sorted by len. */
};

static int wnd_rescan(struct wnd_bitmap *wnd);
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);

static struct kmem_cache *ntfs_enode_cachep;

int __init ntfs3_init_bitmap(void)
{
	ntfs_enode_cachep =
		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	return ntfs_enode_cachep ? 0 : -ENOMEM;
}

void ntfs3_exit_bitmap(void)
{
	kmem_cache_destroy(ntfs_enode_cachep);
}

static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
{
	return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
}

/*
 * wnd_scan - Scan the range [wpos, wend) of window @buf.
 *
 * (*b_pos, *b_len) - the biggest free fragment seen so far.
 *
 * Return: Starting bit of a free fragment of at least @to_alloc bits,
 * or -1 if no such fragment is found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
		       size_t *b_len)
{
	while (wpos < wend) {
		size_t free_len;
		u32 free_bits, end;
		u32 used = find_next_zero_bit(buf, wend, wpos);

		if (used >= wend) {
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
			return -1;
		}

		if (used > wpos) {
			wpos = used;
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
		}

		/*
		 * Now we have a fragment [wpos, wend) starting with 0.
		 */
		end = wpos + to_alloc - *prev_tail;
		free_bits = find_next_bit(buf, min(end, wend), wpos);

		free_len = *prev_tail + free_bits - wpos;

		if (*b_len < free_len) {
			*b_pos = wbit + wpos - *prev_tail;
			*b_len = free_len;
		}

		if (free_len >= to_alloc)
			return wbit + wpos - *prev_tail;

		if (free_bits >= wend) {
			*prev_tail += free_bits - wpos;
			return -1;
		}

		wpos = free_bits + 1;

		*prev_tail = 0;
	}

	return -1;
}

/*
 * wnd_close - Frees all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
	struct rb_node *node, *next;

	kfree(wnd->free_bits);
	run_close(&wnd->run);

	node = rb_first(&wnd->start_tree);

	while (node) {
		next = rb_next(node);
		rb_erase(node, &wnd->start_tree);
		kmem_cache_free(ntfs_enode_cachep,
				rb_entry(node, struct e_node, start.node));
		node = next;
	}
}

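/*
 * rb_lookup - Find the node whose key equals @v, or else the rightmost
 * node with key less than @v.
 *
 * Return: NULL if no such node exists.
 */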
static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *r = NULL;

	while (*p) {
		struct rb_node_key *k;

		k = rb_entry(*p, struct rb_node_key, node);
		if (v < k->key) {
			p = &(*p)->rb_left;
		} else if (v > k->key) {
			r = &k->node;
			p = &(*p)->rb_right;
		} else {
			return &k->node;
		}
	}

	return r;
}

/*
 * rb_insert_count - Helper function to insert a node into the 'count' tree
 * (sorted by length, then by start).
 */
static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_ckey = e->count.key;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k =
			rb_entry(parent = *p, struct e_node, count.node);

		if (e_ckey > k->count.key) {
			p = &(*p)->rb_left;
		} else if (e_ckey < k->count.key) {
			p = &(*p)->rb_right;
		} else if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->count.node, parent, p);
	rb_insert_color(&e->count.node, root);
	return true;
}

/*
 * rb_insert_start - Helper function to insert a node into the 'start' tree
 * (sorted by start).
 */
static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k;

		parent = *p;

		k = rb_entry(parent, struct e_node, start.node);
		if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->start.node, parent, p);
	rb_insert_color(&e->start.node, root);
	return true;
}

/*
 * wnd_add_free_ext - Adds a new extent of free space.
 * @build: 1 when building tree.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
			     bool build)
{
	struct e_node *e, *e0 = NULL;
	size_t ib, end_in = bit + len;
	struct rb_node *n;

	if (build) {
		/* Use extent_min to filter too short extents. */
		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
		    len <= wnd->extent_min) {
			wnd->uptodated = -1;
			return;
		}
	} else {
		/* Try to find extent before 'bit'. */
		n = rb_lookup(&wnd->start_tree, bit);

		if (!n) {
			n = rb_first(&wnd->start_tree);
		} else {
			e = rb_entry(n, struct e_node, start.node);
			n = rb_next(n);
			if (e->start.key + e->count.key == bit) {
				/* Remove left. */
				bit = e->start.key;
				len += e->count.key;
				rb_erase(&e->start.node, &wnd->start_tree);
				rb_erase(&e->count.node, &wnd->count_tree);
				wnd->count -= 1;
				e0 = e;
			}
		}

		while (n) {
			size_t next_end;

			e = rb_entry(n, struct e_node, start.node);
			next_end = e->start.key + e->count.key;
			if (e->start.key > end_in)
				break;

			/* Remove right. */
			n = rb_next(n);
			len += next_end - end_in;
			end_in = next_end;
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;

			if (!e0)
				e0 = e;
			else
				kmem_cache_free(ntfs_enode_cachep, e);
		}

		if (wnd->uptodated != 1) {
			/* Check bits before 'bit'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     bit < wnd->zone_end
				     ? 0
				     : wnd->zone_end;

			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
				bit -= 1;
				len += 1;
			}

			/* Check bits after 'end_in'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     end_in > wnd->zone_bit
				     ? wnd->nbits
				     : wnd->zone_bit;

			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
				end_in += 1;
				len += 1;
			}
		}
	}
	/* Insert new fragment. */
	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		if (e0)
			kmem_cache_free(ntfs_enode_cachep, e0);

		wnd->uptodated = -1;

		/* Compare with smallest fragment. */
		n = rb_last(&wnd->count_tree);
		e = rb_entry(n, struct e_node, count.node);
		if (len <= e->count.key)
			goto out; /* Do not insert small fragments. */

		if (build) {
			struct e_node *e2;

			n = rb_prev(n);
			e2 = rb_entry(n, struct e_node, count.node);
			/* Smallest fragment will be 'e2->count.key'. */
			wnd->extent_min = e2->count.key;
		}

		/* Replace smallest fragment by new one. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e) {
			wnd->uptodated = -1;
			goto out;
		}

		if (build && len <= wnd->extent_min)
			wnd->extent_min = len;
	}
	e->start.key = bit;
	e->count.key = len;
	if (len > wnd->extent_max)
		wnd->extent_max = len;

	rb_insert_start(&wnd->start_tree, e);
	rb_insert_count(&wnd->count_tree, e);
	wnd->count += 1;

out:;
}

/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
	struct rb_node *n, *n3;
	struct e_node *e, *e3;
	size_t end_in = bit + len;
	size_t end3, end, new_key, new_len, max_new_len;

	/* Try to find extent before 'bit'. */
	n = rb_lookup(&wnd->start_tree, bit);

	if (!n)
		return;

	e = rb_entry(n, struct e_node, start.node);
	end = e->start.key + e->count.key;

	new_key = new_len = 0;
	len = e->count.key;

	/* Range [bit, end_in) must be inside 'e' or outside 'e' and 'n'. */
	if (e->start.key > bit)
		;
	else if (end_in <= end) {
		/* Range [bit, end_in) inside 'e'. */
		new_key = end_in;
		new_len = end - end_in;
		len = bit - e->start.key;
	} else if (bit > end) {
		bool bmax = false;

		n3 = rb_next(n);

		while (n3) {
			e3 = rb_entry(n3, struct e_node, start.node);
			if (e3->start.key >= end_in)
				break;

			if (e3->count.key == wnd->extent_max)
				bmax = true;

			end3 = e3->start.key + e3->count.key;
			if (end3 > end_in) {
				e3->start.key = end_in;
				rb_erase(&e3->count.node, &wnd->count_tree);
				e3->count.key = end3 - end_in;
				rb_insert_count(&wnd->count_tree, e3);
				break;
			}

			n3 = rb_next(n3);
			rb_erase(&e3->start.node, &wnd->start_tree);
			rb_erase(&e3->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e3);
		}
		if (!bmax)
			return;
		n3 = rb_first(&wnd->count_tree);
		wnd->extent_max =
			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
			   : 0;
		return;
	}

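	/*
	 * If 'e' was the biggest extent and is now being shrunk or split,
	 * recompute wnd->extent_max from the remaining extents.
	 */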
	if (e->count.key != wnd->extent_max) {
		;
	} else if (rb_prev(&e->count.node)) {
		;
	} else {
		n3 = rb_next(&e->count.node);
		max_new_len = len > new_len ? len : new_len;
		if (!n3) {
			wnd->extent_max = max_new_len;
		} else {
			e3 = rb_entry(n3, struct e_node, count.node);
			wnd->extent_max = max(e3->count.key, max_new_len);
		}
	}

	if (!len) {
		if (new_len) {
			e->start.key = new_key;
			rb_erase(&e->count.node, &wnd->count_tree);
			e->count.key = new_len;
			rb_insert_count(&wnd->count_tree, e);
		} else {
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e);
		}
		goto out;
	}
	rb_erase(&e->count.node, &wnd->count_tree);
	e->count.key = len;
	rb_insert_count(&wnd->count_tree, e);

	if (!new_len)
		goto out;

	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		wnd->uptodated = -1;

		/* Get minimal extent. */
		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
			     count.node);
		if (e->count.key > new_len)
			goto out;

		/* Replace minimum. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e)
			wnd->uptodated = -1;
	}

	if (e) {
		e->start.key = new_key;
		e->count.key = new_len;
		rb_insert_start(&wnd->start_tree, e);
		rb_insert_count(&wnd->count_tree, e);
		wnd->count += 1;
	}

out:
	if (!wnd->count && 1 != wnd->uptodated)
		wnd_rescan(wnd);
}

/*
 * wnd_rescan - Scan the entire bitmap. Used during initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
	int err = 0;
	size_t prev_tail = 0;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u64 lbo, len = 0;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 wbits = 8 * sb->s_blocksize;
	u32 used, frb;
	const ulong *buf;
	size_t wpos, wbit, iw, vbo;
	struct buffer_head *bh = NULL;
	CLST lcn, clen;

	wnd->uptodated = 0;
	wnd->extent_max = 0;
	wnd->extent_min = MINUS_ONE_T;
	wnd->total_zeroes = 0;

	vbo = 0;

	for (iw = 0; iw < wnd->nwnd; iw++) {
		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (wnd->inited) {
			if (!wnd->free_bits[iw]) {
				/* All ones. */
				if (prev_tail) {
					wnd_add_free_ext(wnd,
							 vbo * 8 - prev_tail,
							 prev_tail, true);
					prev_tail = 0;
				}
				goto next_wnd;
			}
			if (wbits == wnd->free_bits[iw]) {
				/* All zeroes. */
				prev_tail += wbits;
				wnd->total_zeroes += wbits;
				goto next_wnd;
			}
		}

		if (!len) {
			u32 off = vbo & sbi->cluster_mask;

			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
					      &lcn, &clen, NULL)) {
				err = -ENOENT;
				goto out;
			}

			lbo = ((u64)lcn << cluster_bits) + off;
			len = ((u64)clen << cluster_bits) - off;
		}

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh) {
			err = -EIO;
			goto out;
		}

		buf = (ulong *)bh->b_data;

		used = __bitmap_weight(buf, wbits);
		if (used < wbits) {
			frb = wbits - used;
			wnd->free_bits[iw] = frb;
			wnd->total_zeroes += frb;
		}

		wpos = 0;
		wbit = vbo * 8;

		if (wbit + wbits > wnd->nbits)
			wbits = wnd->nbits - wbit;

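		/*
		 * Walk this window: every run of zero bits becomes a free
		 * extent. A run that reaches the end of the window is kept
		 * in 'prev_tail' so it can be merged with free bits at the
		 * start of the next window.
		 */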
		do {
			used = find_next_zero_bit(buf, wbits, wpos);

			if (used > wpos && prev_tail) {
				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
						 prev_tail, true);
				prev_tail = 0;
			}

			wpos = used;

			if (wpos >= wbits) {
				/* No free blocks. */
				prev_tail = 0;
				break;
			}

			frb = find_next_bit(buf, wbits, wpos);
			if (frb >= wbits) {
				/* Keep last free block. */
				prev_tail += frb - wpos;
				break;
			}

			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
					 frb + prev_tail - wpos, true);

			/* Skip free block and first '1'. */
			wpos = frb + 1;
			/* Reset previous tail. */
			prev_tail = 0;
		} while (wpos < wbits);

next_wnd:

		if (bh)
			put_bh(bh);
		bh = NULL;

		vbo += blocksize;
		if (len) {
			len -= blocksize;
			lbo += blocksize;
		}
	}

	/* Add last block. */
	if (prev_tail)
		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

	/*
	 * Before the init cycle wnd->uptodated was 0.
	 * If any errors or limits occur during initialization then
	 * wnd->uptodated will be -1.
	 * If 'uptodated' is still 0 then the tree is fully up to date.
	 */
	if (!wnd->uptodated)
		wnd->uptodated = 1;

	if (wnd->zone_bit != wnd->zone_end) {
		size_t zlen = wnd->zone_end - wnd->zone_bit;

		wnd->zone_end = wnd->zone_bit;
		wnd_zone_set(wnd, wnd->zone_bit, zlen);
	}

out:
	return err;
}

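/*
 * wnd_init - Initialize bitmap @wnd for @nbits bits on @sb.
 *
 * Allocates the per-window 'free_bits' array and performs the initial
 * wnd_rescan() that builds the free extent trees.
 */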
int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
{
	int err;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;

	init_rwsem(&wnd->rw_lock);

	wnd->sb = sb;
	wnd->nbits = nbits;
	wnd->total_zeroes = nbits;
	wnd->extent_max = MINUS_ONE_T;
	wnd->zone_bit = wnd->zone_end = 0;
	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
	wnd->bits_last = nbits & (wbits - 1);
	if (!wnd->bits_last)
		wnd->bits_last = wbits;

	wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
	if (!wnd->free_bits)
		return -ENOMEM;

	err = wnd_rescan(wnd);
	if (err)
		return err;

	wnd->inited = true;

	return 0;
}

/*
 * wnd_map - Call sb_bread for requested window.
 */
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
{
	size_t vbo;
	CLST lcn, clen;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi;
	struct buffer_head *bh;
	u64 lbo;

	sbi = sb->s_fs_info;
	vbo = (u64)iw << sb->s_blocksize_bits;

	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
			      NULL)) {
		return ERR_PTR(-ENOENT);
	}

	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);

	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
	if (!bh)
		return ERR_PTR(-EIO);

	return bh;
}

/*
 * wnd_set_free - Mark the bits range from bit to bit + bits as free.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	u32 wbits = 8 * sb->s_blocksize;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_clear(buf, wbit, op);

		wnd->free_bits[iw] += op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes += op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	wnd_add_free_ext(wnd, bit, bits0, false);

	return err;
}

/*
 * wnd_set_used - Mark the bits range from bit to bit + bits as used.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}
		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_set(buf, wbit, op);
		wnd->free_bits[iw] -= op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes -= op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	if (!RB_EMPTY_ROOT(&wnd->start_tree))
		wnd_remove_free_ext(wnd, bit, bits0);

	return err;
}

/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		if (wbits != wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				return false;

			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

			put_bh(bh);
			if (!ret)
				return false;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}

	return true;
}

/*
 * wnd_is_free
 *
 * Return: True if all clusters [bit, bit+bits) are free.
 */
bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret;
	struct rb_node *n;
	size_t end;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	n = rb_lookup(&wnd->start_tree, bit);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);

	end = e->start.key + e->count.key;

	if (bit < end && bit + bits <= end)
		return true;

use_wnd:
	ret = wnd_is_free_hlp(wnd, bit, bits);

	return ret;
}

/*
 * wnd_is_used
 *
 * Return: True if all clusters [bit, bit+bits) are used.
 */
bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret = false;
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	size_t end;
	struct rb_node *n;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	end = bit + bits;
	n = rb_lookup(&wnd->start_tree, end - 1);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);
	if (e->start.key + e->count.key > bit)
		return false;

use_wnd:
	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		if (wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				goto out;

			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
			put_bh(bh);
			if (!ret)
				goto out;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}
	ret = true;

out:
	return ret;
}

/*
 * wnd_find - Look for free space.
 *
 * @to_alloc  - number of free clusters wanted.
 * @hint      - preferred starting cluster.
 * @flags     - BITMAP_FIND_XXX flags.
 * @allocated - on success, receives the first found cluster.
 *
 * Return: Number of clusters found, or 0 if not found.
 */
size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
		size_t flags, size_t *allocated)
{
	struct super_block *sb;
	u32 wbits, wpos, wzbit, wzend;
	size_t fnd, max_alloc, b_len, b_pos;
	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
	size_t to_alloc0 = to_alloc;
	const ulong *buf;
	const struct e_node *e;
	const struct rb_node *pr, *cr;
	u8 log2_bits;
	bool fbits_valid;
	struct buffer_head *bh;

	/* Fast checking for available free space. */
	if (flags & BITMAP_FIND_FULL) {
		size_t zeroes = wnd_zeroes(wnd);

		zeroes -= wnd->zone_end - wnd->zone_bit;
		if (zeroes < to_alloc0)
			goto no_space;

		if (to_alloc0 > wnd->extent_max)
			goto no_space;
	} else {
		if (to_alloc > wnd->extent_max)
			to_alloc = wnd->extent_max;
	}

	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
		hint = wnd->zone_end;

	max_alloc = wnd->nbits;
	b_len = b_pos = 0;

	if (hint >= max_alloc)
		hint = 0;

	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
		if (wnd->uptodated == 1) {
			/* Extents tree is updated -> No free space. */
			goto no_space;
		}
		goto scan_bitmap;
	}

	e = NULL;
	if (!hint)
		goto allocate_biggest;

	/* Use hint: Enumerate extents by start >= hint. */
	pr = NULL;
	cr = wnd->start_tree.rb_node;

	for (;;) {
		e = rb_entry(cr, struct e_node, start.node);

		if (e->start.key == hint)
			break;

		if (e->start.key < hint) {
			pr = cr;
			cr = cr->rb_right;
			if (!cr)
				break;
			continue;
		}

		cr = cr->rb_left;
		if (!cr) {
			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
			break;
		}
	}

	if (!e)
		goto allocate_biggest;

	if (e->start.key + e->count.key > hint) {
		/* We have found an extent that contains 'hint'. */
		size_t len = e->start.key + e->count.key - hint;

		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
			fnd = hint;
			goto found;
		}

		if (!(flags & BITMAP_FIND_FULL)) {
			if (len > to_alloc)
				len = to_alloc;

			if (hint + len <= max_alloc) {
				fnd = hint;
				to_alloc = len;
				goto found;
			}
		}
	}

allocate_biggest:
	/* Allocate from biggest free extent. */
	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
	if (e->count.key != wnd->extent_max)
		wnd->extent_max = e->count.key;

	if (e->count.key < max_alloc) {
		if (e->count.key >= to_alloc) {
			;
		} else if (flags & BITMAP_FIND_FULL) {
			if (e->count.key < to_alloc0) {
				/* Biggest free block is less than requested. */
				goto no_space;
			}
			to_alloc = e->count.key;
		} else if (-1 != wnd->uptodated) {
			to_alloc = e->count.key;
		} else {
			/* Check if we can use more bits. */
			size_t op, max_check;
			struct rb_root start_tree;

			memcpy(&start_tree, &wnd->start_tree,
			       sizeof(struct rb_root));
			memset(&wnd->start_tree, 0, sizeof(struct rb_root));

			max_check = e->start.key + to_alloc;
			if (max_check > max_alloc)
				max_check = max_alloc;
			for (op = e->start.key + e->count.key; op < max_check;
			     op++) {
				if (!wnd_is_free(wnd, op, 1))
					break;
			}
			memcpy(&wnd->start_tree, &start_tree,
			       sizeof(struct rb_root));
			to_alloc = op - e->start.key;
		}

		/* Prepare to return. */
		fnd = e->start.key;
		if (e->start.key + to_alloc > max_alloc)
			to_alloc = max_alloc - e->start.key;
		goto found;
	}

	if (wnd->uptodated == 1) {
		/* Extents tree is updated -> no free space. */
		goto no_space;
	}

	b_len = e->count.key;
	b_pos = e->start.key;

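	/*
	 * The extent trees could not satisfy the request and are not
	 * guaranteed to be complete; fall back to scanning the on-disk
	 * bitmap window by window.
	 */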
scan_bitmap:
	sb = wnd->sb;
	log2_bits = sb->s_blocksize_bits + 3;

	/* At most two ranges: [hint, max_alloc) + [0, hint). */
Again:

	/* TODO: Optimize request for case nbits > wbits. */
	iw = hint >> log2_bits;
	wbits = sb->s_blocksize * 8;
	wpos = hint & (wbits - 1);
	prev_tail = 0;
	fbits_valid = true;

	if (max_alloc == wnd->nbits) {
		nwnd = wnd->nwnd;
	} else {
		size_t t = max_alloc + wbits - 1;

		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
	}

	/* Enumerate all windows. */
	for (; iw < nwnd; iw++) {
		wbit = iw << log2_bits;

		if (!wnd->free_bits[iw]) {
			if (prev_tail > b_len) {
				b_pos = wbit - prev_tail;
				b_len = prev_tail;
			}

			/* Skip fully used window. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		if (unlikely(iw + 1 == nwnd)) {
			if (max_alloc == wnd->nbits) {
				wbits = wnd->bits_last;
			} else {
				size_t t = max_alloc & (wbits - 1);

				if (t) {
					wbits = t;
					fbits_valid = false;
				}
			}
		}

		if (wnd->zone_end > wnd->zone_bit) {
			ebit = wbit + wbits;
			zbit = max(wnd->zone_bit, wbit);
			zend = min(wnd->zone_end, ebit);

			/* Here we have a window [wbit, ebit) and zone [zbit, zend). */
			if (zend <= zbit) {
				/* Zone does not overlap window. */
			} else {
				wzbit = zbit - wbit;
				wzend = zend - wbit;

				/* Zone overlaps window. */
				if (wnd->free_bits[iw] == wzend - wzbit) {
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				/* Scan two ranges of the window: [wbit, zbit) and [zend, ebit). */
				bh = wnd_map(wnd, iw);

				if (IS_ERR(bh)) {
					/* TODO: Error. */
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				buf = (ulong *)bh->b_data;

				/* Scan range [wbit, zbit). */
				if (wpos < wzbit) {
					/* Scan range [wpos, zbit). */
					fnd = wnd_scan(buf, wbit, wpos, wzbit,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				prev_tail = 0;

				/* Scan range [zend, ebit). */
				if (wzend < wbits) {
					fnd = wnd_scan(buf, wbit,
						       max(wzend, wpos), wbits,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				wpos = 0;
				put_bh(bh);
				continue;
			}
		}

		/* Current window does not overlap zone. */
		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
			/* Window is empty. */
			if (prev_tail + wbits >= to_alloc) {
				fnd = wbit + wpos - prev_tail;
				goto found;
			}

			/* Increase 'prev_tail' and process next window. */
			prev_tail += wbits;
			wpos = 0;
			continue;
		}

		/* Read window. */
		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			/* TODO: Error. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		buf = (ulong *)bh->b_data;

		/* Scan range [wpos, wbits). */
		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
			       &b_pos, &b_len);
		put_bh(bh);
		if (fnd != MINUS_ONE_T)
			goto found;
	}

	if (b_len < prev_tail) {
		/* The last fragment. */
		b_len = prev_tail;
		b_pos = max_alloc - prev_tail;
	}

	if (hint) {
		/*
		 * We have scanned range [hint, max_alloc).
		 * Prepare to scan range [0, hint + to_alloc).
		 */
		size_t nextmax = hint + to_alloc;

		if (likely(nextmax >= hint) && nextmax < max_alloc)
			max_alloc = nextmax;
		hint = 0;
		goto Again;
	}

	if (!b_len)
		goto no_space;

	wnd->extent_max = b_len;

	if (flags & BITMAP_FIND_FULL)
		goto no_space;

	fnd = b_pos;
	to_alloc = b_len;

found:
	if (flags & BITMAP_FIND_MARK_AS_USED) {
		/* TODO: Optimize remove extent (pass 'e'?). */
		if (wnd_set_used(wnd, fnd, to_alloc))
			goto no_space;
	} else if (wnd->extent_max != MINUS_ONE_T &&
		   to_alloc > wnd->extent_max) {
		wnd->extent_max = to_alloc;
	}

	*allocated = fnd;
	return to_alloc;

no_space:
	return 0;
}

/*
 * wnd_extend - Extend the bitmap ($MFT bitmap).
 */
int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
{
	int err;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;
	u32 b0, new_last;
	size_t bits, iw, new_wnd;
	size_t old_bits = wnd->nbits;
	u16 *new_free;

	if (new_bits <= old_bits)
		return -EINVAL;

	/* Align to 8 byte boundary. */
	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
	new_last = new_bits & (wbits - 1);
	if (!new_last)
		new_last = wbits;

	if (new_wnd != wnd->nwnd) {
		new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
		if (!new_free)
			return -ENOMEM;

		if (new_free != wnd->free_bits)
			memcpy(new_free, wnd->free_bits,
			       wnd->nwnd * sizeof(short));
		memset(new_free + wnd->nwnd, 0,
		       (new_wnd - wnd->nwnd) * sizeof(short));
		kfree(wnd->free_bits);
		wnd->free_bits = new_free;
	}

	/* Zero bits [old_bits, new_bits). */
	bits = new_bits - old_bits;
	b0 = old_bits & (wbits - 1);

	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
		u32 op;
		size_t frb;
		u64 vbo, lbo, bytes;
		struct buffer_head *bh;
		ulong *buf;

		if (iw + 1 == new_wnd)
			wbits = new_last;

		op = b0 + bits > wbits ? wbits - b0 : bits;
		vbo = (u64)iw * blocksize;

		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
		if (err)
			break;

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh)
			return -EIO;

		lock_buffer(bh);
		buf = (ulong *)bh->b_data;

		__bitmap_clear(buf, b0, blocksize * 8 - b0);
		frb = wbits - __bitmap_weight(buf, wbits);
		wnd->total_zeroes += frb - wnd->free_bits[iw];
		wnd->free_bits[iw] = frb;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		/* err = sync_dirty_buffer(bh); */

		b0 = 0;
		bits -= op;
	}

	wnd->nbits = new_bits;
	wnd->nwnd = new_wnd;
	wnd->bits_last = new_last;

	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);

	return 0;
}

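/*
 * wnd_zone_set - Set the reserved zone to [lcn, lcn + len).
 *
 * The previous zone, if any, is returned to the free extent trees and
 * the new zone is removed from them.
 */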
void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
	size_t zlen;

	zlen = wnd->zone_end - wnd->zone_bit;
	if (zlen)
		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);

	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
		wnd_remove_free_ext(wnd, lcn, len);

	wnd->zone_bit = lcn;
	wnd->zone_end = lcn + len;
}

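/*
 * ntfs_trim_fs - Discard (trim) runs of free clusters in @range.
 *
 * Runs of at least 'minlen' free clusters inside [lcn_from, lcn_to) are
 * passed to ntfs_discard(). On return, range->len holds the number of
 * bytes actually discarded.
 */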
int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	u32 wbits = 8 * sb->s_blocksize;
	CLST len = 0, lcn = 0, done = 0;
	CLST minlen = bytes_to_cluster(sbi, range->minlen);
	CLST lcn_from = bytes_to_cluster(sbi, range->start);
	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
	u32 wbit = lcn_from & (wbits - 1);
	const ulong *buf;
	CLST lcn_to;

	if (!minlen)
		minlen = 1;

	if (range->len == (u64)-1)
		lcn_to = wnd->nbits;
	else
		lcn_to = bytes_to_cluster(sbi, range->start + range->len);

	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);

	for (; iw < wnd->nwnd; iw++, wbit = 0) {
		CLST lcn_wnd = iw * wbits;
		struct buffer_head *bh;

		if (lcn_wnd > lcn_to)
			break;

		if (!wnd->free_bits[iw])
			continue;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (lcn_wnd + wbits > lcn_to)
			wbits = lcn_to - lcn_wnd;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		for (; wbit < wbits; wbit++) {
			if (!test_bit(wbit, buf)) {
				if (!len)
					lcn = lcn_wnd + wbit;
				len += 1;
				continue;
			}
			if (len >= minlen) {
				err = ntfs_discard(sbi, lcn, len);
				if (err) {
					put_bh(bh);
					goto out;
				}
				done += len;
			}
			len = 0;
		}
		put_bh(bh);
	}

	/* Process the last fragment. */
	if (len >= minlen) {
		err = ntfs_discard(sbi, lcn, len);
		if (err)
			goto out;
		done += len;
	}

out:
	range->len = (u64)done << sbi->cluster_bits;

	up_read(&wnd->rw_lock);

	return err;
}