// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free cluster extents.
 * The trees are sorted by start of extent and by length of extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in the trees.
 * In the extreme case, the code reads the on-disk bitmap to find free clusters.
 *
 */

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/nls.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * Maximum number of extents in tree.
 */
#define NTFS_MAX_WND_EXTENTS (32u * 1024u)

struct rb_node_key {
	struct rb_node node;
	size_t key;
};

struct e_node {
	struct rb_node_key start; /* Tree sorted by start. */
	struct rb_node_key count; /* Tree sorted by len. */
};
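
/*
 * Note: each cached free extent is a single e_node linked into both trees
 * at once. The 'start' tree is keyed by the first bit of the extent; the
 * 'count' tree is keyed by its length, biggest extent first, with the start
 * bit as a tie-breaker. Hence rb_first(&count_tree) is the largest cached
 * free extent and rb_last(&count_tree) is the smallest one.
 */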

static int wnd_rescan(struct wnd_bitmap *wnd);
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);

static struct kmem_cache *ntfs_enode_cachep;

int __init ntfs3_init_bitmap(void)
{
	ntfs_enode_cachep =
		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	return ntfs_enode_cachep ? 0 : -ENOMEM;
}

void ntfs3_exit_bitmap(void)
{
	kmem_cache_destroy(ntfs_enode_cachep);
}

static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
{
	return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
}

/*
 * wnd_scan
 *
 * Scan the range [wpos, wend) of the window @buf.
 * @b_pos and @b_len track the biggest free fragment found so far.
 *
 * Return: -1 if not found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
		       size_t *b_len)
{
	while (wpos < wend) {
		size_t free_len;
		u32 free_bits, end;
		u32 used = find_next_zero_bit(buf, wend, wpos);

		if (used >= wend) {
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
			return -1;
		}

		if (used > wpos) {
			wpos = used;
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
		}

		/*
		 * Now we have a fragment [wpos, wend) starting with 0.
		 */
		end = wpos + to_alloc - *prev_tail;
		free_bits = find_next_bit(buf, min(end, wend), wpos);

		free_len = *prev_tail + free_bits - wpos;

		if (*b_len < free_len) {
			*b_pos = wbit + wpos - *prev_tail;
			*b_len = free_len;
		}

		if (free_len >= to_alloc)
			return wbit + wpos - *prev_tail;

		if (free_bits >= wend) {
			*prev_tail += free_bits - wpos;
			return -1;
		}

		wpos = free_bits + 1;

		*prev_tail = 0;
	}

	return -1;
}

/*
 * wnd_close - Frees all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
	struct rb_node *node, *next;

	kfree(wnd->free_bits);
	run_close(&wnd->run);

	node = rb_first(&wnd->start_tree);

	while (node) {
		next = rb_next(node);
		rb_erase(node, &wnd->start_tree);
		kmem_cache_free(ntfs_enode_cachep,
				rb_entry(node, struct e_node, start.node));
		node = next;
	}
}

/*
 * rb_lookup - Find the node with the largest key that is <= @v (NULL if none).
 */
static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *r = NULL;

	while (*p) {
		struct rb_node_key *k;

		k = rb_entry(*p, struct rb_node_key, node);
		if (v < k->key) {
			p = &(*p)->rb_left;
		} else if (v > k->key) {
			r = &k->node;
			p = &(*p)->rb_right;
		} else {
			return &k->node;
		}
	}

	return r;
}

/*
 * rb_insert_count - Helper function to insert a node into the 'count' tree.
 *
 * The tree is sorted by length, biggest first; equal lengths are ordered
 * by start.
 */
static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_ckey = e->count.key;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k =
			rb_entry(parent = *p, struct e_node, count.node);

		if (e_ckey > k->count.key) {
			p = &(*p)->rb_left;
		} else if (e_ckey < k->count.key) {
			p = &(*p)->rb_right;
		} else if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->count.node, parent, p);
	rb_insert_color(&e->count.node, root);
	return true;
}

/*
 * rb_insert_start - Helper function to insert a node into the 'start' tree.
 */
static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k;

		parent = *p;

		k = rb_entry(parent, struct e_node, start.node);
		if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->start.node, parent, p);
	rb_insert_color(&e->start.node, root);
	return true;
}
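
/*
 * Illustrative example (the values are made up): caching the free extents
 * {start 10, len 4}, {start 50, len 4} and {start 20, len 9} gives
 *
 *	start tree (by first bit):	10 -> 20 -> 50
 *	count tree (by length, desc):	(20,9) -> (10,4) -> (50,4)
 *
 * so the biggest extent is always found at rb_first(&count_tree).
 */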

/*
 * wnd_add_free_ext - Adds a new extent of free space.
 * @build: true when building the tree.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
			     bool build)
{
	struct e_node *e, *e0 = NULL;
	size_t ib, end_in = bit + len;
	struct rb_node *n;

	if (build) {
		/* Use extent_min to filter too short extents. */
		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
		    len <= wnd->extent_min) {
			wnd->uptodated = -1;
			return;
		}
	} else {
		/* Try to find extent before 'bit'. */
		n = rb_lookup(&wnd->start_tree, bit);

		if (!n) {
			n = rb_first(&wnd->start_tree);
		} else {
			e = rb_entry(n, struct e_node, start.node);
			n = rb_next(n);
			if (e->start.key + e->count.key == bit) {
				/* Remove left. */
				bit = e->start.key;
				len += e->count.key;
				rb_erase(&e->start.node, &wnd->start_tree);
				rb_erase(&e->count.node, &wnd->count_tree);
				wnd->count -= 1;
				e0 = e;
			}
		}

		while (n) {
			size_t next_end;

			e = rb_entry(n, struct e_node, start.node);
			next_end = e->start.key + e->count.key;
			if (e->start.key > end_in)
				break;

			/* Remove right. */
			n = rb_next(n);
			len += next_end - end_in;
			end_in = next_end;
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;

			if (!e0)
				e0 = e;
			else
				kmem_cache_free(ntfs_enode_cachep, e);
		}

		if (wnd->uptodated != 1) {
			/* Check bits before 'bit'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     bit < wnd->zone_end
				     ? 0
				     : wnd->zone_end;

			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
				bit -= 1;
				len += 1;
			}

			/* Check bits after 'end_in'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     end_in > wnd->zone_bit
				     ? wnd->nbits
				     : wnd->zone_bit;

			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
				end_in += 1;
				len += 1;
			}
		}
	}
	/* Insert new fragment. */
	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		if (e0)
			kmem_cache_free(ntfs_enode_cachep, e0);

		wnd->uptodated = -1;

		/* Compare with smallest fragment. */
		n = rb_last(&wnd->count_tree);
		e = rb_entry(n, struct e_node, count.node);
		if (len <= e->count.key)
			goto out; /* Do not insert small fragments. */

		if (build) {
			struct e_node *e2;

			n = rb_prev(n);
			e2 = rb_entry(n, struct e_node, count.node);
			/* Smallest fragment will be 'e2->count.key'. */
			wnd->extent_min = e2->count.key;
		}

		/* Replace smallest fragment by new one. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e) {
			wnd->uptodated = -1;
			goto out;
		}

		if (build && len <= wnd->extent_min)
			wnd->extent_min = len;
	}
	e->start.key = bit;
	e->count.key = len;
	if (len > wnd->extent_max)
		wnd->extent_max = len;

	rb_insert_start(&wnd->start_tree, e);
	rb_insert_count(&wnd->count_tree, e);
	wnd->count += 1;

out:;
}
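
/*
 * Note: the extent trees are a bounded cache. Once wnd->count reaches
 * NTFS_MAX_WND_EXTENTS, wnd_add_free_ext() evicts the smallest cached
 * extent (or drops the new one) and sets wnd->uptodated to -1, meaning
 * the trees describe only part of the free space. uptodated == 1 means
 * the trees are complete, 0 means a rescan is still in progress.
 */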

/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
	struct rb_node *n, *n3;
	struct e_node *e, *e3;
	size_t end_in = bit + len;
	size_t end3, end, new_key, new_len, max_new_len;

	/* Try to find extent before 'bit'. */
	n = rb_lookup(&wnd->start_tree, bit);

	if (!n)
		return;

	e = rb_entry(n, struct e_node, start.node);
	end = e->start.key + e->count.key;

	new_key = new_len = 0;
	len = e->count.key;

	/* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
	if (e->start.key > bit)
		;
	else if (end_in <= end) {
		/* Range [bit,end_in) inside 'e'. */
		new_key = end_in;
		new_len = end - end_in;
		len = bit - e->start.key;
	} else if (bit > end) {
		bool bmax = false;

		n3 = rb_next(n);

		while (n3) {
			e3 = rb_entry(n3, struct e_node, start.node);
			if (e3->start.key >= end_in)
				break;

			if (e3->count.key == wnd->extent_max)
				bmax = true;

			end3 = e3->start.key + e3->count.key;
			if (end3 > end_in) {
				e3->start.key = end_in;
				rb_erase(&e3->count.node, &wnd->count_tree);
				e3->count.key = end3 - end_in;
				rb_insert_count(&wnd->count_tree, e3);
				break;
			}

			n3 = rb_next(n3);
			rb_erase(&e3->start.node, &wnd->start_tree);
			rb_erase(&e3->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e3);
		}
		if (!bmax)
			return;
		n3 = rb_first(&wnd->count_tree);
		wnd->extent_max =
			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
			   : 0;
		return;
	}

	if (e->count.key != wnd->extent_max) {
		;
	} else if (rb_prev(&e->count.node)) {
		;
	} else {
		n3 = rb_next(&e->count.node);
		max_new_len = len > new_len ? len : new_len;
		if (!n3) {
			wnd->extent_max = max_new_len;
		} else {
			e3 = rb_entry(n3, struct e_node, count.node);
			wnd->extent_max = max(e3->count.key, max_new_len);
		}
	}

	if (!len) {
		if (new_len) {
			e->start.key = new_key;
			rb_erase(&e->count.node, &wnd->count_tree);
			e->count.key = new_len;
			rb_insert_count(&wnd->count_tree, e);
		} else {
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e);
		}
		goto out;
	}
	rb_erase(&e->count.node, &wnd->count_tree);
	e->count.key = len;
	rb_insert_count(&wnd->count_tree, e);

	if (!new_len)
		goto out;

	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		wnd->uptodated = -1;

		/* Get minimal extent. */
		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
			     count.node);
		if (e->count.key > new_len)
			goto out;

		/* Replace minimum. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e)
			wnd->uptodated = -1;
	}

	if (e) {
		e->start.key = new_key;
		e->count.key = new_len;
		rb_insert_start(&wnd->start_tree, e);
		rb_insert_count(&wnd->count_tree, e);
		wnd->count += 1;
	}

out:
	if (!wnd->count && wnd->uptodated != 1)
		wnd_rescan(wnd);
}
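
/*
 * Note: wnd->free_bits[iw] counts the zero (free) bits in window 'iw',
 * i.e. in one filesystem block of the bitmap. A value of 0 means the
 * window is fully used and a value of wbits means it is fully free;
 * both cases let the code below skip reading the block from disk.
 */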

/*
 * wnd_rescan - Scan the whole bitmap. Used during initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
	int err = 0;
	size_t prev_tail = 0;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u64 lbo, len = 0;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 wbits = 8 * sb->s_blocksize;
	u32 used, frb;
	const ulong *buf;
	size_t wpos, wbit, iw, vbo;
	struct buffer_head *bh = NULL;
	CLST lcn, clen;

	wnd->uptodated = 0;
	wnd->extent_max = 0;
	wnd->extent_min = MINUS_ONE_T;
	wnd->total_zeroes = 0;

	vbo = 0;

	for (iw = 0; iw < wnd->nwnd; iw++) {
		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (wnd->inited) {
			if (!wnd->free_bits[iw]) {
				/* All ones. */
				if (prev_tail) {
					wnd_add_free_ext(wnd,
							 vbo * 8 - prev_tail,
							 prev_tail, true);
					prev_tail = 0;
				}
				goto next_wnd;
			}
			if (wbits == wnd->free_bits[iw]) {
				/* All zeroes. */
				prev_tail += wbits;
				wnd->total_zeroes += wbits;
				goto next_wnd;
			}
		}

		if (!len) {
			u32 off = vbo & sbi->cluster_mask;

			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
					      &lcn, &clen, NULL)) {
				err = -ENOENT;
				goto out;
			}

			lbo = ((u64)lcn << cluster_bits) + off;
			len = ((u64)clen << cluster_bits) - off;
		}

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh) {
			err = -EIO;
			goto out;
		}

		buf = (ulong *)bh->b_data;

		used = __bitmap_weight(buf, wbits);
		if (used < wbits) {
			frb = wbits - used;
			wnd->free_bits[iw] = frb;
			wnd->total_zeroes += frb;
		}

		wpos = 0;
		wbit = vbo * 8;

		if (wbit + wbits > wnd->nbits)
			wbits = wnd->nbits - wbit;

		do {
			used = find_next_zero_bit(buf, wbits, wpos);

			if (used > wpos && prev_tail) {
				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
						 prev_tail, true);
				prev_tail = 0;
			}

			wpos = used;

			if (wpos >= wbits) {
				/* No free blocks. */
				prev_tail = 0;
				break;
			}

			frb = find_next_bit(buf, wbits, wpos);
			if (frb >= wbits) {
				/* Keep last free block. */
				prev_tail += frb - wpos;
				break;
			}

			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
					 frb + prev_tail - wpos, true);

			/* Skip free block and first '1'. */
			wpos = frb + 1;
			/* Reset previous tail. */
			prev_tail = 0;
		} while (wpos < wbits);

next_wnd:

		if (bh)
			put_bh(bh);
		bh = NULL;

		vbo += blocksize;
		if (len) {
			len -= blocksize;
			lbo += blocksize;
		}
	}

	/* Add last block. */
	if (prev_tail)
		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

	/*
	 * Before the scan loop wnd->uptodated was set to 0.
	 * If any error occurred or a limit was hit during initialization,
	 * wnd->uptodated has become -1.
	 * If 'uptodated' is still 0 here, the trees are really up to date.
	 */
	if (!wnd->uptodated)
		wnd->uptodated = 1;

	if (wnd->zone_bit != wnd->zone_end) {
		size_t zlen = wnd->zone_end - wnd->zone_bit;

		wnd->zone_end = wnd->zone_bit;
		wnd_zone_set(wnd, wnd->zone_bit, zlen);
	}

out:
	return err;
}

int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
{
	int err;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;

	init_rwsem(&wnd->rw_lock);

	wnd->sb = sb;
	wnd->nbits = nbits;
	wnd->total_zeroes = nbits;
	wnd->extent_max = MINUS_ONE_T;
	wnd->zone_bit = wnd->zone_end = 0;
	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
	wnd->bits_last = nbits & (wbits - 1);
	if (!wnd->bits_last)
		wnd->bits_last = wbits;

	wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS);
	if (!wnd->free_bits)
		return -ENOMEM;

	err = wnd_rescan(wnd);
	if (err)
		return err;

	wnd->inited = true;

	return 0;
}
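
/*
 * Note: a "window" is one filesystem block of the on-disk bitmap.
 * wnd_map() turns a window index into a byte offset inside the bitmap
 * stream, resolves it to a device block through the run list and reads
 * that block with ntfs_bread().
 */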

/*
 * wnd_map - Read the bitmap block that backs the requested window.
 */
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
{
	size_t vbo;
	CLST lcn, clen;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi;
	struct buffer_head *bh;
	u64 lbo;

	sbi = sb->s_fs_info;
	vbo = (u64)iw << sb->s_blocksize_bits;

	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
			      NULL)) {
		return ERR_PTR(-ENOENT);
	}

	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);

	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
	if (!bh)
		return ERR_PTR(-EIO);

	return bh;
}

/*
 * wnd_set_free - Mark the bits range from bit to bit + bits as free.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	u32 wbits = 8 * sb->s_blocksize;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_clear(buf, wbit, op);

		wnd->free_bits[iw] += op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes += op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	wnd_add_free_ext(wnd, bit, bits0, false);

	return err;
}

/*
 * wnd_set_used - Mark the bits range from bit to bit + bits as used.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}
		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_set(buf, wbit, op);
		wnd->free_bits[iw] -= op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes -= op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	if (!RB_EMPTY_ROOT(&wnd->start_tree))
		wnd_remove_free_ext(wnd, bit, bits0);

	return err;
}
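
/*
 * Note: wnd_set_free() and wnd_set_used() keep three views consistent:
 * the on-disk bitmap blocks, the per-window free_bits[]/total_zeroes
 * counters, and the cached extent trees (updated via wnd_add_free_ext()
 * and wnd_remove_free_ext()).
 */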

/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		if (wbits != wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				return false;

			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

			put_bh(bh);
			if (!ret)
				return false;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}

	return true;
}

/*
 * wnd_is_free
 *
 * Return: True if all clusters [bit, bit+bits) are free.
 */
bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret;
	struct rb_node *n;
	size_t end;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	n = rb_lookup(&wnd->start_tree, bit);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);

	end = e->start.key + e->count.key;

	if (bit < end && bit + bits <= end)
		return true;

use_wnd:
	ret = wnd_is_free_hlp(wnd, bit, bits);

	return ret;
}

/*
 * wnd_is_used
 *
 * Return: True if all clusters [bit, bit+bits) are used.
 */
bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret = false;
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	size_t end;
	struct rb_node *n;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	end = bit + bits;
	n = rb_lookup(&wnd->start_tree, end - 1);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);
	if (e->start.key + e->count.key > bit)
		return false;

use_wnd:
	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = tail < bits ? tail : bits;

		if (wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				goto out;

			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
			put_bh(bh);
			if (!ret)
				goto out;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}
	ret = true;

out:
	return ret;
}
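
/*
 * Note on the BITMAP_FIND_XXX flags as they are used below:
 * BITMAP_FIND_FULL means "succeed only if the whole request fits",
 * otherwise wnd_find() may return a shorter run; BITMAP_FIND_MARK_AS_USED
 * makes wnd_find() mark the found range as used before returning.
 *
 * A typical call (illustrative sketch, not code from this file):
 *
 *	size_t lcn;
 *	size_t got = wnd_find(wnd, len, hint,
 *			      BITMAP_FIND_MARK_AS_USED, &lcn);
 *	if (!got)
 *		return -ENOSPC;	// no free run found
 *
 * On success 'got' bits starting at 'lcn' are already marked as used.
 */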

/*
 * wnd_find - Look for free space.
 *
 * - flags - BITMAP_FIND_XXX flags
 *
 * Return: Number of found bits, 0 if not found; *allocated receives the
 * first bit of the found range.
 */
size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
		size_t flags, size_t *allocated)
{
	struct super_block *sb;
	u32 wbits, wpos, wzbit, wzend;
	size_t fnd, max_alloc, b_len, b_pos;
	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
	size_t to_alloc0 = to_alloc;
	const ulong *buf;
	const struct e_node *e;
	const struct rb_node *pr, *cr;
	u8 log2_bits;
	bool fbits_valid;
	struct buffer_head *bh;

	/* Fast checking for available free space. */
	if (flags & BITMAP_FIND_FULL) {
		size_t zeroes = wnd_zeroes(wnd);

		zeroes -= wnd->zone_end - wnd->zone_bit;
		if (zeroes < to_alloc0)
			goto no_space;

		if (to_alloc0 > wnd->extent_max)
			goto no_space;
	} else {
		if (to_alloc > wnd->extent_max)
			to_alloc = wnd->extent_max;
	}

	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
		hint = wnd->zone_end;

	max_alloc = wnd->nbits;
	b_len = b_pos = 0;

	if (hint >= max_alloc)
		hint = 0;

	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
		if (wnd->uptodated == 1) {
			/* Extents tree is updated -> No free space. */
			goto no_space;
		}
		goto scan_bitmap;
	}

	e = NULL;
	if (!hint)
		goto allocate_biggest;

	/* Use hint: Enumerate extents by start >= hint. */
	pr = NULL;
	cr = wnd->start_tree.rb_node;

	for (;;) {
		e = rb_entry(cr, struct e_node, start.node);

		if (e->start.key == hint)
			break;

		if (e->start.key < hint) {
			pr = cr;
			cr = cr->rb_right;
			if (!cr)
				break;
			continue;
		}

		cr = cr->rb_left;
		if (!cr) {
			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
			break;
		}
	}

	if (!e)
		goto allocate_biggest;

	if (e->start.key + e->count.key > hint) {
		/* We have found an extent with 'hint' inside. */
		size_t len = e->start.key + e->count.key - hint;

		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
			fnd = hint;
			goto found;
		}

		if (!(flags & BITMAP_FIND_FULL)) {
			if (len > to_alloc)
				len = to_alloc;

			if (hint + len <= max_alloc) {
				fnd = hint;
				to_alloc = len;
				goto found;
			}
		}
	}

allocate_biggest:
	/* Allocate from biggest free extent. */
	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
	if (e->count.key != wnd->extent_max)
		wnd->extent_max = e->count.key;

	if (e->count.key < max_alloc) {
		if (e->count.key >= to_alloc) {
			;
		} else if (flags & BITMAP_FIND_FULL) {
			if (e->count.key < to_alloc0) {
				/* Biggest free block is less than requested. */
				goto no_space;
			}
			to_alloc = e->count.key;
		} else if (wnd->uptodated != -1) {
			to_alloc = e->count.key;
		} else {
			/* Check if we can use more bits. */
			size_t op, max_check;
			struct rb_root start_tree;

			/*
			 * Temporarily hide the start tree so that
			 * wnd_is_free() consults the on-disk bitmap.
			 */
			memcpy(&start_tree, &wnd->start_tree,
			       sizeof(struct rb_root));
			memset(&wnd->start_tree, 0, sizeof(struct rb_root));

			max_check = e->start.key + to_alloc;
			if (max_check > max_alloc)
				max_check = max_alloc;
			for (op = e->start.key + e->count.key; op < max_check;
			     op++) {
				if (!wnd_is_free(wnd, op, 1))
					break;
			}
			memcpy(&wnd->start_tree, &start_tree,
			       sizeof(struct rb_root));
			to_alloc = op - e->start.key;
		}

		/* Prepare to return. */
		fnd = e->start.key;
		if (e->start.key + to_alloc > max_alloc)
			to_alloc = max_alloc - e->start.key;
		goto found;
	}

	if (wnd->uptodated == 1) {
		/* Extents tree is updated -> no free space. */
		goto no_space;
	}

	b_len = e->count.key;
	b_pos = e->start.key;

scan_bitmap:
	sb = wnd->sb;
	log2_bits = sb->s_blocksize_bits + 3;

	/* At most two ranges [hint, max_alloc) + [0, hint). */
Again:

	/* TODO: Optimize request for case nbits > wbits. */
	iw = hint >> log2_bits;
	wbits = sb->s_blocksize * 8;
	wpos = hint & (wbits - 1);
	prev_tail = 0;
	fbits_valid = true;

	if (max_alloc == wnd->nbits) {
		nwnd = wnd->nwnd;
	} else {
		size_t t = max_alloc + wbits - 1;

		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
	}
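
	/*
	 * For each window below: a window with free_bits[iw] == 0 is fully
	 * used and is skipped without I/O; a completely free window only
	 * advances 'prev_tail'; anything else is read from disk and scanned
	 * with wnd_scan(), splitting the scan around the reserved zone if
	 * the zone overlaps this window.
	 */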

	/* Enumerate all windows. */
	for (; iw < nwnd; iw++) {
		wbit = iw << log2_bits;

		if (!wnd->free_bits[iw]) {
			if (prev_tail > b_len) {
				b_pos = wbit - prev_tail;
				b_len = prev_tail;
			}

			/* Skip fully used window. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		if (unlikely(iw + 1 == nwnd)) {
			if (max_alloc == wnd->nbits) {
				wbits = wnd->bits_last;
			} else {
				size_t t = max_alloc & (wbits - 1);

				if (t) {
					wbits = t;
					fbits_valid = false;
				}
			}
		}

		if (wnd->zone_end > wnd->zone_bit) {
			ebit = wbit + wbits;
			zbit = max(wnd->zone_bit, wbit);
			zend = min(wnd->zone_end, ebit);

			/* Here we have a window [wbit, ebit) and zone [zbit, zend). */
			if (zend <= zbit) {
				/* Zone does not overlap window. */
			} else {
				wzbit = zbit - wbit;
				wzend = zend - wbit;

				/* Zone overlaps window. */
				if (wnd->free_bits[iw] == wzend - wzbit) {
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				/* Scan two ranges of the window: [wbit, zbit) and [zend, ebit). */
				bh = wnd_map(wnd, iw);

				if (IS_ERR(bh)) {
					/* TODO: Error */
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				buf = (ulong *)bh->b_data;

				/* Scan range [wbit, zbit). */
				if (wpos < wzbit) {
					/* Scan range [wpos, zbit). */
					fnd = wnd_scan(buf, wbit, wpos, wzbit,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				prev_tail = 0;

				/* Scan range [zend, ebit). */
				if (wzend < wbits) {
					fnd = wnd_scan(buf, wbit,
						       max(wzend, wpos), wbits,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				wpos = 0;
				put_bh(bh);
				continue;
			}
		}

		/* Current window does not overlap zone. */
		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
			/* Window is empty. */
			if (prev_tail + wbits >= to_alloc) {
				fnd = wbit + wpos - prev_tail;
				goto found;
			}

			/* Increase 'prev_tail' and process next window. */
			prev_tail += wbits;
			wpos = 0;
			continue;
		}

		/* Read window. */
		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			// TODO: Error.
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		buf = (ulong *)bh->b_data;

		/* Scan range [wpos, wbits). */
		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
			       &b_pos, &b_len);
		put_bh(bh);
		if (fnd != MINUS_ONE_T)
			goto found;
	}

	if (b_len < prev_tail) {
		/* The last fragment. */
		b_len = prev_tail;
		b_pos = max_alloc - prev_tail;
	}

	if (hint) {
		/*
		 * We have scanned range [hint, max_alloc).
		 * Prepare to scan range [0, hint + to_alloc).
		 */
		size_t nextmax = hint + to_alloc;

		if (likely(nextmax >= hint) && nextmax < max_alloc)
			max_alloc = nextmax;
		hint = 0;
		goto Again;
	}

	if (!b_len)
		goto no_space;

	wnd->extent_max = b_len;

	if (flags & BITMAP_FIND_FULL)
		goto no_space;

	fnd = b_pos;
	to_alloc = b_len;

found:
	if (flags & BITMAP_FIND_MARK_AS_USED) {
		/* TODO: Optimize remove extent (pass 'e'?). */
		if (wnd_set_used(wnd, fnd, to_alloc))
			goto no_space;
	} else if (wnd->extent_max != MINUS_ONE_T &&
		   to_alloc > wnd->extent_max) {
		wnd->extent_max = to_alloc;
	}

	*allocated = fnd;
	return to_alloc;

no_space:
	return 0;
}
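
/*
 * Note: wnd_extend() grows the bitmap in place (used for the $MFT bitmap):
 * free_bits[] is reallocated to cover the new windows, the new bits
 * [old_bits, new_bits) are cleared on disk, and the range is then added
 * to the cached free space.
 */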

/*
 * wnd_extend - Extend bitmap ($MFT bitmap).
 */
int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
{
	int err;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;
	u32 b0, new_last;
	size_t bits, iw, new_wnd;
	size_t old_bits = wnd->nbits;
	u16 *new_free;

	if (new_bits <= old_bits)
		return -EINVAL;

	/* Align to 8 byte boundary. */
	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
	new_last = new_bits & (wbits - 1);
	if (!new_last)
		new_last = wbits;

	if (new_wnd != wnd->nwnd) {
		new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
		if (!new_free)
			return -ENOMEM;

		if (new_free != wnd->free_bits)
			memcpy(new_free, wnd->free_bits,
			       wnd->nwnd * sizeof(short));
		memset(new_free + wnd->nwnd, 0,
		       (new_wnd - wnd->nwnd) * sizeof(short));
		kfree(wnd->free_bits);
		wnd->free_bits = new_free;
	}

	/* Zero bits [old_bits,new_bits). */
	bits = new_bits - old_bits;
	b0 = old_bits & (wbits - 1);

	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
		u32 op;
		size_t frb;
		u64 vbo, lbo, bytes;
		struct buffer_head *bh;
		ulong *buf;

		if (iw + 1 == new_wnd)
			wbits = new_last;

		op = b0 + bits > wbits ? wbits - b0 : bits;
		vbo = (u64)iw * blocksize;

		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
		if (err)
			break;

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh)
			return -EIO;

		lock_buffer(bh);
		buf = (ulong *)bh->b_data;

		__bitmap_clear(buf, b0, blocksize * 8 - b0);
		frb = wbits - __bitmap_weight(buf, wbits);
		wnd->total_zeroes += frb - wnd->free_bits[iw];
		wnd->free_bits[iw] = frb;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		/* err = sync_dirty_buffer(bh); */
		put_bh(bh);

		b0 = 0;
		bits -= op;
	}

	wnd->nbits = new_bits;
	wnd->nwnd = new_wnd;
	wnd->bits_last = new_last;

	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);

	return 0;
}

void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
	size_t zlen;

	zlen = wnd->zone_end - wnd->zone_bit;
	if (zlen)
		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);

	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
		wnd_remove_free_ext(wnd, lcn, len);

	wnd->zone_bit = lcn;
	wnd->zone_end = lcn + len;
}
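
/*
 * Note: [zone_bit, zone_end) is a reserved range (typically the MFT zone).
 * wnd_zone_set() returns the old zone to the cached free space, removes
 * the new one from it, and wnd_find() avoids allocating inside the zone.
 */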

int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	u32 wbits = 8 * sb->s_blocksize;
	CLST len = 0, lcn = 0, done = 0;
	CLST minlen = bytes_to_cluster(sbi, range->minlen);
	CLST lcn_from = bytes_to_cluster(sbi, range->start);
	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
	u32 wbit = lcn_from & (wbits - 1);
	const ulong *buf;
	CLST lcn_to;

	if (!minlen)
		minlen = 1;

	if (range->len == (u64)-1)
		lcn_to = wnd->nbits;
	else
		lcn_to = bytes_to_cluster(sbi, range->start + range->len);

	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);

	for (; iw < wnd->nwnd; iw++, wbit = 0) {
		CLST lcn_wnd = iw * wbits;
		struct buffer_head *bh;

		if (lcn_wnd > lcn_to)
			break;

		if (!wnd->free_bits[iw])
			continue;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (lcn_wnd + wbits > lcn_to)
			wbits = lcn_to - lcn_wnd;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		for (; wbit < wbits; wbit++) {
			if (!test_bit(wbit, buf)) {
				if (!len)
					lcn = lcn_wnd + wbit;
				len += 1;
				continue;
			}
			if (len >= minlen) {
				err = ntfs_discard(sbi, lcn, len);
				if (err) {
					put_bh(bh);
					goto out;
				}
				done += len;
			}
			len = 0;
		}
		put_bh(bh);
	}

	/* Process the last fragment. */
	if (len >= minlen) {
		err = ntfs_discard(sbi, lcn, len);
		if (err)
			goto out;
		done += len;
	}

out:
	range->len = (u64)done << sbi->cluster_bits;

	up_read(&wnd->rw_lock);

	return err;
}