1 /* 2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. 3 * 4 * Copyright (c) 2004 Anton Altaparmakov 5 * 6 * This program/include file is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as published 8 * by the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program/include file is distributed in the hope that it will be 12 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program (in the main directory of the Linux-NTFS 18 * distribution in the file COPYING); if not, write to the Free Software 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 #ifdef NTFS_RW 23 24 #include <linux/pagemap.h> 25 26 #include "lcnalloc.h" 27 #include "debug.h" 28 #include "bitmap.h" 29 #include "inode.h" 30 #include "volume.h" 31 #include "attrib.h" 32 #include "malloc.h" 33 #include "aops.h" 34 #include "ntfs.h" 35 36 /** 37 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist 38 * @vol: mounted ntfs volume on which to free the clusters 39 * @rl: runlist describing the clusters to free 40 * 41 * Free all the clusters described by the runlist @rl on the volume @vol. In 42 * the case of an error being returned, at least some of the clusters were not 43 * freed. 44 * 45 * Return 0 on success and -errno on error. 46 * 47 * Locking: - The volume lcn bitmap must be locked for writing on entry and is 48 * left locked on return. 49 */ 50 int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 51 const runlist_element *rl) 52 { 53 struct inode *lcnbmp_vi = vol->lcnbmp_ino; 54 int ret = 0; 55 56 ntfs_debug("Entering."); 57 for (; rl->length; rl++) { 58 int err; 59 60 if (rl->lcn < 0) 61 continue; 62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); 63 if (unlikely(err && (!ret || ret == ENOMEM) && ret != err)) 64 ret = err; 65 } 66 ntfs_debug("Done."); 67 return ret; 68 } 69 70 /** 71 * ntfs_cluster_alloc - allocate clusters on an ntfs volume 72 * @vol: mounted ntfs volume on which to allocate the clusters 73 * @start_vcn: vcn to use for the first allocated cluster 74 * @count: number of clusters to allocate 75 * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) 76 * @zone: zone from which to allocate the clusters 77 * 78 * Allocate @count clusters preferably starting at cluster @start_lcn or at the 79 * current allocator position if @start_lcn is -1, on the mounted ntfs volume 80 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or 81 * MFT_ZONE for allocation of clusters for the master file table, i.e. the 82 * $MFT/$DATA attribute. 83 * 84 * @start_vcn specifies the vcn of the first allocated cluster. This makes 85 * merging the resulting runlist with the old runlist easier. 86 * 87 * You need to check the return value with IS_ERR(). If this is false, the 88 * function was successful and the return value is a runlist describing the 89 * allocated cluster(s). If IS_ERR() is true, the function failed and 90 * PTR_ERR() gives you the error code. 91 * 92 * Notes on the allocation algorithm 93 * ================================= 94 * 95 * There are two data zones. First is the area between the end of the mft zone 96 * and the end of the volume, and second is the area between the start of the 97 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x 98 * volumes, the second data zone does not exist due to the mft zone being 99 * expanded to cover the start of the volume in order to reserve space for the 100 * mft bitmap attribute. 101 * 102 * This is not the prettiest function but the complexity stems from the need of 103 * implementing the mft vs data zoned approach and from the fact that we have 104 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we 105 * need to cope with crossing over boundaries of two buffers. Further, the 106 * fact that the allocator allows for caller supplied hints as to the location 107 * of where allocation should begin and the fact that the allocator keeps track 108 * of where in the data zones the next natural allocation should occur, 109 * contribute to the complexity of the function. But it should all be 110 * worthwhile, because this allocator should: 1) be a full implementation of 111 * the MFT zone approach used by Windows NT, 2) cause reduction in 112 * fragmentation, and 3) be speedy in allocations (the code is not optimized 113 * for speed, but the algorithm is, so further speed improvements are probably 114 * possible). 115 * 116 * FIXME: We should be monitoring cluster allocation and increment the MFT zone 117 * size dynamically but this is something for the future. We will just cause 118 * heavier fragmentation by not doing it and I am not even sure Windows would 119 * grow the MFT zone dynamically, so it might even be correct not to do this. 120 * The overhead in doing dynamic MFT zone expansion would be very large and 121 * unlikely worth the effort. (AIA) 122 * 123 * TODO: I have added in double the required zone position pointer wrap around 124 * logic which can be optimized to having only one of the two logic sets. 125 * However, having the double logic will work fine, but if we have only one of 126 * the sets and we get it wrong somewhere, then we get into trouble, so 127 * removing the duplicate logic requires _very_ careful consideration of _all_ 128 * possible code paths. So at least for now, I am leaving the double logic - 129 * better safe than sorry... (AIA) 130 * 131 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked 132 * on return. 133 * - This function takes the volume lcn bitmap lock for writing and 134 * modifies the bitmap contents. 135 */ 136 runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, 137 const s64 count, const LCN start_lcn, 138 const NTFS_CLUSTER_ALLOCATION_ZONES zone) 139 { 140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; 142 s64 clusters; 143 struct inode *lcnbmp_vi; 144 runlist_element *rl = NULL; 145 struct address_space *mapping; 146 struct page *page = NULL; 147 u8 *buf, *byte; 148 int err = 0, rlpos, rlsize, buf_size; 149 u8 pass, done_zones, search_zone, need_writeback = 0, bit; 150 151 ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn " 152 "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn, 153 (unsigned long long)count, 154 (unsigned long long)start_lcn, 155 zone == MFT_ZONE ? "MFT" : "DATA"); 156 BUG_ON(!vol); 157 lcnbmp_vi = vol->lcnbmp_ino; 158 BUG_ON(!lcnbmp_vi); 159 BUG_ON(start_vcn < 0); 160 BUG_ON(count < 0); 161 BUG_ON(start_lcn < -1); 162 BUG_ON(zone < FIRST_ZONE); 163 BUG_ON(zone > LAST_ZONE); 164 165 /* Return empty runlist if @count == 0 */ 166 // FIXME: Do we want to just return NULL instead? (AIA) 167 if (!count) { 168 rl = ntfs_malloc_nofs(PAGE_SIZE); 169 if (!rl) 170 return ERR_PTR(-ENOMEM); 171 rl[0].vcn = start_vcn; 172 rl[0].lcn = LCN_RL_NOT_MAPPED; 173 rl[0].length = 0; 174 return rl; 175 } 176 /* Take the lcnbmp lock for writing. */ 177 down_write(&vol->lcnbmp_lock); 178 /* 179 * If no specific @start_lcn was requested, use the current data zone 180 * position, otherwise use the requested @start_lcn but make sure it 181 * lies outside the mft zone. Also set done_zones to 0 (no zones done) 182 * and pass depending on whether we are starting inside a zone (1) or 183 * at the beginning of a zone (2). If requesting from the MFT_ZONE, 184 * we either start at the current position within the mft zone or at 185 * the specified position. If the latter is out of bounds then we start 186 * at the beginning of the MFT_ZONE. 187 */ 188 done_zones = 0; 189 pass = 1; 190 /* 191 * zone_start and zone_end are the current search range. search_zone 192 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of 193 * volume) and 4 for data zone 2 (start of volume till start of mft 194 * zone). 195 */ 196 zone_start = start_lcn; 197 if (zone_start < 0) { 198 if (zone == DATA_ZONE) 199 zone_start = vol->data1_zone_pos; 200 else 201 zone_start = vol->mft_zone_pos; 202 if (!zone_start) { 203 /* 204 * Zone starts at beginning of volume which means a 205 * single pass is sufficient. 206 */ 207 pass = 2; 208 } 209 } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start && 210 zone_start < vol->mft_zone_end) { 211 zone_start = vol->mft_zone_end; 212 /* 213 * Starting at beginning of data1_zone which means a single 214 * pass in this zone is sufficient. 215 */ 216 pass = 2; 217 } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start || 218 zone_start >= vol->mft_zone_end)) { 219 zone_start = vol->mft_lcn; 220 if (!vol->mft_zone_end) 221 zone_start = 0; 222 /* 223 * Starting at beginning of volume which means a single pass 224 * is sufficient. 225 */ 226 pass = 2; 227 } 228 if (zone == MFT_ZONE) { 229 zone_end = vol->mft_zone_end; 230 search_zone = 1; 231 } else /* if (zone == DATA_ZONE) */ { 232 /* Skip searching the mft zone. */ 233 done_zones |= 1; 234 if (zone_start >= vol->mft_zone_end) { 235 zone_end = vol->nr_clusters; 236 search_zone = 2; 237 } else { 238 zone_end = vol->mft_zone_start; 239 search_zone = 4; 240 } 241 } 242 /* 243 * bmp_pos is the current bit position inside the bitmap. We use 244 * bmp_initial_pos to determine whether or not to do a zone switch. 245 */ 246 bmp_pos = bmp_initial_pos = zone_start; 247 248 /* Loop until all clusters are allocated, i.e. clusters == 0. */ 249 clusters = count; 250 rlpos = rlsize = 0; 251 mapping = lcnbmp_vi->i_mapping; 252 while (1) { 253 ntfs_debug("Start of outer while loop: done_zones 0x%x, " 254 "search_zone %i, pass %i, zone_start 0x%llx, " 255 "zone_end 0x%llx, bmp_initial_pos 0x%llx, " 256 "bmp_pos 0x%llx, rlpos %i, rlsize %i.", 257 done_zones, search_zone, pass, 258 (unsigned long long)zone_start, 259 (unsigned long long)zone_end, 260 (unsigned long long)bmp_initial_pos, 261 (unsigned long long)bmp_pos, rlpos, rlsize); 262 /* Loop until we run out of free clusters. */ 263 last_read_pos = bmp_pos >> 3; 264 ntfs_debug("last_read_pos 0x%llx.", 265 (unsigned long long)last_read_pos); 266 if (last_read_pos > lcnbmp_vi->i_size) { 267 ntfs_debug("End of attribute reached. " 268 "Skipping to zone_pass_done."); 269 goto zone_pass_done; 270 } 271 if (likely(page)) { 272 if (need_writeback) { 273 ntfs_debug("Marking page dirty."); 274 flush_dcache_page(page); 275 set_page_dirty(page); 276 need_writeback = 0; 277 } 278 ntfs_unmap_page(page); 279 } 280 page = ntfs_map_page(mapping, last_read_pos >> 281 PAGE_CACHE_SHIFT); 282 if (IS_ERR(page)) { 283 err = PTR_ERR(page); 284 ntfs_error(vol->sb, "Failed to map page."); 285 goto out; 286 } 287 buf_size = last_read_pos & ~PAGE_CACHE_MASK; 288 buf = page_address(page) + buf_size; 289 buf_size = PAGE_CACHE_SIZE - buf_size; 290 if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) 291 buf_size = lcnbmp_vi->i_size - last_read_pos; 292 buf_size <<= 3; 293 lcn = bmp_pos & 7; 294 bmp_pos &= ~7; 295 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " 296 "bmp_pos 0x%llx, need_writeback %i.", buf_size, 297 (unsigned long long)lcn, 298 (unsigned long long)bmp_pos, need_writeback); 299 while (lcn < buf_size && lcn + bmp_pos < zone_end) { 300 byte = buf + (lcn >> 3); 301 ntfs_debug("In inner while loop: buf_size %i, " 302 "lcn 0x%llx, bmp_pos 0x%llx, " 303 "need_writeback %i, byte ofs 0x%x, " 304 "*byte 0x%x.", buf_size, 305 (unsigned long long)lcn, 306 (unsigned long long)bmp_pos, 307 need_writeback, 308 (unsigned int)(lcn >> 3), 309 (unsigned int)*byte); 310 /* Skip full bytes. */ 311 if (*byte == 0xff) { 312 lcn = (lcn + 8) & ~7; 313 ntfs_debug("Continuing while loop 1."); 314 continue; 315 } 316 bit = 1 << (lcn & 7); 317 ntfs_debug("bit %i.", bit); 318 /* If the bit is already set, go onto the next one. */ 319 if (*byte & bit) { 320 lcn++; 321 ntfs_debug("Continuing while loop 2."); 322 continue; 323 } 324 /* 325 * Allocate more memory if needed, including space for 326 * the terminator element. 327 * ntfs_malloc_nofs() operates on whole pages only. 328 */ 329 if ((rlpos + 2) * sizeof(*rl) > rlsize) { 330 runlist_element *rl2; 331 332 ntfs_debug("Reallocating memory."); 333 if (!rl) 334 ntfs_debug("First free bit is at LCN " 335 "0x%llx.", 336 (unsigned long long) 337 (lcn + bmp_pos)); 338 rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); 339 if (unlikely(!rl2)) { 340 err = -ENOMEM; 341 ntfs_error(vol->sb, "Failed to " 342 "allocate memory."); 343 goto out; 344 } 345 memcpy(rl2, rl, rlsize); 346 ntfs_free(rl); 347 rl = rl2; 348 rlsize += PAGE_SIZE; 349 ntfs_debug("Reallocated memory, rlsize 0x%x.", 350 rlsize); 351 } 352 /* Allocate the bitmap bit. */ 353 *byte |= bit; 354 /* We need to write this bitmap page to disk. */ 355 need_writeback = 1; 356 ntfs_debug("*byte 0x%x, need_writeback is set.", 357 (unsigned int)*byte); 358 /* 359 * Coalesce with previous run if adjacent LCNs. 360 * Otherwise, append a new run. 361 */ 362 ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), " 363 "prev_lcn 0x%llx, lcn 0x%llx, " 364 "bmp_pos 0x%llx, prev_run_len 0x%llx, " 365 "rlpos %i.", 366 (unsigned long long)(lcn + bmp_pos), 367 1ULL, (unsigned long long)prev_lcn, 368 (unsigned long long)lcn, 369 (unsigned long long)bmp_pos, 370 (unsigned long long)prev_run_len, 371 rlpos); 372 if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) { 373 ntfs_debug("Coalescing to run (lcn 0x%llx, " 374 "len 0x%llx).", 375 (unsigned long long) 376 rl[rlpos - 1].lcn, 377 (unsigned long long) 378 rl[rlpos - 1].length); 379 rl[rlpos - 1].length = ++prev_run_len; 380 ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), " 381 "prev_run_len 0x%llx.", 382 (unsigned long long) 383 rl[rlpos - 1].lcn, 384 (unsigned long long) 385 rl[rlpos - 1].length, 386 (unsigned long long) 387 prev_run_len); 388 } else { 389 if (likely(rlpos)) { 390 ntfs_debug("Adding new run, (previous " 391 "run lcn 0x%llx, " 392 "len 0x%llx).", 393 (unsigned long long) 394 rl[rlpos - 1].lcn, 395 (unsigned long long) 396 rl[rlpos - 1].length); 397 rl[rlpos].vcn = rl[rlpos - 1].vcn + 398 prev_run_len; 399 } else { 400 ntfs_debug("Adding new run, is first " 401 "run."); 402 rl[rlpos].vcn = start_vcn; 403 } 404 rl[rlpos].lcn = prev_lcn = lcn + bmp_pos; 405 rl[rlpos].length = prev_run_len = 1; 406 rlpos++; 407 } 408 /* Done? */ 409 if (!--clusters) { 410 LCN tc; 411 /* 412 * Update the current zone position. Positions 413 * of already scanned zones have been updated 414 * during the respective zone switches. 415 */ 416 tc = lcn + bmp_pos + 1; 417 ntfs_debug("Done. Updating current zone " 418 "position, tc 0x%llx, " 419 "search_zone %i.", 420 (unsigned long long)tc, 421 search_zone); 422 switch (search_zone) { 423 case 1: 424 ntfs_debug("Before checks, " 425 "vol->mft_zone_pos " 426 "0x%llx.", 427 (unsigned long long) 428 vol->mft_zone_pos); 429 if (tc >= vol->mft_zone_end) { 430 vol->mft_zone_pos = 431 vol->mft_lcn; 432 if (!vol->mft_zone_end) 433 vol->mft_zone_pos = 0; 434 } else if ((bmp_initial_pos >= 435 vol->mft_zone_pos || 436 tc > vol->mft_zone_pos) 437 && tc >= vol->mft_lcn) 438 vol->mft_zone_pos = tc; 439 ntfs_debug("After checks, " 440 "vol->mft_zone_pos " 441 "0x%llx.", 442 (unsigned long long) 443 vol->mft_zone_pos); 444 break; 445 case 2: 446 ntfs_debug("Before checks, " 447 "vol->data1_zone_pos " 448 "0x%llx.", 449 (unsigned long long) 450 vol->data1_zone_pos); 451 if (tc >= vol->nr_clusters) 452 vol->data1_zone_pos = 453 vol->mft_zone_end; 454 else if ((bmp_initial_pos >= 455 vol->data1_zone_pos || 456 tc > vol->data1_zone_pos) 457 && tc >= vol->mft_zone_end) 458 vol->data1_zone_pos = tc; 459 ntfs_debug("After checks, " 460 "vol->data1_zone_pos " 461 "0x%llx.", 462 (unsigned long long) 463 vol->data1_zone_pos); 464 break; 465 case 4: 466 ntfs_debug("Before checks, " 467 "vol->data2_zone_pos " 468 "0x%llx.", 469 (unsigned long long) 470 vol->data2_zone_pos); 471 if (tc >= vol->mft_zone_start) 472 vol->data2_zone_pos = 0; 473 else if (bmp_initial_pos >= 474 vol->data2_zone_pos || 475 tc > vol->data2_zone_pos) 476 vol->data2_zone_pos = tc; 477 ntfs_debug("After checks, " 478 "vol->data2_zone_pos " 479 "0x%llx.", 480 (unsigned long long) 481 vol->data2_zone_pos); 482 break; 483 default: 484 BUG(); 485 } 486 ntfs_debug("Finished. Going to out."); 487 goto out; 488 } 489 lcn++; 490 } 491 bmp_pos += buf_size; 492 ntfs_debug("After inner while loop: buf_size 0x%x, lcn " 493 "0x%llx, bmp_pos 0x%llx, need_writeback %i.", 494 buf_size, (unsigned long long)lcn, 495 (unsigned long long)bmp_pos, need_writeback); 496 if (bmp_pos < zone_end) { 497 ntfs_debug("Continuing outer while loop, " 498 "bmp_pos 0x%llx, zone_end 0x%llx.", 499 (unsigned long long)bmp_pos, 500 (unsigned long long)zone_end); 501 continue; 502 } 503 zone_pass_done: /* Finished with the current zone pass. */ 504 ntfs_debug("At zone_pass_done, pass %i.", pass); 505 if (pass == 1) { 506 /* 507 * Now do pass 2, scanning the first part of the zone 508 * we omitted in pass 1. 509 */ 510 pass = 2; 511 zone_end = zone_start; 512 switch (search_zone) { 513 case 1: /* mft_zone */ 514 zone_start = vol->mft_zone_start; 515 break; 516 case 2: /* data1_zone */ 517 zone_start = vol->mft_zone_end; 518 break; 519 case 4: /* data2_zone */ 520 zone_start = 0; 521 break; 522 default: 523 BUG(); 524 } 525 /* Sanity check. */ 526 if (zone_end < zone_start) 527 zone_end = zone_start; 528 bmp_pos = zone_start; 529 ntfs_debug("Continuing outer while loop, pass 2, " 530 "zone_start 0x%llx, zone_end 0x%llx, " 531 "bmp_pos 0x%llx.", 532 (unsigned long long)zone_start, 533 (unsigned long long)zone_end, 534 (unsigned long long)bmp_pos); 535 continue; 536 } /* pass == 2 */ 537 done_zones_check: 538 ntfs_debug("At done_zones_check, search_zone %i, done_zones " 539 "before 0x%x, done_zones after 0x%x.", 540 search_zone, done_zones, 541 done_zones | search_zone); 542 done_zones |= search_zone; 543 if (done_zones < 7) { 544 ntfs_debug("Switching zone."); 545 /* Now switch to the next zone we haven't done yet. */ 546 pass = 1; 547 switch (search_zone) { 548 case 1: 549 ntfs_debug("Switching from mft zone to data1 " 550 "zone."); 551 /* Update mft zone position. */ 552 if (rlpos) { 553 LCN tc; 554 555 ntfs_debug("Before checks, " 556 "vol->mft_zone_pos " 557 "0x%llx.", 558 (unsigned long long) 559 vol->mft_zone_pos); 560 tc = rl[rlpos - 1].lcn + 561 rl[rlpos - 1].length; 562 if (tc >= vol->mft_zone_end) { 563 vol->mft_zone_pos = 564 vol->mft_lcn; 565 if (!vol->mft_zone_end) 566 vol->mft_zone_pos = 0; 567 } else if ((bmp_initial_pos >= 568 vol->mft_zone_pos || 569 tc > vol->mft_zone_pos) 570 && tc >= vol->mft_lcn) 571 vol->mft_zone_pos = tc; 572 ntfs_debug("After checks, " 573 "vol->mft_zone_pos " 574 "0x%llx.", 575 (unsigned long long) 576 vol->mft_zone_pos); 577 } 578 /* Switch from mft zone to data1 zone. */ 579 switch_to_data1_zone: search_zone = 2; 580 zone_start = bmp_initial_pos = 581 vol->data1_zone_pos; 582 zone_end = vol->nr_clusters; 583 if (zone_start == vol->mft_zone_end) 584 pass = 2; 585 if (zone_start >= zone_end) { 586 vol->data1_zone_pos = zone_start = 587 vol->mft_zone_end; 588 pass = 2; 589 } 590 break; 591 case 2: 592 ntfs_debug("Switching from data1 zone to " 593 "data2 zone."); 594 /* Update data1 zone position. */ 595 if (rlpos) { 596 LCN tc; 597 598 ntfs_debug("Before checks, " 599 "vol->data1_zone_pos " 600 "0x%llx.", 601 (unsigned long long) 602 vol->data1_zone_pos); 603 tc = rl[rlpos - 1].lcn + 604 rl[rlpos - 1].length; 605 if (tc >= vol->nr_clusters) 606 vol->data1_zone_pos = 607 vol->mft_zone_end; 608 else if ((bmp_initial_pos >= 609 vol->data1_zone_pos || 610 tc > vol->data1_zone_pos) 611 && tc >= vol->mft_zone_end) 612 vol->data1_zone_pos = tc; 613 ntfs_debug("After checks, " 614 "vol->data1_zone_pos " 615 "0x%llx.", 616 (unsigned long long) 617 vol->data1_zone_pos); 618 } 619 /* Switch from data1 zone to data2 zone. */ 620 search_zone = 4; 621 zone_start = bmp_initial_pos = 622 vol->data2_zone_pos; 623 zone_end = vol->mft_zone_start; 624 if (!zone_start) 625 pass = 2; 626 if (zone_start >= zone_end) { 627 vol->data2_zone_pos = zone_start = 628 bmp_initial_pos = 0; 629 pass = 2; 630 } 631 break; 632 case 4: 633 ntfs_debug("Switching from data2 zone to " 634 "data1 zone."); 635 /* Update data2 zone position. */ 636 if (rlpos) { 637 LCN tc; 638 639 ntfs_debug("Before checks, " 640 "vol->data2_zone_pos " 641 "0x%llx.", 642 (unsigned long long) 643 vol->data2_zone_pos); 644 tc = rl[rlpos - 1].lcn + 645 rl[rlpos - 1].length; 646 if (tc >= vol->mft_zone_start) 647 vol->data2_zone_pos = 0; 648 else if (bmp_initial_pos >= 649 vol->data2_zone_pos || 650 tc > vol->data2_zone_pos) 651 vol->data2_zone_pos = tc; 652 ntfs_debug("After checks, " 653 "vol->data2_zone_pos " 654 "0x%llx.", 655 (unsigned long long) 656 vol->data2_zone_pos); 657 } 658 /* Switch from data2 zone to data1 zone. */ 659 goto switch_to_data1_zone; 660 default: 661 BUG(); 662 } 663 ntfs_debug("After zone switch, search_zone %i, " 664 "pass %i, bmp_initial_pos 0x%llx, " 665 "zone_start 0x%llx, zone_end 0x%llx.", 666 search_zone, pass, 667 (unsigned long long)bmp_initial_pos, 668 (unsigned long long)zone_start, 669 (unsigned long long)zone_end); 670 bmp_pos = zone_start; 671 if (zone_start == zone_end) { 672 ntfs_debug("Empty zone, going to " 673 "done_zones_check."); 674 /* Empty zone. Don't bother searching it. */ 675 goto done_zones_check; 676 } 677 ntfs_debug("Continuing outer while loop."); 678 continue; 679 } /* done_zones == 7 */ 680 ntfs_debug("All zones are finished."); 681 /* 682 * All zones are finished! If DATA_ZONE, shrink mft zone. If 683 * MFT_ZONE, we have really run out of space. 684 */ 685 mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; 686 ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end " 687 "0x%llx, mft_zone_size 0x%llx.", 688 (unsigned long long)vol->mft_zone_start, 689 (unsigned long long)vol->mft_zone_end, 690 (unsigned long long)mft_zone_size); 691 if (zone == MFT_ZONE || mft_zone_size <= 0) { 692 ntfs_debug("No free clusters left, going to out."); 693 /* Really no more space left on device. */ 694 err = ENOSPC; 695 goto out; 696 } /* zone == DATA_ZONE && mft_zone_size > 0 */ 697 ntfs_debug("Shrinking mft zone."); 698 zone_end = vol->mft_zone_end; 699 mft_zone_size >>= 1; 700 if (mft_zone_size > 0) 701 vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; 702 else /* mft zone and data2 zone no longer exist. */ 703 vol->data2_zone_pos = vol->mft_zone_start = 704 vol->mft_zone_end = 0; 705 if (vol->mft_zone_pos >= vol->mft_zone_end) { 706 vol->mft_zone_pos = vol->mft_lcn; 707 if (!vol->mft_zone_end) 708 vol->mft_zone_pos = 0; 709 } 710 bmp_pos = zone_start = bmp_initial_pos = 711 vol->data1_zone_pos = vol->mft_zone_end; 712 search_zone = 2; 713 pass = 2; 714 done_zones &= ~2; 715 ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, " 716 "vol->mft_zone_start 0x%llx, " 717 "vol->mft_zone_end 0x%llx, " 718 "vol->mft_zone_pos 0x%llx, search_zone 2, " 719 "pass 2, dones_zones 0x%x, zone_start 0x%llx, " 720 "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, " 721 "continuing outer while loop.", 722 (unsigned long long)mft_zone_size, 723 (unsigned long long)vol->mft_zone_start, 724 (unsigned long long)vol->mft_zone_end, 725 (unsigned long long)vol->mft_zone_pos, 726 done_zones, (unsigned long long)zone_start, 727 (unsigned long long)zone_end, 728 (unsigned long long)vol->data1_zone_pos); 729 } 730 ntfs_debug("After outer while loop."); 731 out: 732 ntfs_debug("At out."); 733 /* Add runlist terminator element. */ 734 if (likely(rl)) { 735 rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; 736 rl[rlpos].lcn = LCN_RL_NOT_MAPPED; 737 rl[rlpos].length = 0; 738 } 739 if (likely(page && !IS_ERR(page))) { 740 if (need_writeback) { 741 ntfs_debug("Marking page dirty."); 742 flush_dcache_page(page); 743 set_page_dirty(page); 744 need_writeback = 0; 745 } 746 ntfs_unmap_page(page); 747 } 748 if (likely(!err)) { 749 up_write(&vol->lcnbmp_lock); 750 ntfs_debug("Done."); 751 return rl; 752 } 753 ntfs_error(vol->sb, "Failed to allocate clusters, aborting " 754 "(error %i).", err); 755 if (rl) { 756 int err2; 757 758 if (err == ENOSPC) 759 ntfs_debug("Not enough space to complete allocation, " 760 "err ENOSPC, first free lcn 0x%llx, " 761 "could allocate up to 0x%llx " 762 "clusters.", 763 (unsigned long long)rl[0].lcn, 764 (unsigned long long)count - clusters); 765 /* Deallocate all allocated clusters. */ 766 ntfs_debug("Attempting rollback..."); 767 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); 768 if (err2) { 769 ntfs_error(vol->sb, "Failed to rollback (error %i). " 770 "Leaving inconsistent metadata! " 771 "Unmount and run chkdsk.", err2); 772 NVolSetErrors(vol); 773 } 774 /* Free the runlist. */ 775 ntfs_free(rl); 776 } else if (err == ENOSPC) 777 ntfs_debug("No space left at all, err = ENOSPC, " 778 "first free lcn = 0x%llx.", 779 (unsigned long long)vol->data1_zone_pos); 780 up_write(&vol->lcnbmp_lock); 781 return ERR_PTR(err); 782 } 783 784 /** 785 * __ntfs_cluster_free - free clusters on an ntfs volume 786 * @vi: vfs inode whose runlist describes the clusters to free 787 * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters 788 * @count: number of clusters to free or -1 for all clusters 789 * @is_rollback: if TRUE this is a rollback operation 790 * 791 * Free @count clusters starting at the cluster @start_vcn in the runlist 792 * described by the vfs inode @vi. 793 * 794 * If @count is -1, all clusters from @start_vcn to the end of the runlist are 795 * deallocated. Thus, to completely free all clusters in a runlist, use 796 * @start_vcn = 0 and @count = -1. 797 * 798 * @is_rollback should always be FALSE, it is for internal use to rollback 799 * errors. You probably want to use ntfs_cluster_free() instead. 800 * 801 * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller 802 * has to deal with it later. 803 * 804 * Return the number of deallocated clusters (not counting sparse ones) on 805 * success and -errno on error. 806 * 807 * Locking: - The runlist described by @vi must be unlocked on entry and is 808 * unlocked on return. 809 * - This function takes the runlist lock of @vi for reading and 810 * sometimes for writing and sometimes modifies the runlist. 811 * - The volume lcn bitmap must be unlocked on entry and is unlocked 812 * on return. 813 * - This function takes the volume lcn bitmap lock for writing and 814 * modifies the bitmap contents. 815 */ 816 s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, 817 const BOOL is_rollback) 818 { 819 s64 delta, to_free, total_freed, real_freed; 820 ntfs_inode *ni; 821 ntfs_volume *vol; 822 struct inode *lcnbmp_vi; 823 runlist_element *rl; 824 int err; 825 826 BUG_ON(!vi); 827 ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count " 828 "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn, 829 (unsigned long long)count, 830 is_rollback ? " (rollback)" : ""); 831 ni = NTFS_I(vi); 832 vol = ni->vol; 833 lcnbmp_vi = vol->lcnbmp_ino; 834 BUG_ON(!lcnbmp_vi); 835 BUG_ON(start_vcn < 0); 836 BUG_ON(count < -1); 837 /* 838 * Lock the lcn bitmap for writing but only if not rolling back. We 839 * must hold the lock all the way including through rollback otherwise 840 * rollback is not possible because once we have cleared a bit and 841 * dropped the lock, anyone could have set the bit again, thus 842 * allocating the cluster for another use. 843 */ 844 if (likely(!is_rollback)) 845 down_write(&vol->lcnbmp_lock); 846 847 total_freed = real_freed = 0; 848 849 /* This returns with ni->runlist locked for reading on success. */ 850 rl = ntfs_find_vcn(ni, start_vcn, FALSE); 851 if (IS_ERR(rl)) { 852 if (!is_rollback) 853 ntfs_error(vol->sb, "Failed to find first runlist " 854 "element (error %li), aborting.", 855 PTR_ERR(rl)); 856 err = PTR_ERR(rl); 857 goto err_out; 858 } 859 if (unlikely(rl->lcn < LCN_HOLE)) { 860 if (!is_rollback) 861 ntfs_error(vol->sb, "First runlist element has " 862 "invalid lcn, aborting."); 863 err = -EIO; 864 goto unl_err_out; 865 } 866 /* Find the starting cluster inside the run that needs freeing. */ 867 delta = start_vcn - rl->vcn; 868 869 /* The number of clusters in this run that need freeing. */ 870 to_free = rl->length - delta; 871 if (count >= 0 && to_free > count) 872 to_free = count; 873 874 if (likely(rl->lcn >= 0)) { 875 /* Do the actual freeing of the clusters in this run. */ 876 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta, 877 to_free, likely(!is_rollback) ? 0 : 1); 878 if (unlikely(err)) { 879 if (!is_rollback) 880 ntfs_error(vol->sb, "Failed to clear first run " 881 "(error %i), aborting.", err); 882 goto unl_err_out; 883 } 884 /* We have freed @to_free real clusters. */ 885 real_freed = to_free; 886 }; 887 /* Go to the next run and adjust the number of clusters left to free. */ 888 ++rl; 889 if (count >= 0) 890 count -= to_free; 891 892 /* Keep track of the total "freed" clusters, including sparse ones. */ 893 total_freed = to_free; 894 /* 895 * Loop over the remaining runs, using @count as a capping value, and 896 * free them. 897 */ 898 for (; rl->length && count != 0; ++rl) { 899 if (unlikely(rl->lcn < LCN_HOLE)) { 900 VCN vcn; 901 902 /* 903 * Attempt to map runlist, dropping runlist lock for 904 * the duration. 905 */ 906 vcn = rl->vcn; 907 up_read(&ni->runlist.lock); 908 err = ntfs_map_runlist(ni, vcn); 909 if (err) { 910 if (!is_rollback) 911 ntfs_error(vol->sb, "Failed to map " 912 "runlist fragment."); 913 if (err == -EINVAL || err == -ENOENT) 914 err = -EIO; 915 goto err_out; 916 } 917 /* 918 * This returns with ni->runlist locked for reading on 919 * success. 920 */ 921 rl = ntfs_find_vcn(ni, vcn, FALSE); 922 if (IS_ERR(rl)) { 923 err = PTR_ERR(rl); 924 if (!is_rollback) 925 ntfs_error(vol->sb, "Failed to find " 926 "subsequent runlist " 927 "element."); 928 goto err_out; 929 } 930 if (unlikely(rl->lcn < LCN_HOLE)) { 931 if (!is_rollback) 932 ntfs_error(vol->sb, "Runlist element " 933 "has invalid lcn " 934 "(0x%llx).", 935 (unsigned long long) 936 rl->lcn); 937 err = -EIO; 938 goto unl_err_out; 939 } 940 } 941 /* The number of clusters in this run that need freeing. */ 942 to_free = rl->length; 943 if (count >= 0 && to_free > count) 944 to_free = count; 945 946 if (likely(rl->lcn >= 0)) { 947 /* Do the actual freeing of the clusters in the run. */ 948 err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn, 949 to_free, likely(!is_rollback) ? 0 : 1); 950 if (unlikely(err)) { 951 if (!is_rollback) 952 ntfs_error(vol->sb, "Failed to clear " 953 "subsequent run."); 954 goto unl_err_out; 955 } 956 /* We have freed @to_free real clusters. */ 957 real_freed += to_free; 958 } 959 /* Adjust the number of clusters left to free. */ 960 if (count >= 0) 961 count -= to_free; 962 963 /* Update the total done clusters. */ 964 total_freed += to_free; 965 } 966 up_read(&ni->runlist.lock); 967 if (likely(!is_rollback)) 968 up_write(&vol->lcnbmp_lock); 969 970 BUG_ON(count > 0); 971 972 /* We are done. Return the number of actually freed clusters. */ 973 ntfs_debug("Done."); 974 return real_freed; 975 unl_err_out: 976 up_read(&ni->runlist.lock); 977 err_out: 978 if (is_rollback) 979 return err; 980 /* If no real clusters were freed, no need to rollback. */ 981 if (!real_freed) { 982 up_write(&vol->lcnbmp_lock); 983 return err; 984 } 985 /* 986 * Attempt to rollback and if that succeeds just return the error code. 987 * If rollback fails, set the volume errors flag, emit an error 988 * message, and return the error code. 989 */ 990 delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); 991 if (delta < 0) { 992 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " 993 "inconsistent metadata! Unmount and run " 994 "chkdsk.", (int)delta); 995 NVolSetErrors(vol); 996 } 997 up_write(&vol->lcnbmp_lock); 998 ntfs_error(vol->sb, "Aborting (error %i).", err); 999 return err; 1000 } 1001 1002 #endif /* NTFS_RW */ 1003