/**
 * aops.c - NTFS kernel address space operations and page cache handling.
 *	    Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2004 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"

/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh:		buffer head on which io is completed
 * @uptodate:	whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
 * record size.
 */
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	static DEFINE_SPINLOCK(page_uptodate_lock);
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	ni = NTFS_I(page->mapping->host);

	if (likely(uptodate)) {
		s64 file_ofs;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				bh_offset(bh);
		/* Check for the current buffer head overflowing. */
		if (file_ofs + bh->b_size > ni->initialized_size) {
			char *addr;
			int ofs = 0;

			if (file_ofs < ni->initialized_size)
				ofs = ni->initialized_size - file_ofs;
			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		}
	} else {
		clear_buffer_uptodate(bh);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				(unsigned long long)bh->b_blocknr);
		SetPageError(page);
	}
	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		char *addr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_CACHE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(addr +
					i * rec_size), rec_size);
		flush_dcache_page(page);
		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		if (likely(!PageError(page) && page_uptodate))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}

/**
 * ntfs_read_block - fill a @page of an address space with data
 * @page:	page cache page to fill with data
 *
 * Fill the page @page of the address space belonging to the @page->host inode.
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the page before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_page().
 */
static int ntfs_read_block(struct page *page)
{
	VCN vcn;
	LCN lcn;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	ni = NTFS_I(page->mapping->host);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize_bits = VFS_I(ni)->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		unlock_page(page);
		return -ENOMEM;
	}

	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;

	/* Loop through all the buffers in the page. */
	rl = NULL;
	nr = i = 0;
	do {
		u8 *kaddr;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			BOOL is_retry = FALSE;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				int err;
				is_retry = TRUE;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
				lcn = err;
			}
			/* Hard error, zero out region. */
			bh->b_blocknr = -1;
			SetPageError(page);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %lli).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", (long long)lcn);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the page and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + i * blocksize, 0, blocksize);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}
	/* No i/o was scheduled on any of the buffers. */
	if (likely(!PageError(page)))
		SetPageUptodate(page);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	unlock_page(page);
	return nr;
}

/**
 * ntfs_readpage - fill a @page of a @file with data from the device
 * @file:	open file to which the page @page belongs or NULL
 * @page:	page cache page to fill with data
 *
 * For non-resident attributes, ntfs_readpage() fills the @page of the open
 * file @file by calling the ntfs version of the generic block_read_full_page()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_readpage(struct file *file, struct page *page)
{
	loff_t i_size;
	ntfs_inode *ni, *base_ni;
	u8 *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	u32 attr_len;
	int err = 0;

	BUG_ON(!PageLocked(page));
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(page->mapping->host);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed or
		 * encrypted.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				err = -EACCES;
				goto err_out;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni))
				return ntfs_read_compressed_block(page);
		}
		/* Normal data stream. */
		return ntfs_read_block(page);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. We can also do this if the file size is 0.
	 */
	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr, 0, PAGE_CACHE_SIZE);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	if (unlikely(attr_len > i_size))
		attr_len = i_size;
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the data to the page. */
	memcpy(kaddr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}

#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a page with buffers, map and write the dirty buffers asynchronously
 * under page writeback. For a page without buffers, create buffers for the
 * page, then proceed as above.
 *
 * If a page doesn't have buffers the page dirty state is definitive. If a page
 * does have buffers, the page dirty state is just a hint, and the buffer dirty
 * state is definitive. (A hint which has rules: dirty buffers against a clean
 * page is illegal. Other combinations are legal and need to be handled. In
 * particular a dirty page containing clean buffers for example.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_page().
 */
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned int blocksize, vcn_ofs;
	int err;
	BOOL need_end_writeback;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, page->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page)) {
		BUG_ON(!PageUptodate(page));
		create_empty_buffers(page, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		ntfs_warning(vol->sb, "Error allocating page buffers. "
				"Redirtying page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffer's dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = ni->initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the page, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		BOOL is_retry = FALSE;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the page was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(ni->initialized_size < vi->i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//	!PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				//	__set_page_dirty_buffers();
				//	page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}
		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}
		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = TRUE;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
			lcn = err;
		}
		/* Failed to map the buffer, even after retrying. */
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %lli).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", (long long)lcn);
		if (!err)
			err = -EIO;
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->readpage() isn't called later. */
	if (unlikely(!PageUptodate(page))) {
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			SetPageUptodate(page);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty page.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying page so we try again "
					"later.");
			/*
			 * Put the page back on mapping->dirty_pages, but
			 * leave its buffer's dirty state as-is.
			 */
			redirty_page_for_writepage(wbc, page);
			err = 0;
		} else
			SetPageError(page);
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */
	unlock_page(page);

	/*
	 * Submit the prepared buffers for i/o. Note the page is unlocked,
	 * and the async write i/o completion handler can end_page_writeback()
	 * at any time after the *first* submit_bh(). So the buffers can then
	 * disappear...
	 */
	need_end_writeback = TRUE;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			need_end_writeback = FALSE;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	/* If no i/o was started, need to end_page_writeback(). */
	if (unlikely(need_end_writeback))
		end_page_writeback(page);

	ntfs_debug("Done.");
	return err;
}

/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
 */
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned char bh_size_bits = vi->i_blkbits;
	unsigned int bh_size = 1 << bh_size_bits;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	int max_bhs = PAGE_CACHE_SIZE / bh_size;
	struct buffer_head *bhs[max_bhs];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
	unsigned rec_size_bits;
	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However this should
	 * never happen with the current driver and considering we do not
	 * handle this case here we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	BUG_ON(!max_bhs);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	BUG_ON(!page_has_buffers(page));
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_CACHE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = FALSE;
	rec_start_bh = NULL;
	do {
		BOOL is_retry = FALSE;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = FALSE;
				continue;
			}
			rec_is_dirty = TRUE;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = TRUE;
					lcn = err2;
				} else
					err2 = -EIO;
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).", (s64)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = TRUE;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
			 */
			do {
				clear_buffer_dirty(bhs[i]);
				bhs[i] = NULL;
			} while (++i % bhs_per_rec);
			continue;
		}
		nr_recs++;
	}
	/* If no records are to be written out, we are done. */
	if (!nr_recs)
		goto unm_done;
	flush_dcache_page(page);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		if (unlikely(test_set_buffer_locked(tbh)))
			BUG();
		/* The buffer dirty state is now irrelevant, just clean it. */
		clear_buffer_dirty(tbh);
		BUG_ON(!buffer_uptodate(tbh));
		BUG_ON(!buffer_mapped(tbh));
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (is_mft && !sync)
		goto do_mirror;
do_wait:
	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_error(vol->sb, "I/O error while writing ntfs "
					"record buffer (inode 0x%lx, "
					"attribute type 0x%x, page index "
					"0x%lx, page offset 0x%lx)! Unmount "
					"and run chkdsk.", vi->i_ino, ni->type,
					page->index, bh_offset(tbh));
			if (!err || err == -ENOMEM)
				err = -EIO;
			/*
			 * Set the buffer uptodate so the page and buffer
			 * states do not become out of sync.
			 */
			set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (is_mft && sync) {
do_mirror:
		for (i = 0; i < nr_bhs; i++) {
			unsigned long mft_no;
			unsigned int ofs;

			/*
			 * Skip buffers which are not at the beginning of
			 * records.
			 */
			if (i % bhs_per_rec)
				continue;
			tbh = bhs[i];
			/* Skip removed buffers (and hence records). */
			if (!tbh)
				continue;
			ofs = bh_offset(tbh);
			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(MFT_RECORD*)(kaddr + ofs),
						sync);
		}
		if (!sync)
			goto do_wait;
	}
	/* Remove the mst protection fixups again. */
	for (i = 0; i < nr_bhs; i++) {
		if (!(i % bhs_per_rec)) {
			tbh = bhs[i];
			if (!tbh)
				continue;
			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
					bh_offset(tbh)));
		}
	}
	flush_dcache_page(page);
unm_done:
	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		ntfs_inode *tni, *base_tni;

		tni = locked_nis[nr_locked_nis];
		/* Get the base inode. */
		down(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else {
			base_tni = tni->ext.base_ntfs_ino;
			BUG_ON(!base_tni);
		}
		up(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%lx.",
				tni == base_tni ? "base" : "extent",
				tni->mft_no);
		up(&tni->mrec_lock);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}
	SetPageUptodate(page);
	kunmap(page);
done:
	if (unlikely(err && err != -ENOMEM)) {
		/*
		 * Set page error if there is only one ntfs record in the page.
		 * Otherwise we would lose per-record granularity.
		 */
		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
			SetPageError(page);
		NVolSetErrors(vol);
	}
	if (page_is_dirty) {
		ntfs_debug("Page still contains one or more dirty ntfs "
				"records. Redirtying the page starting at "
				"record 0x%lx.", page->index <<
				(PAGE_CACHE_SHIFT - rec_size_bits));
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
	} else {
		/*
		 * Keep the VM happy. This must be done otherwise the
		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
		 * the page is clean.
		 */
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
	}
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}

/**
 * ntfs_writepage - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_page() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	loff_t i_size;
	struct inode *vi;
	ntfs_inode *ni, *base_ni;
	char *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m;
	u32 attr_len;
	int err;

	BUG_ON(!PageLocked(page));

	vi = page->mapping->host;
	i_size = i_size_read(vi);

	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
			PAGE_CACHE_SHIFT)) {
		/*
		 * The page may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidatepage(page, 0);
		unlock_page(page);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	ni = NTFS_I(vi);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed, encrypted,
		 * and/or sparse.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				unlock_page(page);
				ntfs_debug("Denying write access to encrypted "
						"file.");
				return -EACCES;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement and replace this check with
				// return ntfs_write_compressed_block(page);
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to compressed "
						"files is not supported yet. "
						"Sorry.");
				return -EOPNOTSUPP;
			}
			// TODO: Implement and remove this check.
			if (NInoSparse(ni)) {
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to sparse files "
						"is not supported yet. Sorry.");
				return -EOPNOTSUPP;
			}
		}
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
			/* The page straddles i_size. */
			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal data stream. */
		return ntfs_write_block(page, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted,
	 * sparse, or mst protected. This also means the attribute is smaller
	 * than an mft record and hence smaller than a page, so can simply
	 * return error on any pages with index above 0.
	 */
	BUG_ON(page_has_buffers(page));
	BUG_ON(!PageUptodate(page));
	if (unlikely(page->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				"Aborting write.", page->index);
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise the radix-tree tag
	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);

	/*
	 * Here, we don't need to zero the out of bounds area every time
	 * because the below memcpy() already takes care of the
	 * mmap-at-end-of-file requirements. If the file is converted to a
	 * non-resident one, then the code path used is switched to the
	 * non-resident one where the zeroing happens on each ntfs_writepage()
	 * invocation.
	 *
	 * The above also applies nicely when i_size is decreased.
	 *
	 * When i_size is increased, the memory between the old and new i_size
	 * _must_ be zeroed (or overwritten with new data). Otherwise we will
	 * expose data to userspace/disk which should never have been exposed.
	 *
	 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
	 * if we cannot guarantee that, then enable the zeroing below. If the
	 * zeroing below is enabled, we MUST move the unlock_page() from above
	 * to after the kunmap_atomic(), i.e. just before the
	 * end_page_writeback().
	 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
	 * increases for resident attributes so those are ok.
	 * TODO: ntfs_truncate(), others?
	 */

	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	kaddr = kmap_atomic(page, KM_USER0);
	if (unlikely(attr_len > i_size)) {
		/* Zero out of bounds area in the mft record. */
		memset((u8*)ctx->attr + le16_to_cpu(
				ctx->attr->data.resident.value_offset) +
				i_size, 0, attr_len - i_size);
		attr_len = i_size;
	}
	/* Copy the data from the page to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			kaddr, attr_len);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* Zero out of bounds area in the page cache page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	end_page_writeback(page);

	/* Mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i. Setting page error flag.", err);
		SetPageError(page);
	}
	unlock_page(page);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

/**
 * ntfs_prepare_nonresident_write -
 *
 */
static int ntfs_prepare_nonresident_write(struct page *page,
		unsigned from, unsigned to)
{
	VCN vcn;
	LCN lcn;
	sector_t block, ablock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
	unsigned int vcn_ofs, block_start, block_end, blocksize;
	int err;
	BOOL is_retry;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
			page->index, from, to);

	BUG_ON(!NInoNonResident(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	/*
	 * create_empty_buffers() will create uptodate/dirty buffers if the
	 * page is uptodate/dirty.
	 */
	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh))
		return -ENOMEM;

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	/*
	 * The first out of bounds block for the allocated size. No need to
	 * round up as allocated_size is in multiples of cluster size and the
	 * minimum cluster size is 512 bytes, which is equal to the smallest
	 * blocksize.
	 */
	ablock = ni->allocated_size >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = ni->initialized_size >> blocksize_bits;

	/* Loop through all the buffers in the page. */
	block_start = 0;
	rl = NULL;
	err = 0;
	do {
		block_end = block_start + blocksize;
		/*
		 * If buffer @bh is outside the write, just mark it uptodate
		 * if the page is uptodate and continue with the next buffer.
		 */
		if (block_end <= from || block_start >= to) {
			if (PageUptodate(page)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			}
			continue;
		}
		/*
		 * @bh is at least partially being written to.
		 * Make sure it is not marked as new.
		 */
		//if (buffer_new(bh))
		//	clear_buffer_new(bh);

		if (block >= ablock) {
			// TODO: block is above allocated_size, need to
			// allocate it. Best done in one go to accommodate not
			// only block but all above blocks up to and including:
			// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
			// - 1) >> blocksize_bits. Obviously will need to round
			// up to next cluster boundary, too. This should be
			// done with a helper function, so it can be reused.
			ntfs_error(vol->sb, "Writing beyond allocated size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			goto err_out;
			// Need to update ablock.
			// Need to set_buffer_new() on all block bhs that are
			// newly allocated.
		}
		/*
		 * Now we have enough allocated size to fulfill the whole
		 * request, i.e. block < ablock is true.
		 */
		if (unlikely((block >= iblock) &&
				(ni->initialized_size < vi->i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//	!PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				//	__set_page_dirty_buffers();
				//	page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			goto err_out;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* Need to map unmapped buffers. */
		if (!buffer_mapped(bh)) {
			/* Unmapped buffer. Need to map it. */
			bh->b_bdev = vol->sb->s_bdev;

			/* Convert block into corresponding vcn and offset. */
			vcn = (VCN)block << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)block << blocksize_bits) &
					vol->cluster_size_mask;

			is_retry = FALSE;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			if (unlikely(lcn < 0)) {
				/*
				 * We extended the attribute allocation above.
				 * If we hit an ENOENT here it means that the
				 * allocation was insufficient which is a bug.
				 */
				BUG_ON(lcn == LCN_ENOENT);

				/* It is a hole, need to instantiate it. */
				if (lcn == LCN_HOLE) {
					// TODO: Instantiate the hole.
					// clear_buffer_new(bh);
					// unmap_underlying_metadata(bh->b_bdev,
					//		bh->b_blocknr);
					// For non-uptodate buffers, need to
					// zero out the region outside the
					// request in this bh or all bhs,
					// depending on what we implemented
					// above.
					// Need to flush_dcache_page().
					// Or could use set_buffer_new()
					// instead?
					ntfs_error(vol->sb, "Writing into "
							"sparse regions is "
							"not supported yet. "
							"Sorry.");
					err = -EOPNOTSUPP;
					goto err_out;
				} else if (!is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err = ntfs_map_runlist(ni, vcn);
					if (likely(!err))
						goto lock_retry_remap;
					rl = NULL;
					lcn = err;
				}
				/*
				 * Failed to map the buffer, even after
				 * retrying.
				 */
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Failed to write to inode "
						"0x%lx, attribute type 0x%x, "
						"vcn 0x%llx, offset 0x%x "
						"because its location on disk "
						"could not be determined%s "
						"(error code %lli).",
						ni->mft_no, ni->type,
						(unsigned long long)vcn,
						vcn_ofs, is_retry ? " even "
						"after retrying" : "",
						(long long)lcn);
				if (!err)
					err = -EIO;
				goto err_out;
			}
			/* We now have a successful remap, i.e. lcn >= 0. */

			/* Setup buffer head to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits)
					+ vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);

			// FIXME: Something analogous to this is needed for
			// each newly allocated block, i.e. BH_New.
			// FIXME: Might need to take this out of the
			// if (!buffer_mapped(bh)) {}, depending on how we
			// implement things during the allocated_size and
			// initialized_size extension code above.
			if (buffer_new(bh)) {
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
						bh->b_blocknr);
				if (PageUptodate(page)) {
					set_buffer_uptodate(bh);
					continue;
				}
				/*
				 * Page is _not_ uptodate, zero surrounding
				 * region. NOTE: This is how we decide if to
				 * zero or not!
				 */
				if (block_end > to || block_start < from) {
					void *kaddr;

					kaddr = kmap_atomic(page, KM_USER0);
					if (block_end > to)
						memset(kaddr + to, 0,
								block_end - to);
					if (block_start < from)
						memset(kaddr + block_start, 0,
								from -
								block_start);
					flush_dcache_page(page);
					kunmap_atomic(kaddr, KM_USER0);
				}
				continue;
			}
		}
		/* @bh is mapped, set it uptodate if the page is uptodate. */
		if (PageUptodate(page)) {
			if (!buffer_uptodate(bh))
				set_buffer_uptodate(bh);
			continue;
		}
		/*
		 * The page is not uptodate. The buffer is mapped. If it is not
		 * uptodate, and it is only partially being written to, we need
		 * to read the buffer in before the write, i.e. right now.
		 */
		if (!buffer_uptodate(bh) &&
				(block_start < from || block_end > to)) {
			ll_rw_block(READ, 1, &bh);
			*wait_bh++ = bh;
		}
	} while (block++, block_start = block_end,
			(bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl) {
		up_read(&ni->runlist.lock);
		rl = NULL;
	}

	/* If we issued read requests, let them complete. */
	while (wait_bh > wait) {
		wait_on_buffer(*--wait_bh);
		if (!buffer_uptodate(*wait_bh))
			return -EIO;
	}

	ntfs_debug("Done.");
	return 0;
err_out:
	/*
	 * Zero out any newly allocated blocks to avoid exposing stale data.
	 * If BH_New is set, we know that the block was newly allocated in the
	 * above loop.
	 * FIXME: What about initialized_size increments? Have we done all the
	 * required zeroing above? If not this error handling is broken, and
	 * in particular the if (block_end <= from) check is completely bogus.
	 */
	bh = head;
	block_start = 0;
	is_retry = FALSE;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from)
			continue;
		if (block_start >= to)
			break;
		if (buffer_new(bh)) {
			void *kaddr;

			clear_buffer_new(bh);
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + block_start, 0, bh->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
			is_retry = TRUE;
		}
	} while (block_start = block_end, (bh = bh->b_this_page) != head);
	if (is_retry)
		flush_dcache_page(page);
	if (rl)
		up_read(&ni->runlist.lock);
	return err;
}

/**
 * ntfs_prepare_write - prepare a page for receiving data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has not yet been copied into the @page.
 *
 * Need to extend the attribute/fill in holes if necessary, create blocks and
 * make partially overwritten blocks uptodate.
 *
 * i_size is not to be modified yet.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using block_prepare_write() [support for sparse files] or
 * cont_prepare_write() [no support for sparse files]. Cannot do that due to
 * ntfs specifics but can look at them for implementation guidance.
 *
 * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
 * the first byte in the page that will be written to and @to is the first byte
 * after the last byte that will be written to.
 */
static int ntfs_prepare_write(struct file *file, struct page *page,
		unsigned from, unsigned to)
{
	s64 new_size;
	struct inode *vi = page->mapping->host;
	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a;
	u8 *kaddr;
	u32 attr_len;
	int err;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
			page->index, from, to);
	BUG_ON(!PageLocked(page));
	BUG_ON(from > PAGE_CACHE_SIZE);
	BUG_ON(to > PAGE_CACHE_SIZE);
	BUG_ON(from > to);
	BUG_ON(NInoMstProtected(ni));
	/*
	 * If a previous ntfs_truncate() failed, repeat it and abort if it
	 * fails again.
1821 */ 1822 if (unlikely(NInoTruncateFailed(ni))) { 1823 down_write(&vi->i_alloc_sem); 1824 err = ntfs_truncate(vi); 1825 up_write(&vi->i_alloc_sem); 1826 if (err || NInoTruncateFailed(ni)) { 1827 if (!err) 1828 err = -EIO; 1829 goto err_out; 1830 } 1831 } 1832 /* If the attribute is not resident, deal with it elsewhere. */ 1833 if (NInoNonResident(ni)) { 1834 /* 1835 * Only unnamed $DATA attributes can be compressed, encrypted, 1836 * and/or sparse. 1837 */ 1838 if (ni->type == AT_DATA && !ni->name_len) { 1839 /* If file is encrypted, deny access, just like NT4. */ 1840 if (NInoEncrypted(ni)) { 1841 ntfs_debug("Denying write access to encrypted " 1842 "file."); 1843 return -EACCES; 1844 } 1845 /* Compressed data streams are handled in compress.c. */ 1846 if (NInoCompressed(ni)) { 1847 // TODO: Implement and replace this check with 1848 // return ntfs_write_compressed_block(page); 1849 ntfs_error(vi->i_sb, "Writing to compressed " 1850 "files is not supported yet. " 1851 "Sorry."); 1852 return -EOPNOTSUPP; 1853 } 1854 // TODO: Implement and remove this check. 1855 if (NInoSparse(ni)) { 1856 ntfs_error(vi->i_sb, "Writing to sparse files " 1857 "is not supported yet. Sorry."); 1858 return -EOPNOTSUPP; 1859 } 1860 } 1861 /* Normal data stream. */ 1862 return ntfs_prepare_nonresident_write(page, from, to); 1863 } 1864 /* 1865 * Attribute is resident, implying it is not compressed, encrypted, or 1866 * sparse. 1867 */ 1868 BUG_ON(page_has_buffers(page)); 1869 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; 1870 /* If we do not need to resize the attribute allocation we are done. */ 1871 if (new_size <= vi->i_size) 1872 goto done; 1873 1874 // FIXME: We abort for now as this code is not safe. 1875 ntfs_error(vi->i_sb, "Changing the file size is not supported yet. " 1876 "Sorry."); 1877 return -EOPNOTSUPP; 1878 1879 /* Map, pin, and lock the (base) mft record. */ 1880 if (!NInoAttr(ni)) 1881 base_ni = ni; 1882 else 1883 base_ni = ni->ext.base_ntfs_ino; 1884 m = map_mft_record(base_ni); 1885 if (IS_ERR(m)) { 1886 err = PTR_ERR(m); 1887 m = NULL; 1888 ctx = NULL; 1889 goto err_out; 1890 } 1891 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1892 if (unlikely(!ctx)) { 1893 err = -ENOMEM; 1894 goto err_out; 1895 } 1896 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1897 CASE_SENSITIVE, 0, NULL, 0, ctx); 1898 if (unlikely(err)) { 1899 if (err == -ENOENT) 1900 err = -EIO; 1901 goto err_out; 1902 } 1903 m = ctx->mrec; 1904 a = ctx->attr; 1905 /* The total length of the attribute value. */ 1906 attr_len = le32_to_cpu(a->data.resident.value_length); 1907 BUG_ON(vi->i_size != attr_len); 1908 /* Check if new size is allowed in $AttrDef. */ 1909 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); 1910 if (unlikely(err)) { 1911 if (err == -ERANGE) { 1912 ntfs_error(vol->sb, "Write would cause the inode " 1913 "0x%lx to exceed the maximum size for " 1914 "its attribute type (0x%x). Aborting " 1915 "write.", vi->i_ino, 1916 le32_to_cpu(ni->type)); 1917 } else { 1918 ntfs_error(vol->sb, "Inode 0x%lx has unknown " 1919 "attribute type 0x%x. Aborting " 1920 "write.", vi->i_ino, 1921 le32_to_cpu(ni->type)); 1922 err = -EIO; 1923 } 1924 goto err_out2; 1925 } 1926 /* 1927 * Extend the attribute record to be able to store the new attribute 1928 * size. 1929 */ 1930 if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a, 1931 le16_to_cpu(a->data.resident.value_offset) + 1932 new_size)) { 1933 /* Not enough space in the mft record. 
		ntfs_error(vol->sb, "Not enough space in the mft record for "
				"the resized attribute value. This is not "
				"supported yet. Aborting write.");
		err = -EOPNOTSUPP;
		goto err_out2;
	}
	/*
	 * We have enough space in the mft record to fit the write. This
	 * implies the attribute is smaller than the mft record and hence the
	 * attribute must be in a single page and hence page->index must be 0.
	 */
	BUG_ON(page->index);
	/*
	 * If the beginning of the write is past the old size, enlarge the
	 * attribute value up to the beginning of the write and fill it with
	 * zeroes.
	 */
	if (from > attr_len) {
		memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				attr_len, 0, from - attr_len);
		a->data.resident.value_length = cpu_to_le32(from);
		/* Zero the corresponding area in the page as well. */
		if (PageUptodate(page)) {
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + attr_len, 0, from - attr_len);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page);
		}
	}
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	/*
	 * Because resident attributes are handled by memcpy() to/from the
	 * corresponding MFT record, and because this form of i/o is byte
	 * aligned rather than block aligned, there is no need to bring the
	 * page uptodate here as in the non-resident case where we need to
	 * bring the buffers straddled by the write uptodate before
	 * generic_file_write() does the copying from userspace.
	 *
	 * We thus defer bringing the page region outside the written-to
	 * region uptodate until ntfs_commit_write(), which makes the code
	 * simpler and saves one atomic kmap, which is good.
	 */
done:
	ntfs_debug("Done.");
	return 0;
err_out:
	if (err == -ENOMEM)
		ntfs_warning(vi->i_sb, "Error allocating memory required to "
				"prepare the write.");
	else {
		ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				"with error %i.", err);
		NVolSetErrors(vol);
		make_bad_inode(vi);
	}
err_out2:
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

/**
 * ntfs_commit_nonresident_write - commit a write to a non-resident attribute
 *
 */
static int ntfs_commit_nonresident_write(struct page *page,
		unsigned from, unsigned to)
{
	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
	struct inode *vi = page->mapping->host;
	struct buffer_head *bh, *head;
	unsigned int block_start, block_end, blocksize;
	BOOL partial;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino,
			NTFS_I(vi)->type, page->index, from, to);
	blocksize = 1 << vi->i_blkbits;

	// FIXME: We need a whole slew of special cases in here for compressed
	// files, for example...
	// For now, we know ntfs_prepare_write() would have failed so we can't
	// get here in any of the cases which we would have to special case, so
	// this is just a ripped off, unrolled generic_commit_write().
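
	/*
	 * Walk the buffers in the page: buffers lying wholly outside the
	 * written range [from, to) are only inspected (if any of them is not
	 * uptodate, the page as a whole cannot be marked uptodate below),
	 * while buffers overlapping the range, which ntfs_prepare_write() has
	 * already brought uptodate where necessary, are marked uptodate and
	 * dirty so that ntfs_writepage() writes them out later.
	 */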
	bh = head = page_buffers(page);
	block_start = 0;
	partial = FALSE;
	do {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = TRUE;
		} else {
			set_buffer_uptodate(bh);
			mark_buffer_dirty(bh);
		}
	} while (block_start = block_end, (bh = bh->b_this_page) != head);
	/*
	 * If this is a partial write which happened to make all buffers
	 * uptodate then we can optimize away a bogus ->readpage() for the next
	 * read(). Here we 'discover' whether the page went uptodate as a
	 * result of this (potentially partial) write.
	 */
	if (!partial)
		SetPageUptodate(page);
	/*
	 * Not convinced about this at all. See the disparity comment above.
	 * For now we know ntfs_prepare_write() would have failed in the case
	 * where the write exceeds i_size, so this will never trigger, which
	 * is fine.
	 */
	if (pos > vi->i_size) {
		ntfs_error(vi->i_sb, "Writing beyond the existing file size "
				"is not supported yet. Sorry.");
		return -EOPNOTSUPP;
		// vi->i_size = pos;
		// mark_inode_dirty(vi);
	}
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_commit_write - commit the received data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has already been copied into the @page. ntfs_prepare_write() has been
 * called before the data was copied and it returned success, so we can take
 * the results of various BUG checks and some error handling for granted.
 *
 * Need to mark modified blocks dirty so they get written out later when
 * ntfs_writepage() is invoked by the VM.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using generic_commit_write(). This marks buffers uptodate and
 * dirty, sets the page uptodate if all buffers in the page are uptodate, and
 * updates i_size if the end of io is beyond i_size. In that case, it also
 * marks the inode dirty.
 *
 * Cannot use generic_commit_write() due to ntfs specifics but can look at it
 * for implementation guidance.
 *
 * If things have gone as outlined in ntfs_prepare_write(), then we do not
 * need to do any page content modifications here at all, except in the write
 * to a resident attribute case, where we need to bring the page uptodate
 * here, combining that with the copy into the mft record, which saves one
 * atomic kmap.
 */
static int ntfs_commit_write(struct file *file, struct page *page,
		unsigned from, unsigned to)
{
	struct inode *vi = page->mapping->host;
	ntfs_inode *base_ni, *ni = NTFS_I(vi);
	char *kaddr, *kattr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	u32 attr_len;
	int err;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
			page->index, from, to);
	/* If the attribute is not resident, deal with it elsewhere. */
	if (NInoNonResident(ni)) {
		/* Only unnamed $DATA attributes can be compressed/encrypted. */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* Encrypted files need separate handling. */
			if (NInoEncrypted(ni)) {
				// We never get here at present!
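				// (ntfs_prepare_write() denies write access
				// to encrypted files with -EACCES, so
				// generic_file_write() never reaches the
				// commit stage for them.)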
				BUG();
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement this!
				// return ntfs_write_compressed_block(page);
				// We never get here at present!
				BUG();
			}
		}
		/* Normal data stream. */
		return ntfs_commit_nonresident_write(page, from, to);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * sparse.
	 */
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto err_out;
	}
	a = ctx->attr;
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->data.resident.value_length);
	BUG_ON(from > attr_len);
	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the received data from the page to the mft record. */
	memcpy(kattr + from, kaddr + from, to - from);
	/* Update the attribute length if necessary. */
	if (to > attr_len) {
		attr_len = to;
		a->data.resident.value_length = cpu_to_le32(attr_len);
	}
	/*
	 * If the page is not uptodate, bring the out of bounds area(s)
	 * uptodate by copying data from the mft record to the page.
	 */
	if (!PageUptodate(page)) {
		if (from > 0)
			memcpy(kaddr, kattr, from);
		if (to < attr_len)
			memcpy(kaddr + to, kattr + to, attr_len - to);
		/* Zero the region outside the end of the attribute value. */
		if (attr_len < PAGE_CACHE_SIZE)
			memset(kaddr + attr_len, 0,
					PAGE_CACHE_SIZE - attr_len);
		/*
		 * The probability of not having done any of the above is
		 * extremely small, so we just flush unconditionally.
		 */
		flush_dcache_page(page);
		SetPageUptodate(page);
	}
	kunmap_atomic(kaddr, KM_USER0);
	/* Update i_size if necessary. */
	if (vi->i_size < attr_len) {
		ni->allocated_size = ni->initialized_size = attr_len;
		i_size_write(vi, attr_len);
	}
	/* Mark the mft record dirty, so it gets written back. */
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	ntfs_debug("Done.");
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory required to "
				"commit the write.");
		if (PageUptodate(page)) {
			ntfs_warning(vi->i_sb, "Page is uptodate, setting "
					"dirty so the write will be retried "
					"later on by the VM.");
			/*
			 * Put the page on mapping->dirty_pages, but leave its
			 * buffers' dirty state as-is.
			 */
			__set_page_dirty_nobuffers(page);
			err = 0;
		} else
			ntfs_error(vi->i_sb, "Page is not uptodate. Written "
					"data has been lost.");
	} else {
		ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				"with error %i.", err);
		NVolSetErrors(ni->vol);
		make_bad_inode(vi);
	}
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

#endif /* NTFS_RW */

/**
 * ntfs_aops - general address space operations for inodes and attributes
 */
struct address_space_operations ntfs_aops = {
	.readpage	= ntfs_readpage,	/* Fill page with data. */
	.sync_page	= block_sync_page,	/* Currently, just unplugs the
						   disk request queue. */
#ifdef NTFS_RW
	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
	.prepare_write	= ntfs_prepare_write,	/* Prepare page and buffers
						   ready to receive data. */
	.commit_write	= ntfs_commit_write,	/* Commit received data. */
#endif /* NTFS_RW */
};

/**
 * ntfs_mst_aops - general address space operations for mst protected inodes
 *		   and attributes
 */
struct address_space_operations ntfs_mst_aops = {
	.readpage	= ntfs_readpage,	/* Fill page with data. */
	.sync_page	= block_sync_page,	/* Currently, just unplugs the
						   disk request queue. */
#ifdef NTFS_RW
	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
	.set_page_dirty	= __set_page_dirty_nobuffers,	/* Set the page dirty
						   without touching the buffers
						   belonging to the page. */
#endif /* NTFS_RW */
};

#ifdef NTFS_RW

/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page:	page containing the ntfs record to mark dirty
 * @ofs:	byte offset within @page at which the ntfs record begins
 *
 * Set the buffers and the page in which the ntfs record is located dirty.
 *
 * The latter also marks the vfs inode the ntfs record belongs to dirty
 * (I_DIRTY_PAGES only).
 *
 * If the page does not have buffers, we create them and set them uptodate.
 * The page may not be locked which is why we need to handle the buffers under
 * the mapping->private_lock. Once the buffers are marked dirty we no longer
 * need the lock since try_to_free_buffers() does not free dirty buffers.
2276 */ 2277 void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { 2278 struct address_space *mapping = page->mapping; 2279 ntfs_inode *ni = NTFS_I(mapping->host); 2280 struct buffer_head *bh, *head, *buffers_to_free = NULL; 2281 unsigned int end, bh_size, bh_ofs; 2282 2283 BUG_ON(!PageUptodate(page)); 2284 end = ofs + ni->itype.index.block_size; 2285 bh_size = 1 << VFS_I(ni)->i_blkbits; 2286 spin_lock(&mapping->private_lock); 2287 if (unlikely(!page_has_buffers(page))) { 2288 spin_unlock(&mapping->private_lock); 2289 bh = head = alloc_page_buffers(page, bh_size, 1); 2290 spin_lock(&mapping->private_lock); 2291 if (likely(!page_has_buffers(page))) { 2292 struct buffer_head *tail; 2293 2294 do { 2295 set_buffer_uptodate(bh); 2296 tail = bh; 2297 bh = bh->b_this_page; 2298 } while (bh); 2299 tail->b_this_page = head; 2300 attach_page_buffers(page, head); 2301 } else 2302 buffers_to_free = bh; 2303 } 2304 bh = head = page_buffers(page); 2305 do { 2306 bh_ofs = bh_offset(bh); 2307 if (bh_ofs + bh_size <= ofs) 2308 continue; 2309 if (unlikely(bh_ofs >= end)) 2310 break; 2311 set_buffer_dirty(bh); 2312 } while ((bh = bh->b_this_page) != head); 2313 spin_unlock(&mapping->private_lock); 2314 __set_page_dirty_nobuffers(page); 2315 if (unlikely(buffers_to_free)) { 2316 do { 2317 bh = buffers_to_free->b_this_page; 2318 free_buffer_head(buffers_to_free); 2319 buffers_to_free = bh; 2320 } while (buffers_to_free); 2321 } 2322 } 2323 2324 #endif /* NTFS_RW */ 2325