1 /* 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 3 * 4 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. 5 * 6 * This program/include file is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as published 8 * by the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program/include file is distributed in the hope that it will be 12 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program (in the main directory of the Linux-NTFS 18 * distribution in the file COPYING); if not, write to the Free Software 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 #include <linux/backing-dev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/gfp.h> 25 #include <linux/pagemap.h> 26 #include <linux/pagevec.h> 27 #include <linux/sched.h> 28 #include <linux/swap.h> 29 #include <linux/uio.h> 30 #include <linux/writeback.h> 31 32 #include <asm/page.h> 33 #include <asm/uaccess.h> 34 35 #include "attrib.h" 36 #include "bitmap.h" 37 #include "inode.h" 38 #include "debug.h" 39 #include "lcnalloc.h" 40 #include "malloc.h" 41 #include "mft.h" 42 #include "ntfs.h" 43 44 /** 45 * ntfs_file_open - called when an inode is about to be opened 46 * @vi: inode to be opened 47 * @filp: file structure describing the inode 48 * 49 * Limit file size to the page cache limit on architectures where unsigned long 50 * is 32-bits. This is the most we can do for now without overflowing the page 51 * cache page index. Doing it this way means we don't run into problems because 52 * of existing too large files. 
It would be better to allow the user to read 53 * the beginning of the file but I doubt very much anyone is going to hit this 54 * check on a 32-bit architecture, so there is no point in adding the extra 55 * complexity required to support this. 56 * 57 * On 64-bit architectures, the check is hopefully optimized away by the 58 * compiler. 59 * 60 * After the check passes, just call generic_file_open() to do its work. 61 */ 62 static int ntfs_file_open(struct inode *vi, struct file *filp) 63 { 64 if (sizeof(unsigned long) < 8) { 65 if (i_size_read(vi) > MAX_LFS_FILESIZE) 66 return -EOVERFLOW; 67 } 68 return generic_file_open(vi, filp); 69 } 70 71 #ifdef NTFS_RW 72 73 /** 74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 75 * @ni: ntfs inode of the attribute to extend 76 * @new_init_size: requested new initialized size in bytes 77 * 78 * Extend the initialized size of an attribute described by the ntfs inode @ni 79 * to @new_init_size bytes. This involves zeroing any non-sparse space between 80 * the old initialized size and @new_init_size both in the page cache and on 81 * disk (if relevant complete pages are already uptodate in the page cache then 82 * these are simply marked dirty). 83 * 84 * As a side-effect, the file size (vfs inode->i_size) may be incremented as, 85 * in the resident attribute case, it is tied to the initialized size and, in 86 * the non-resident attribute case, it may not fall below the initialized size. 87 * 88 * Note that if the attribute is resident, we do not need to touch the page 89 * cache at all. This is because if the page cache page is not uptodate we 90 * bring it uptodate later, when doing the write to the mft record since we 91 * then already have the page mapped. 
And if the page is uptodate, the 92 * non-initialized region will already have been zeroed when the page was 93 * brought uptodate and the region may in fact already have been overwritten 94 * with new data via mmap() based writes, so we cannot just zero it. And since 95 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped 96 * is unspecified, we choose not to do zeroing and thus we do not need to touch 97 * the page at all. For a more detailed explanation see ntfs_truncate() in 98 * fs/ntfs/inode.c. 99 * 100 * Return 0 on success and -errno on error. In the case that an error is 101 * encountered it is possible that the initialized size will already have been 102 * incremented some way towards @new_init_size but it is guaranteed that if 103 * this is the case, the necessary zeroing will also have happened and that all 104 * metadata is self-consistent. 105 * 106 * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be 107 * held by the caller. 
108 */ 109 static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) 110 { 111 s64 old_init_size; 112 loff_t old_i_size; 113 pgoff_t index, end_index; 114 unsigned long flags; 115 struct inode *vi = VFS_I(ni); 116 ntfs_inode *base_ni; 117 MFT_RECORD *m = NULL; 118 ATTR_RECORD *a; 119 ntfs_attr_search_ctx *ctx = NULL; 120 struct address_space *mapping; 121 struct page *page = NULL; 122 u8 *kattr; 123 int err; 124 u32 attr_len; 125 126 read_lock_irqsave(&ni->size_lock, flags); 127 old_init_size = ni->initialized_size; 128 old_i_size = i_size_read(vi); 129 BUG_ON(new_init_size > ni->allocated_size); 130 read_unlock_irqrestore(&ni->size_lock, flags); 131 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 132 "old_initialized_size 0x%llx, " 133 "new_initialized_size 0x%llx, i_size 0x%llx.", 134 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 135 (unsigned long long)old_init_size, 136 (unsigned long long)new_init_size, old_i_size); 137 if (!NInoAttr(ni)) 138 base_ni = ni; 139 else 140 base_ni = ni->ext.base_ntfs_ino; 141 /* Use goto to reduce indentation and we need the label below anyway. */ 142 if (NInoNonResident(ni)) 143 goto do_non_resident_extend; 144 BUG_ON(old_init_size != old_i_size); 145 m = map_mft_record(base_ni); 146 if (IS_ERR(m)) { 147 err = PTR_ERR(m); 148 m = NULL; 149 goto err_out; 150 } 151 ctx = ntfs_attr_get_search_ctx(base_ni, m); 152 if (unlikely(!ctx)) { 153 err = -ENOMEM; 154 goto err_out; 155 } 156 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 157 CASE_SENSITIVE, 0, NULL, 0, ctx); 158 if (unlikely(err)) { 159 if (err == -ENOENT) 160 err = -EIO; 161 goto err_out; 162 } 163 m = ctx->mrec; 164 a = ctx->attr; 165 BUG_ON(a->non_resident); 166 /* The total length of the attribute value. */ 167 attr_len = le32_to_cpu(a->data.resident.value_length); 168 BUG_ON(old_i_size != (loff_t)attr_len); 169 /* 170 * Do the zeroing in the mft record and update the attribute size in 171 * the mft record. 
172 */ 173 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 174 memset(kattr + attr_len, 0, new_init_size - attr_len); 175 a->data.resident.value_length = cpu_to_le32((u32)new_init_size); 176 /* Finally, update the sizes in the vfs and ntfs inodes. */ 177 write_lock_irqsave(&ni->size_lock, flags); 178 i_size_write(vi, new_init_size); 179 ni->initialized_size = new_init_size; 180 write_unlock_irqrestore(&ni->size_lock, flags); 181 goto done; 182 do_non_resident_extend: 183 /* 184 * If the new initialized size @new_init_size exceeds the current file 185 * size (vfs inode->i_size), we need to extend the file size to the 186 * new initialized size. 187 */ 188 if (new_init_size > old_i_size) { 189 m = map_mft_record(base_ni); 190 if (IS_ERR(m)) { 191 err = PTR_ERR(m); 192 m = NULL; 193 goto err_out; 194 } 195 ctx = ntfs_attr_get_search_ctx(base_ni, m); 196 if (unlikely(!ctx)) { 197 err = -ENOMEM; 198 goto err_out; 199 } 200 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 201 CASE_SENSITIVE, 0, NULL, 0, ctx); 202 if (unlikely(err)) { 203 if (err == -ENOENT) 204 err = -EIO; 205 goto err_out; 206 } 207 m = ctx->mrec; 208 a = ctx->attr; 209 BUG_ON(!a->non_resident); 210 BUG_ON(old_i_size != (loff_t) 211 sle64_to_cpu(a->data.non_resident.data_size)); 212 a->data.non_resident.data_size = cpu_to_sle64(new_init_size); 213 flush_dcache_mft_record_page(ctx->ntfs_ino); 214 mark_mft_record_dirty(ctx->ntfs_ino); 215 /* Update the file size in the vfs inode. */ 216 i_size_write(vi, new_init_size); 217 ntfs_attr_put_search_ctx(ctx); 218 ctx = NULL; 219 unmap_mft_record(base_ni); 220 m = NULL; 221 } 222 mapping = vi->i_mapping; 223 index = old_init_size >> PAGE_CACHE_SHIFT; 224 end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 225 do { 226 /* 227 * Read the page. If the page is not present, this will zero 228 * the uninitialized regions for us. 
229 */ 230 page = read_mapping_page(mapping, index, NULL); 231 if (IS_ERR(page)) { 232 err = PTR_ERR(page); 233 goto init_err_out; 234 } 235 if (unlikely(PageError(page))) { 236 page_cache_release(page); 237 err = -EIO; 238 goto init_err_out; 239 } 240 /* 241 * Update the initialized size in the ntfs inode. This is 242 * enough to make ntfs_writepage() work. 243 */ 244 write_lock_irqsave(&ni->size_lock, flags); 245 ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT; 246 if (ni->initialized_size > new_init_size) 247 ni->initialized_size = new_init_size; 248 write_unlock_irqrestore(&ni->size_lock, flags); 249 /* Set the page dirty so it gets written out. */ 250 set_page_dirty(page); 251 page_cache_release(page); 252 /* 253 * Play nice with the vm and the rest of the system. This is 254 * very much needed as we can potentially be modifying the 255 * initialised size from a very small value to a really huge 256 * value, e.g. 257 * f = open(somefile, O_TRUNC); 258 * truncate(f, 10GiB); 259 * seek(f, 10GiB); 260 * write(f, 1); 261 * And this would mean we would be marking dirty hundreds of 262 * thousands of pages or as in the above example more than 263 * two and a half million pages! 264 * 265 * TODO: For sparse pages could optimize this workload by using 266 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This 267 * would be set in readpage for sparse pages and here we would 268 * not need to mark dirty any pages which have this bit set. 269 * The only caveat is that we have to clear the bit everywhere 270 * where we allocate any clusters that lie in the page or that 271 * contain the page. 272 * 273 * TODO: An even greater optimization would be for us to only 274 * call readpage() on pages which are not in sparse regions as 275 * determined from the runlist. This would greatly reduce the 276 * number of pages we read and make dirty in the case of sparse 277 * files. 
278 */ 279 balance_dirty_pages_ratelimited(mapping); 280 cond_resched(); 281 } while (++index < end_index); 282 read_lock_irqsave(&ni->size_lock, flags); 283 BUG_ON(ni->initialized_size != new_init_size); 284 read_unlock_irqrestore(&ni->size_lock, flags); 285 /* Now bring in sync the initialized_size in the mft record. */ 286 m = map_mft_record(base_ni); 287 if (IS_ERR(m)) { 288 err = PTR_ERR(m); 289 m = NULL; 290 goto init_err_out; 291 } 292 ctx = ntfs_attr_get_search_ctx(base_ni, m); 293 if (unlikely(!ctx)) { 294 err = -ENOMEM; 295 goto init_err_out; 296 } 297 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 298 CASE_SENSITIVE, 0, NULL, 0, ctx); 299 if (unlikely(err)) { 300 if (err == -ENOENT) 301 err = -EIO; 302 goto init_err_out; 303 } 304 m = ctx->mrec; 305 a = ctx->attr; 306 BUG_ON(!a->non_resident); 307 a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size); 308 done: 309 flush_dcache_mft_record_page(ctx->ntfs_ino); 310 mark_mft_record_dirty(ctx->ntfs_ino); 311 if (ctx) 312 ntfs_attr_put_search_ctx(ctx); 313 if (m) 314 unmap_mft_record(base_ni); 315 ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.", 316 (unsigned long long)new_init_size, i_size_read(vi)); 317 return 0; 318 init_err_out: 319 write_lock_irqsave(&ni->size_lock, flags); 320 ni->initialized_size = old_init_size; 321 write_unlock_irqrestore(&ni->size_lock, flags); 322 err_out: 323 if (ctx) 324 ntfs_attr_put_search_ctx(ctx); 325 if (m) 326 unmap_mft_record(base_ni); 327 ntfs_debug("Failed. 
Returning error code %i.", err); 328 return err; 329 } 330 331 static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, 332 struct iov_iter *from) 333 { 334 loff_t pos; 335 s64 end, ll; 336 ssize_t err; 337 unsigned long flags; 338 struct file *file = iocb->ki_filp; 339 struct inode *vi = file_inode(file); 340 ntfs_inode *base_ni, *ni = NTFS_I(vi); 341 ntfs_volume *vol = ni->vol; 342 343 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " 344 "0x%llx, count 0x%zx.", vi->i_ino, 345 (unsigned)le32_to_cpu(ni->type), 346 (unsigned long long)iocb->ki_pos, 347 iov_iter_count(from)); 348 err = generic_write_checks(iocb, from); 349 if (unlikely(err <= 0)) 350 goto out; 351 /* 352 * All checks have passed. Before we start doing any writing we want 353 * to abort any totally illegal writes. 354 */ 355 BUG_ON(NInoMstProtected(ni)); 356 BUG_ON(ni->type != AT_DATA); 357 /* If file is encrypted, deny access, just like NT4. */ 358 if (NInoEncrypted(ni)) { 359 /* Only $DATA attributes can be encrypted. */ 360 /* 361 * Reminder for later: Encrypted files are _always_ 362 * non-resident so that the content can always be encrypted. 363 */ 364 ntfs_debug("Denying write access to encrypted file."); 365 err = -EACCES; 366 goto out; 367 } 368 if (NInoCompressed(ni)) { 369 /* Only unnamed $DATA attribute can be compressed. */ 370 BUG_ON(ni->name_len); 371 /* 372 * Reminder for later: If resident, the data is not actually 373 * compressed. Only on the switch to non-resident does 374 * compression kick in. This is in contrast to encrypted files 375 * (see above). 376 */ 377 ntfs_error(vi->i_sb, "Writing to compressed files is not " 378 "implemented yet. 
Sorry."); 379 err = -EOPNOTSUPP; 380 goto out; 381 } 382 base_ni = ni; 383 if (NInoAttr(ni)) 384 base_ni = ni->ext.base_ntfs_ino; 385 err = file_remove_suid(file); 386 if (unlikely(err)) 387 goto out; 388 /* 389 * Our ->update_time method always succeeds thus file_update_time() 390 * cannot fail either so there is no need to check the return code. 391 */ 392 file_update_time(file); 393 pos = iocb->ki_pos; 394 /* The first byte after the last cluster being written to. */ 395 end = (pos + iov_iter_count(from) + vol->cluster_size_mask) & 396 ~(u64)vol->cluster_size_mask; 397 /* 398 * If the write goes beyond the allocated size, extend the allocation 399 * to cover the whole of the write, rounded up to the nearest cluster. 400 */ 401 read_lock_irqsave(&ni->size_lock, flags); 402 ll = ni->allocated_size; 403 read_unlock_irqrestore(&ni->size_lock, flags); 404 if (end > ll) { 405 /* 406 * Extend the allocation without changing the data size. 407 * 408 * Note we ensure the allocation is big enough to at least 409 * write some data but we do not require the allocation to be 410 * complete, i.e. it may be partial. 411 */ 412 ll = ntfs_attr_extend_allocation(ni, end, -1, pos); 413 if (likely(ll >= 0)) { 414 BUG_ON(pos >= ll); 415 /* If the extension was partial truncate the write. */ 416 if (end > ll) { 417 ntfs_debug("Truncating write to inode 0x%lx, " 418 "attribute type 0x%x, because " 419 "the allocation was only " 420 "partially extended.", 421 vi->i_ino, (unsigned) 422 le32_to_cpu(ni->type)); 423 iov_iter_truncate(from, ll - pos); 424 } 425 } else { 426 err = ll; 427 read_lock_irqsave(&ni->size_lock, flags); 428 ll = ni->allocated_size; 429 read_unlock_irqrestore(&ni->size_lock, flags); 430 /* Perform a partial write if possible or fail. 
*/ 431 if (pos < ll) { 432 ntfs_debug("Truncating write to inode 0x%lx " 433 "attribute type 0x%x, because " 434 "extending the allocation " 435 "failed (error %d).", 436 vi->i_ino, (unsigned) 437 le32_to_cpu(ni->type), 438 (int)-err); 439 iov_iter_truncate(from, ll - pos); 440 } else { 441 if (err != -ENOSPC) 442 ntfs_error(vi->i_sb, "Cannot perform " 443 "write to inode " 444 "0x%lx, attribute " 445 "type 0x%x, because " 446 "extending the " 447 "allocation failed " 448 "(error %ld).", 449 vi->i_ino, (unsigned) 450 le32_to_cpu(ni->type), 451 (long)-err); 452 else 453 ntfs_debug("Cannot perform write to " 454 "inode 0x%lx, " 455 "attribute type 0x%x, " 456 "because there is not " 457 "space left.", 458 vi->i_ino, (unsigned) 459 le32_to_cpu(ni->type)); 460 goto out; 461 } 462 } 463 } 464 /* 465 * If the write starts beyond the initialized size, extend it up to the 466 * beginning of the write and initialize all non-sparse space between 467 * the old initialized size and the new one. This automatically also 468 * increments the vfs inode->i_size to keep it above or equal to the 469 * initialized_size. 470 */ 471 read_lock_irqsave(&ni->size_lock, flags); 472 ll = ni->initialized_size; 473 read_unlock_irqrestore(&ni->size_lock, flags); 474 if (pos > ll) { 475 /* 476 * Wait for ongoing direct i/o to complete before proceeding. 477 * New direct i/o cannot start as we hold i_mutex. 
478 */ 479 inode_dio_wait(vi); 480 err = ntfs_attr_extend_initialized(ni, pos); 481 if (unlikely(err < 0)) 482 ntfs_error(vi->i_sb, "Cannot perform write to inode " 483 "0x%lx, attribute type 0x%x, because " 484 "extending the initialized size " 485 "failed (error %d).", vi->i_ino, 486 (unsigned)le32_to_cpu(ni->type), 487 (int)-err); 488 } 489 out: 490 return err; 491 } 492 493 /** 494 * __ntfs_grab_cache_pages - obtain a number of locked pages 495 * @mapping: address space mapping from which to obtain page cache pages 496 * @index: starting index in @mapping at which to begin obtaining pages 497 * @nr_pages: number of page cache pages to obtain 498 * @pages: array of pages in which to return the obtained page cache pages 499 * @cached_page: allocated but as yet unused page 500 * 501 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 502 * starting at index @index. 503 * 504 * If a page is newly created, add it to lru list 505 * 506 * Note, the page locks are obtained in ascending page index order. 
507 */ 508 static inline int __ntfs_grab_cache_pages(struct address_space *mapping, 509 pgoff_t index, const unsigned nr_pages, struct page **pages, 510 struct page **cached_page) 511 { 512 int err, nr; 513 514 BUG_ON(!nr_pages); 515 err = nr = 0; 516 do { 517 pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK | 518 FGP_ACCESSED); 519 if (!pages[nr]) { 520 if (!*cached_page) { 521 *cached_page = page_cache_alloc(mapping); 522 if (unlikely(!*cached_page)) { 523 err = -ENOMEM; 524 goto err_out; 525 } 526 } 527 err = add_to_page_cache_lru(*cached_page, mapping, 528 index, GFP_KERNEL); 529 if (unlikely(err)) { 530 if (err == -EEXIST) 531 continue; 532 goto err_out; 533 } 534 pages[nr] = *cached_page; 535 *cached_page = NULL; 536 } 537 index++; 538 nr++; 539 } while (nr < nr_pages); 540 out: 541 return err; 542 err_out: 543 while (nr > 0) { 544 unlock_page(pages[--nr]); 545 page_cache_release(pages[nr]); 546 } 547 goto out; 548 } 549 550 static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) 551 { 552 lock_buffer(bh); 553 get_bh(bh); 554 bh->b_end_io = end_buffer_read_sync; 555 return submit_bh(READ, bh); 556 } 557 558 /** 559 * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data 560 * @pages: array of destination pages 561 * @nr_pages: number of pages in @pages 562 * @pos: byte position in file at which the write begins 563 * @bytes: number of bytes to be written 564 * 565 * This is called for non-resident attributes from ntfs_file_buffered_write() 566 * with i_mutex held on the inode (@pages[0]->mapping->host). There are 567 * @nr_pages pages in @pages which are locked but not kmap()ped. The source 568 * data has not yet been copied into the @pages. 569 * 570 * Need to fill any holes with actual clusters, allocate buffers if necessary, 571 * ensure all the buffers are mapped, and bring uptodate any buffers that are 572 * only partially being written to. 
573 * 574 * If @nr_pages is greater than one, we are guaranteed that the cluster size is 575 * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside 576 * the same cluster and that they are the entirety of that cluster, and that 577 * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole. 578 * 579 * i_size is not to be modified yet. 580 * 581 * Return 0 on success or -errno on error. 582 */ 583 static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, 584 unsigned nr_pages, s64 pos, size_t bytes) 585 { 586 VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend; 587 LCN lcn; 588 s64 bh_pos, vcn_len, end, initialized_size; 589 sector_t lcn_block; 590 struct page *page; 591 struct inode *vi; 592 ntfs_inode *ni, *base_ni = NULL; 593 ntfs_volume *vol; 594 runlist_element *rl, *rl2; 595 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; 596 ntfs_attr_search_ctx *ctx = NULL; 597 MFT_RECORD *m = NULL; 598 ATTR_RECORD *a = NULL; 599 unsigned long flags; 600 u32 attr_rec_len = 0; 601 unsigned blocksize, u; 602 int err, mp_size; 603 bool rl_write_locked, was_hole, is_retry; 604 unsigned char blocksize_bits; 605 struct { 606 u8 runlist_merged:1; 607 u8 mft_attr_mapped:1; 608 u8 mp_rebuilt:1; 609 u8 attr_switched:1; 610 } status = { 0, 0, 0, 0 }; 611 612 BUG_ON(!nr_pages); 613 BUG_ON(!pages); 614 BUG_ON(!*pages); 615 vi = pages[0]->mapping->host; 616 ni = NTFS_I(vi); 617 vol = ni->vol; 618 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " 619 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 620 vi->i_ino, ni->type, pages[0]->index, nr_pages, 621 (long long)pos, bytes); 622 blocksize = vol->sb->s_blocksize; 623 blocksize_bits = vol->sb->s_blocksize_bits; 624 u = 0; 625 do { 626 page = pages[u]; 627 BUG_ON(!page); 628 /* 629 * create_empty_buffers() will create uptodate/dirty buffers if 630 * the page is uptodate/dirty. 
631 */ 632 if (!page_has_buffers(page)) { 633 create_empty_buffers(page, blocksize, 0); 634 if (unlikely(!page_has_buffers(page))) 635 return -ENOMEM; 636 } 637 } while (++u < nr_pages); 638 rl_write_locked = false; 639 rl = NULL; 640 err = 0; 641 vcn = lcn = -1; 642 vcn_len = 0; 643 lcn_block = -1; 644 was_hole = false; 645 cpos = pos >> vol->cluster_size_bits; 646 end = pos + bytes; 647 cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits; 648 /* 649 * Loop over each page and for each page over each buffer. Use goto to 650 * reduce indentation. 651 */ 652 u = 0; 653 do_next_page: 654 page = pages[u]; 655 bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; 656 bh = head = page_buffers(page); 657 do { 658 VCN cdelta; 659 s64 bh_end; 660 unsigned bh_cofs; 661 662 /* Clear buffer_new on all buffers to reinitialise state. */ 663 if (buffer_new(bh)) 664 clear_buffer_new(bh); 665 bh_end = bh_pos + blocksize; 666 bh_cpos = bh_pos >> vol->cluster_size_bits; 667 bh_cofs = bh_pos & vol->cluster_size_mask; 668 if (buffer_mapped(bh)) { 669 /* 670 * The buffer is already mapped. If it is uptodate, 671 * ignore it. 672 */ 673 if (buffer_uptodate(bh)) 674 continue; 675 /* 676 * The buffer is not uptodate. If the page is uptodate 677 * set the buffer uptodate and otherwise ignore it. 678 */ 679 if (PageUptodate(page)) { 680 set_buffer_uptodate(bh); 681 continue; 682 } 683 /* 684 * Neither the page nor the buffer are uptodate. If 685 * the buffer is only partially being written to, we 686 * need to read it in before the write, i.e. now. 687 */ 688 if ((bh_pos < pos && bh_end > pos) || 689 (bh_pos < end && bh_end > end)) { 690 /* 691 * If the buffer is fully or partially within 692 * the initialized size, do an actual read. 693 * Otherwise, simply zero the buffer. 
694 */ 695 read_lock_irqsave(&ni->size_lock, flags); 696 initialized_size = ni->initialized_size; 697 read_unlock_irqrestore(&ni->size_lock, flags); 698 if (bh_pos < initialized_size) { 699 ntfs_submit_bh_for_read(bh); 700 *wait_bh++ = bh; 701 } else { 702 zero_user(page, bh_offset(bh), 703 blocksize); 704 set_buffer_uptodate(bh); 705 } 706 } 707 continue; 708 } 709 /* Unmapped buffer. Need to map it. */ 710 bh->b_bdev = vol->sb->s_bdev; 711 /* 712 * If the current buffer is in the same clusters as the map 713 * cache, there is no need to check the runlist again. The 714 * map cache is made up of @vcn, which is the first cached file 715 * cluster, @vcn_len which is the number of cached file 716 * clusters, @lcn is the device cluster corresponding to @vcn, 717 * and @lcn_block is the block number corresponding to @lcn. 718 */ 719 cdelta = bh_cpos - vcn; 720 if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) { 721 map_buffer_cached: 722 BUG_ON(lcn < 0); 723 bh->b_blocknr = lcn_block + 724 (cdelta << (vol->cluster_size_bits - 725 blocksize_bits)) + 726 (bh_cofs >> blocksize_bits); 727 set_buffer_mapped(bh); 728 /* 729 * If the page is uptodate so is the buffer. If the 730 * buffer is fully outside the write, we ignore it if 731 * it was already allocated and we mark it dirty so it 732 * gets written out if we allocated it. On the other 733 * hand, if we allocated the buffer but we are not 734 * marking it dirty we set buffer_new so we can do 735 * error recovery. 736 */ 737 if (PageUptodate(page)) { 738 if (!buffer_uptodate(bh)) 739 set_buffer_uptodate(bh); 740 if (unlikely(was_hole)) { 741 /* We allocated the buffer. */ 742 unmap_underlying_metadata(bh->b_bdev, 743 bh->b_blocknr); 744 if (bh_end <= pos || bh_pos >= end) 745 mark_buffer_dirty(bh); 746 else 747 set_buffer_new(bh); 748 } 749 continue; 750 } 751 /* Page is _not_ uptodate. */ 752 if (likely(!was_hole)) { 753 /* 754 * Buffer was already allocated. 
If it is not 755 * uptodate and is only partially being written 756 * to, we need to read it in before the write, 757 * i.e. now. 758 */ 759 if (!buffer_uptodate(bh) && bh_pos < end && 760 bh_end > pos && 761 (bh_pos < pos || 762 bh_end > end)) { 763 /* 764 * If the buffer is fully or partially 765 * within the initialized size, do an 766 * actual read. Otherwise, simply zero 767 * the buffer. 768 */ 769 read_lock_irqsave(&ni->size_lock, 770 flags); 771 initialized_size = ni->initialized_size; 772 read_unlock_irqrestore(&ni->size_lock, 773 flags); 774 if (bh_pos < initialized_size) { 775 ntfs_submit_bh_for_read(bh); 776 *wait_bh++ = bh; 777 } else { 778 zero_user(page, bh_offset(bh), 779 blocksize); 780 set_buffer_uptodate(bh); 781 } 782 } 783 continue; 784 } 785 /* We allocated the buffer. */ 786 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 787 /* 788 * If the buffer is fully outside the write, zero it, 789 * set it uptodate, and mark it dirty so it gets 790 * written out. If it is partially being written to, 791 * zero region surrounding the write but leave it to 792 * commit write to do anything else. Finally, if the 793 * buffer is fully being overwritten, do nothing. 794 */ 795 if (bh_end <= pos || bh_pos >= end) { 796 if (!buffer_uptodate(bh)) { 797 zero_user(page, bh_offset(bh), 798 blocksize); 799 set_buffer_uptodate(bh); 800 } 801 mark_buffer_dirty(bh); 802 continue; 803 } 804 set_buffer_new(bh); 805 if (!buffer_uptodate(bh) && 806 (bh_pos < pos || bh_end > end)) { 807 u8 *kaddr; 808 unsigned pofs; 809 810 kaddr = kmap_atomic(page); 811 if (bh_pos < pos) { 812 pofs = bh_pos & ~PAGE_CACHE_MASK; 813 memset(kaddr + pofs, 0, pos - bh_pos); 814 } 815 if (bh_end > end) { 816 pofs = end & ~PAGE_CACHE_MASK; 817 memset(kaddr + pofs, 0, bh_end - end); 818 } 819 kunmap_atomic(kaddr); 820 flush_dcache_page(page); 821 } 822 continue; 823 } 824 /* 825 * Slow path: this is the first buffer in the cluster. 
If it 826 * is outside allocated size and is not uptodate, zero it and 827 * set it uptodate. 828 */ 829 read_lock_irqsave(&ni->size_lock, flags); 830 initialized_size = ni->allocated_size; 831 read_unlock_irqrestore(&ni->size_lock, flags); 832 if (bh_pos > initialized_size) { 833 if (PageUptodate(page)) { 834 if (!buffer_uptodate(bh)) 835 set_buffer_uptodate(bh); 836 } else if (!buffer_uptodate(bh)) { 837 zero_user(page, bh_offset(bh), blocksize); 838 set_buffer_uptodate(bh); 839 } 840 continue; 841 } 842 is_retry = false; 843 if (!rl) { 844 down_read(&ni->runlist.lock); 845 retry_remap: 846 rl = ni->runlist.rl; 847 } 848 if (likely(rl != NULL)) { 849 /* Seek to element containing target cluster. */ 850 while (rl->length && rl[1].vcn <= bh_cpos) 851 rl++; 852 lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos); 853 if (likely(lcn >= 0)) { 854 /* 855 * Successful remap, setup the map cache and 856 * use that to deal with the buffer. 857 */ 858 was_hole = false; 859 vcn = bh_cpos; 860 vcn_len = rl[1].vcn - vcn; 861 lcn_block = lcn << (vol->cluster_size_bits - 862 blocksize_bits); 863 cdelta = 0; 864 /* 865 * If the number of remaining clusters touched 866 * by the write is smaller or equal to the 867 * number of cached clusters, unlock the 868 * runlist as the map cache will be used from 869 * now on. 870 */ 871 if (likely(vcn + vcn_len >= cend)) { 872 if (rl_write_locked) { 873 up_write(&ni->runlist.lock); 874 rl_write_locked = false; 875 } else 876 up_read(&ni->runlist.lock); 877 rl = NULL; 878 } 879 goto map_buffer_cached; 880 } 881 } else 882 lcn = LCN_RL_NOT_MAPPED; 883 /* 884 * If it is not a hole and not out of bounds, the runlist is 885 * probably unmapped so try to map it now. 886 */ 887 if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) { 888 if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) { 889 /* Attempt to map runlist. 
*/ 890 if (!rl_write_locked) { 891 /* 892 * We need the runlist locked for 893 * writing, so if it is locked for 894 * reading relock it now and retry in 895 * case it changed whilst we dropped 896 * the lock. 897 */ 898 up_read(&ni->runlist.lock); 899 down_write(&ni->runlist.lock); 900 rl_write_locked = true; 901 goto retry_remap; 902 } 903 err = ntfs_map_runlist_nolock(ni, bh_cpos, 904 NULL); 905 if (likely(!err)) { 906 is_retry = true; 907 goto retry_remap; 908 } 909 /* 910 * If @vcn is out of bounds, pretend @lcn is 911 * LCN_ENOENT. As long as the buffer is out 912 * of bounds this will work fine. 913 */ 914 if (err == -ENOENT) { 915 lcn = LCN_ENOENT; 916 err = 0; 917 goto rl_not_mapped_enoent; 918 } 919 } else 920 err = -EIO; 921 /* Failed to map the buffer, even after retrying. */ 922 bh->b_blocknr = -1; 923 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 924 "attribute type 0x%x, vcn 0x%llx, " 925 "vcn offset 0x%x, because its " 926 "location on disk could not be " 927 "determined%s (error code %i).", 928 ni->mft_no, ni->type, 929 (unsigned long long)bh_cpos, 930 (unsigned)bh_pos & 931 vol->cluster_size_mask, 932 is_retry ? " even after retrying" : "", 933 err); 934 break; 935 } 936 rl_not_mapped_enoent: 937 /* 938 * The buffer is in a hole or out of bounds. We need to fill 939 * the hole, unless the buffer is in a cluster which is not 940 * touched by the write, in which case we just leave the buffer 941 * unmapped. This can only happen when the cluster size is 942 * less than the page cache size. 943 */ 944 if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) { 945 bh_cend = (bh_end + vol->cluster_size - 1) >> 946 vol->cluster_size_bits; 947 if ((bh_cend <= cpos || bh_cpos >= cend)) { 948 bh->b_blocknr = -1; 949 /* 950 * If the buffer is uptodate we skip it. If it 951 * is not but the page is uptodate, we can set 952 * the buffer uptodate. If the page is not 953 * uptodate, we can clear the buffer and set it 954 * uptodate. 
Whether this is worthwhile is 955 * debatable and this could be removed. 956 */ 957 if (PageUptodate(page)) { 958 if (!buffer_uptodate(bh)) 959 set_buffer_uptodate(bh); 960 } else if (!buffer_uptodate(bh)) { 961 zero_user(page, bh_offset(bh), 962 blocksize); 963 set_buffer_uptodate(bh); 964 } 965 continue; 966 } 967 } 968 /* 969 * Out of bounds buffer is invalid if it was not really out of 970 * bounds. 971 */ 972 BUG_ON(lcn != LCN_HOLE); 973 /* 974 * We need the runlist locked for writing, so if it is locked 975 * for reading relock it now and retry in case it changed 976 * whilst we dropped the lock. 977 */ 978 BUG_ON(!rl); 979 if (!rl_write_locked) { 980 up_read(&ni->runlist.lock); 981 down_write(&ni->runlist.lock); 982 rl_write_locked = true; 983 goto retry_remap; 984 } 985 /* Find the previous last allocated cluster. */ 986 BUG_ON(rl->lcn != LCN_HOLE); 987 lcn = -1; 988 rl2 = rl; 989 while (--rl2 >= ni->runlist.rl) { 990 if (rl2->lcn >= 0) { 991 lcn = rl2->lcn + rl2->length; 992 break; 993 } 994 } 995 rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE, 996 false); 997 if (IS_ERR(rl2)) { 998 err = PTR_ERR(rl2); 999 ntfs_debug("Failed to allocate cluster, error code %i.", 1000 err); 1001 break; 1002 } 1003 lcn = rl2->lcn; 1004 rl = ntfs_runlists_merge(ni->runlist.rl, rl2); 1005 if (IS_ERR(rl)) { 1006 err = PTR_ERR(rl); 1007 if (err != -ENOMEM) 1008 err = -EIO; 1009 if (ntfs_cluster_free_from_rl(vol, rl2)) { 1010 ntfs_error(vol->sb, "Failed to release " 1011 "allocated cluster in error " 1012 "code path. Run chkdsk to " 1013 "recover the lost cluster."); 1014 NVolSetErrors(vol); 1015 } 1016 ntfs_free(rl2); 1017 break; 1018 } 1019 ni->runlist.rl = rl; 1020 status.runlist_merged = 1; 1021 ntfs_debug("Allocated cluster, lcn 0x%llx.", 1022 (unsigned long long)lcn); 1023 /* Map and lock the mft record and get the attribute record. 
*/ 1024 if (!NInoAttr(ni)) 1025 base_ni = ni; 1026 else 1027 base_ni = ni->ext.base_ntfs_ino; 1028 m = map_mft_record(base_ni); 1029 if (IS_ERR(m)) { 1030 err = PTR_ERR(m); 1031 break; 1032 } 1033 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1034 if (unlikely(!ctx)) { 1035 err = -ENOMEM; 1036 unmap_mft_record(base_ni); 1037 break; 1038 } 1039 status.mft_attr_mapped = 1; 1040 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1041 CASE_SENSITIVE, bh_cpos, NULL, 0, ctx); 1042 if (unlikely(err)) { 1043 if (err == -ENOENT) 1044 err = -EIO; 1045 break; 1046 } 1047 m = ctx->mrec; 1048 a = ctx->attr; 1049 /* 1050 * Find the runlist element with which the attribute extent 1051 * starts. Note, we cannot use the _attr_ version because we 1052 * have mapped the mft record. That is ok because we know the 1053 * runlist fragment must be mapped already to have ever gotten 1054 * here, so we can just use the _rl_ version. 1055 */ 1056 vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn); 1057 rl2 = ntfs_rl_find_vcn_nolock(rl, vcn); 1058 BUG_ON(!rl2); 1059 BUG_ON(!rl2->length); 1060 BUG_ON(rl2->lcn < LCN_HOLE); 1061 highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); 1062 /* 1063 * If @highest_vcn is zero, calculate the real highest_vcn 1064 * (which can really be zero). 1065 */ 1066 if (!highest_vcn) 1067 highest_vcn = (sle64_to_cpu( 1068 a->data.non_resident.allocated_size) >> 1069 vol->cluster_size_bits) - 1; 1070 /* 1071 * Determine the size of the mapping pairs array for the new 1072 * extent, i.e. the old extent with the hole filled. 1073 */ 1074 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn, 1075 highest_vcn); 1076 if (unlikely(mp_size <= 0)) { 1077 if (!(err = mp_size)) 1078 err = -EIO; 1079 ntfs_debug("Failed to get size for mapping pairs " 1080 "array, error code %i.", err); 1081 break; 1082 } 1083 /* 1084 * Resize the attribute record to fit the new mapping pairs 1085 * array. 
1086 */ 1087 attr_rec_len = le32_to_cpu(a->length); 1088 err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu( 1089 a->data.non_resident.mapping_pairs_offset)); 1090 if (unlikely(err)) { 1091 BUG_ON(err != -ENOSPC); 1092 // TODO: Deal with this by using the current attribute 1093 // and fill it with as much of the mapping pairs 1094 // array as possible. Then loop over each attribute 1095 // extent rewriting the mapping pairs arrays as we go 1096 // along and if when we reach the end we have not 1097 // enough space, try to resize the last attribute 1098 // extent and if even that fails, add a new attribute 1099 // extent. 1100 // We could also try to resize at each step in the hope 1101 // that we will not need to rewrite every single extent. 1102 // Note, we may need to decompress some extents to fill 1103 // the runlist as we are walking the extents... 1104 ntfs_error(vol->sb, "Not enough space in the mft " 1105 "record for the extended attribute " 1106 "record. This case is not " 1107 "implemented yet."); 1108 err = -EOPNOTSUPP; 1109 break ; 1110 } 1111 status.mp_rebuilt = 1; 1112 /* 1113 * Generate the mapping pairs array directly into the attribute 1114 * record. 1115 */ 1116 err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 1117 a->data.non_resident.mapping_pairs_offset), 1118 mp_size, rl2, vcn, highest_vcn, NULL); 1119 if (unlikely(err)) { 1120 ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, " 1121 "attribute type 0x%x, because building " 1122 "the mapping pairs failed with error " 1123 "code %i.", vi->i_ino, 1124 (unsigned)le32_to_cpu(ni->type), err); 1125 err = -EIO; 1126 break; 1127 } 1128 /* Update the highest_vcn but only if it was not set. */ 1129 if (unlikely(!a->data.non_resident.highest_vcn)) 1130 a->data.non_resident.highest_vcn = 1131 cpu_to_sle64(highest_vcn); 1132 /* 1133 * If the attribute is sparse/compressed, update the compressed 1134 * size in the ntfs_inode structure and the attribute record. 
1135 */ 1136 if (likely(NInoSparse(ni) || NInoCompressed(ni))) { 1137 /* 1138 * If we are not in the first attribute extent, switch 1139 * to it, but first ensure the changes will make it to 1140 * disk later. 1141 */ 1142 if (a->data.non_resident.lowest_vcn) { 1143 flush_dcache_mft_record_page(ctx->ntfs_ino); 1144 mark_mft_record_dirty(ctx->ntfs_ino); 1145 ntfs_attr_reinit_search_ctx(ctx); 1146 err = ntfs_attr_lookup(ni->type, ni->name, 1147 ni->name_len, CASE_SENSITIVE, 1148 0, NULL, 0, ctx); 1149 if (unlikely(err)) { 1150 status.attr_switched = 1; 1151 break; 1152 } 1153 /* @m is not used any more so do not set it. */ 1154 a = ctx->attr; 1155 } 1156 write_lock_irqsave(&ni->size_lock, flags); 1157 ni->itype.compressed.size += vol->cluster_size; 1158 a->data.non_resident.compressed_size = 1159 cpu_to_sle64(ni->itype.compressed.size); 1160 write_unlock_irqrestore(&ni->size_lock, flags); 1161 } 1162 /* Ensure the changes make it to disk. */ 1163 flush_dcache_mft_record_page(ctx->ntfs_ino); 1164 mark_mft_record_dirty(ctx->ntfs_ino); 1165 ntfs_attr_put_search_ctx(ctx); 1166 unmap_mft_record(base_ni); 1167 /* Successfully filled the hole. */ 1168 status.runlist_merged = 0; 1169 status.mft_attr_mapped = 0; 1170 status.mp_rebuilt = 0; 1171 /* Setup the map cache and use that to deal with the buffer. */ 1172 was_hole = true; 1173 vcn = bh_cpos; 1174 vcn_len = 1; 1175 lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits); 1176 cdelta = 0; 1177 /* 1178 * If the number of remaining clusters in the @pages is smaller 1179 * or equal to the number of cached clusters, unlock the 1180 * runlist as the map cache will be used from now on. 1181 */ 1182 if (likely(vcn + vcn_len >= cend)) { 1183 up_write(&ni->runlist.lock); 1184 rl_write_locked = false; 1185 rl = NULL; 1186 } 1187 goto map_buffer_cached; 1188 } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1189 /* If there are no errors, do the next page. 
*/ 1190 if (likely(!err && ++u < nr_pages)) 1191 goto do_next_page; 1192 /* If there are no errors, release the runlist lock if we took it. */ 1193 if (likely(!err)) { 1194 if (unlikely(rl_write_locked)) { 1195 up_write(&ni->runlist.lock); 1196 rl_write_locked = false; 1197 } else if (unlikely(rl)) 1198 up_read(&ni->runlist.lock); 1199 rl = NULL; 1200 } 1201 /* If we issued read requests, let them complete. */ 1202 read_lock_irqsave(&ni->size_lock, flags); 1203 initialized_size = ni->initialized_size; 1204 read_unlock_irqrestore(&ni->size_lock, flags); 1205 while (wait_bh > wait) { 1206 bh = *--wait_bh; 1207 wait_on_buffer(bh); 1208 if (likely(buffer_uptodate(bh))) { 1209 page = bh->b_page; 1210 bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) + 1211 bh_offset(bh); 1212 /* 1213 * If the buffer overflows the initialized size, need 1214 * to zero the overflowing region. 1215 */ 1216 if (unlikely(bh_pos + blocksize > initialized_size)) { 1217 int ofs = 0; 1218 1219 if (likely(bh_pos < initialized_size)) 1220 ofs = initialized_size - bh_pos; 1221 zero_user_segment(page, bh_offset(bh) + ofs, 1222 blocksize); 1223 } 1224 } else /* if (unlikely(!buffer_uptodate(bh))) */ 1225 err = -EIO; 1226 } 1227 if (likely(!err)) { 1228 /* Clear buffer_new on all buffers. */ 1229 u = 0; 1230 do { 1231 bh = head = page_buffers(pages[u]); 1232 do { 1233 if (buffer_new(bh)) 1234 clear_buffer_new(bh); 1235 } while ((bh = bh->b_this_page) != head); 1236 } while (++u < nr_pages); 1237 ntfs_debug("Done."); 1238 return err; 1239 } 1240 if (status.attr_switched) { 1241 /* Get back to the attribute extent we modified. */ 1242 ntfs_attr_reinit_search_ctx(ctx); 1243 if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1244 CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) { 1245 ntfs_error(vol->sb, "Failed to find required " 1246 "attribute extent of attribute in " 1247 "error code path. 
Run chkdsk to " 1248 "recover."); 1249 write_lock_irqsave(&ni->size_lock, flags); 1250 ni->itype.compressed.size += vol->cluster_size; 1251 write_unlock_irqrestore(&ni->size_lock, flags); 1252 flush_dcache_mft_record_page(ctx->ntfs_ino); 1253 mark_mft_record_dirty(ctx->ntfs_ino); 1254 /* 1255 * The only thing that is now wrong is the compressed 1256 * size of the base attribute extent which chkdsk 1257 * should be able to fix. 1258 */ 1259 NVolSetErrors(vol); 1260 } else { 1261 m = ctx->mrec; 1262 a = ctx->attr; 1263 status.attr_switched = 0; 1264 } 1265 } 1266 /* 1267 * If the runlist has been modified, need to restore it by punching a 1268 * hole into it and we then need to deallocate the on-disk cluster as 1269 * well. Note, we only modify the runlist if we are able to generate a 1270 * new mapping pairs array, i.e. only when the mapped attribute extent 1271 * is not switched. 1272 */ 1273 if (status.runlist_merged && !status.attr_switched) { 1274 BUG_ON(!rl_write_locked); 1275 /* Make the file cluster we allocated sparse in the runlist. */ 1276 if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) { 1277 ntfs_error(vol->sb, "Failed to punch hole into " 1278 "attribute runlist in error code " 1279 "path. Run chkdsk to recover the " 1280 "lost cluster."); 1281 NVolSetErrors(vol); 1282 } else /* if (success) */ { 1283 status.runlist_merged = 0; 1284 /* 1285 * Deallocate the on-disk cluster we allocated but only 1286 * if we succeeded in punching its vcn out of the 1287 * runlist. 1288 */ 1289 down_write(&vol->lcnbmp_lock); 1290 if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 1291 ntfs_error(vol->sb, "Failed to release " 1292 "allocated cluster in error " 1293 "code path. Run chkdsk to " 1294 "recover the lost cluster."); 1295 NVolSetErrors(vol); 1296 } 1297 up_write(&vol->lcnbmp_lock); 1298 } 1299 } 1300 /* 1301 * Resize the attribute record to its old size and rebuild the mapping 1302 * pairs array. 
Note, we only can do this if the runlist has been 1303 * restored to its old state which also implies that the mapped 1304 * attribute extent is not switched. 1305 */ 1306 if (status.mp_rebuilt && !status.runlist_merged) { 1307 if (ntfs_attr_record_resize(m, a, attr_rec_len)) { 1308 ntfs_error(vol->sb, "Failed to restore attribute " 1309 "record in error code path. Run " 1310 "chkdsk to recover."); 1311 NVolSetErrors(vol); 1312 } else /* if (success) */ { 1313 if (ntfs_mapping_pairs_build(vol, (u8*)a + 1314 le16_to_cpu(a->data.non_resident. 1315 mapping_pairs_offset), attr_rec_len - 1316 le16_to_cpu(a->data.non_resident. 1317 mapping_pairs_offset), ni->runlist.rl, 1318 vcn, highest_vcn, NULL)) { 1319 ntfs_error(vol->sb, "Failed to restore " 1320 "mapping pairs array in error " 1321 "code path. Run chkdsk to " 1322 "recover."); 1323 NVolSetErrors(vol); 1324 } 1325 flush_dcache_mft_record_page(ctx->ntfs_ino); 1326 mark_mft_record_dirty(ctx->ntfs_ino); 1327 } 1328 } 1329 /* Release the mft record and the attribute. */ 1330 if (status.mft_attr_mapped) { 1331 ntfs_attr_put_search_ctx(ctx); 1332 unmap_mft_record(base_ni); 1333 } 1334 /* Release the runlist lock. */ 1335 if (rl_write_locked) 1336 up_write(&ni->runlist.lock); 1337 else if (rl) 1338 up_read(&ni->runlist.lock); 1339 /* 1340 * Zero out any newly allocated blocks to avoid exposing stale data. 1341 * If BH_New is set, we know that the block was newly allocated above 1342 * and that it has not been fully zeroed and marked dirty yet. 
1343 */ 1344 nr_pages = u; 1345 u = 0; 1346 end = bh_cpos << vol->cluster_size_bits; 1347 do { 1348 page = pages[u]; 1349 bh = head = page_buffers(page); 1350 do { 1351 if (u == nr_pages && 1352 ((s64)page->index << PAGE_CACHE_SHIFT) + 1353 bh_offset(bh) >= end) 1354 break; 1355 if (!buffer_new(bh)) 1356 continue; 1357 clear_buffer_new(bh); 1358 if (!buffer_uptodate(bh)) { 1359 if (PageUptodate(page)) 1360 set_buffer_uptodate(bh); 1361 else { 1362 zero_user(page, bh_offset(bh), 1363 blocksize); 1364 set_buffer_uptodate(bh); 1365 } 1366 } 1367 mark_buffer_dirty(bh); 1368 } while ((bh = bh->b_this_page) != head); 1369 } while (++u <= nr_pages); 1370 ntfs_error(vol->sb, "Failed. Returning error code %i.", err); 1371 return err; 1372 } 1373 1374 static inline void ntfs_flush_dcache_pages(struct page **pages, 1375 unsigned nr_pages) 1376 { 1377 BUG_ON(!nr_pages); 1378 /* 1379 * Warning: Do not do the decrement at the same time as the call to 1380 * flush_dcache_page() because it is a NULL macro on i386 and hence the 1381 * decrement never happens so the loop never terminates. 1382 */ 1383 do { 1384 --nr_pages; 1385 flush_dcache_page(pages[nr_pages]); 1386 } while (nr_pages > 0); 1387 } 1388 1389 /** 1390 * ntfs_commit_pages_after_non_resident_write - commit the received data 1391 * @pages: array of destination pages 1392 * @nr_pages: number of pages in @pages 1393 * @pos: byte position in file at which the write begins 1394 * @bytes: number of bytes to be written 1395 * 1396 * See description of ntfs_commit_pages_after_write(), below. 
1397 */ 1398 static inline int ntfs_commit_pages_after_non_resident_write( 1399 struct page **pages, const unsigned nr_pages, 1400 s64 pos, size_t bytes) 1401 { 1402 s64 end, initialized_size; 1403 struct inode *vi; 1404 ntfs_inode *ni, *base_ni; 1405 struct buffer_head *bh, *head; 1406 ntfs_attr_search_ctx *ctx; 1407 MFT_RECORD *m; 1408 ATTR_RECORD *a; 1409 unsigned long flags; 1410 unsigned blocksize, u; 1411 int err; 1412 1413 vi = pages[0]->mapping->host; 1414 ni = NTFS_I(vi); 1415 blocksize = vi->i_sb->s_blocksize; 1416 end = pos + bytes; 1417 u = 0; 1418 do { 1419 s64 bh_pos; 1420 struct page *page; 1421 bool partial; 1422 1423 page = pages[u]; 1424 bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; 1425 bh = head = page_buffers(page); 1426 partial = false; 1427 do { 1428 s64 bh_end; 1429 1430 bh_end = bh_pos + blocksize; 1431 if (bh_end <= pos || bh_pos >= end) { 1432 if (!buffer_uptodate(bh)) 1433 partial = true; 1434 } else { 1435 set_buffer_uptodate(bh); 1436 mark_buffer_dirty(bh); 1437 } 1438 } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1439 /* 1440 * If all buffers are now uptodate but the page is not, set the 1441 * page uptodate. 1442 */ 1443 if (!partial && !PageUptodate(page)) 1444 SetPageUptodate(page); 1445 } while (++u < nr_pages); 1446 /* 1447 * Finally, if we do not need to update initialized_size or i_size we 1448 * are finished. 1449 */ 1450 read_lock_irqsave(&ni->size_lock, flags); 1451 initialized_size = ni->initialized_size; 1452 read_unlock_irqrestore(&ni->size_lock, flags); 1453 if (end <= initialized_size) { 1454 ntfs_debug("Done."); 1455 return 0; 1456 } 1457 /* 1458 * Update initialized_size/i_size as appropriate, both in the inode and 1459 * the mft record. 1460 */ 1461 if (!NInoAttr(ni)) 1462 base_ni = ni; 1463 else 1464 base_ni = ni->ext.base_ntfs_ino; 1465 /* Map, pin, and lock the mft record. 
*/ 1466 m = map_mft_record(base_ni); 1467 if (IS_ERR(m)) { 1468 err = PTR_ERR(m); 1469 m = NULL; 1470 ctx = NULL; 1471 goto err_out; 1472 } 1473 BUG_ON(!NInoNonResident(ni)); 1474 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1475 if (unlikely(!ctx)) { 1476 err = -ENOMEM; 1477 goto err_out; 1478 } 1479 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1480 CASE_SENSITIVE, 0, NULL, 0, ctx); 1481 if (unlikely(err)) { 1482 if (err == -ENOENT) 1483 err = -EIO; 1484 goto err_out; 1485 } 1486 a = ctx->attr; 1487 BUG_ON(!a->non_resident); 1488 write_lock_irqsave(&ni->size_lock, flags); 1489 BUG_ON(end > ni->allocated_size); 1490 ni->initialized_size = end; 1491 a->data.non_resident.initialized_size = cpu_to_sle64(end); 1492 if (end > i_size_read(vi)) { 1493 i_size_write(vi, end); 1494 a->data.non_resident.data_size = 1495 a->data.non_resident.initialized_size; 1496 } 1497 write_unlock_irqrestore(&ni->size_lock, flags); 1498 /* Mark the mft record dirty, so it gets written back. */ 1499 flush_dcache_mft_record_page(ctx->ntfs_ino); 1500 mark_mft_record_dirty(ctx->ntfs_ino); 1501 ntfs_attr_put_search_ctx(ctx); 1502 unmap_mft_record(base_ni); 1503 ntfs_debug("Done."); 1504 return 0; 1505 err_out: 1506 if (ctx) 1507 ntfs_attr_put_search_ctx(ctx); 1508 if (m) 1509 unmap_mft_record(base_ni); 1510 ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error " 1511 "code %i).", err); 1512 if (err != -ENOMEM) 1513 NVolSetErrors(ni->vol); 1514 return err; 1515 } 1516 1517 /** 1518 * ntfs_commit_pages_after_write - commit the received data 1519 * @pages: array of destination pages 1520 * @nr_pages: number of pages in @pages 1521 * @pos: byte position in file at which the write begins 1522 * @bytes: number of bytes to be written 1523 * 1524 * This is called from ntfs_file_buffered_write() with i_mutex held on the inode 1525 * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are 1526 * locked but not kmap()ped. 
The source data has already been copied into the 1527 * @page. ntfs_prepare_pages_for_non_resident_write() has been called before 1528 * the data was copied (for non-resident attributes only) and it returned 1529 * success. 1530 * 1531 * Need to set uptodate and mark dirty all buffers within the boundary of the 1532 * write. If all buffers in a page are uptodate we set the page uptodate, too. 1533 * 1534 * Setting the buffers dirty ensures that they get written out later when 1535 * ntfs_writepage() is invoked by the VM. 1536 * 1537 * Finally, we need to update i_size and initialized_size as appropriate both 1538 * in the inode and the mft record. 1539 * 1540 * This is modelled after fs/buffer.c::generic_commit_write(), which marks 1541 * buffers uptodate and dirty, sets the page uptodate if all buffers in the 1542 * page are uptodate, and updates i_size if the end of io is beyond i_size. In 1543 * that case, it also marks the inode dirty. 1544 * 1545 * If things have gone as outlined in 1546 * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page 1547 * content modifications here for non-resident attributes. For resident 1548 * attributes we need to do the uptodate bringing here which we combine with 1549 * the copying into the mft record which means we save one atomic kmap. 1550 * 1551 * Return 0 on success or -errno on error. 
1552 */ 1553 static int ntfs_commit_pages_after_write(struct page **pages, 1554 const unsigned nr_pages, s64 pos, size_t bytes) 1555 { 1556 s64 end, initialized_size; 1557 loff_t i_size; 1558 struct inode *vi; 1559 ntfs_inode *ni, *base_ni; 1560 struct page *page; 1561 ntfs_attr_search_ctx *ctx; 1562 MFT_RECORD *m; 1563 ATTR_RECORD *a; 1564 char *kattr, *kaddr; 1565 unsigned long flags; 1566 u32 attr_len; 1567 int err; 1568 1569 BUG_ON(!nr_pages); 1570 BUG_ON(!pages); 1571 page = pages[0]; 1572 BUG_ON(!page); 1573 vi = page->mapping->host; 1574 ni = NTFS_I(vi); 1575 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " 1576 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 1577 vi->i_ino, ni->type, page->index, nr_pages, 1578 (long long)pos, bytes); 1579 if (NInoNonResident(ni)) 1580 return ntfs_commit_pages_after_non_resident_write(pages, 1581 nr_pages, pos, bytes); 1582 BUG_ON(nr_pages > 1); 1583 /* 1584 * Attribute is resident, implying it is not compressed, encrypted, or 1585 * sparse. 1586 */ 1587 if (!NInoAttr(ni)) 1588 base_ni = ni; 1589 else 1590 base_ni = ni->ext.base_ntfs_ino; 1591 BUG_ON(NInoNonResident(ni)); 1592 /* Map, pin, and lock the mft record. */ 1593 m = map_mft_record(base_ni); 1594 if (IS_ERR(m)) { 1595 err = PTR_ERR(m); 1596 m = NULL; 1597 ctx = NULL; 1598 goto err_out; 1599 } 1600 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1601 if (unlikely(!ctx)) { 1602 err = -ENOMEM; 1603 goto err_out; 1604 } 1605 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1606 CASE_SENSITIVE, 0, NULL, 0, ctx); 1607 if (unlikely(err)) { 1608 if (err == -ENOENT) 1609 err = -EIO; 1610 goto err_out; 1611 } 1612 a = ctx->attr; 1613 BUG_ON(a->non_resident); 1614 /* The total length of the attribute value. 
*/ 1615 attr_len = le32_to_cpu(a->data.resident.value_length); 1616 i_size = i_size_read(vi); 1617 BUG_ON(attr_len != i_size); 1618 BUG_ON(pos > attr_len); 1619 end = pos + bytes; 1620 BUG_ON(end > le32_to_cpu(a->length) - 1621 le16_to_cpu(a->data.resident.value_offset)); 1622 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1623 kaddr = kmap_atomic(page); 1624 /* Copy the received data from the page to the mft record. */ 1625 memcpy(kattr + pos, kaddr + pos, bytes); 1626 /* Update the attribute length if necessary. */ 1627 if (end > attr_len) { 1628 attr_len = end; 1629 a->data.resident.value_length = cpu_to_le32(attr_len); 1630 } 1631 /* 1632 * If the page is not uptodate, bring the out of bounds area(s) 1633 * uptodate by copying data from the mft record to the page. 1634 */ 1635 if (!PageUptodate(page)) { 1636 if (pos > 0) 1637 memcpy(kaddr, kattr, pos); 1638 if (end < attr_len) 1639 memcpy(kaddr + end, kattr + end, attr_len - end); 1640 /* Zero the region outside the end of the attribute value. */ 1641 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 1642 flush_dcache_page(page); 1643 SetPageUptodate(page); 1644 } 1645 kunmap_atomic(kaddr); 1646 /* Update initialized_size/i_size if necessary. */ 1647 read_lock_irqsave(&ni->size_lock, flags); 1648 initialized_size = ni->initialized_size; 1649 BUG_ON(end > ni->allocated_size); 1650 read_unlock_irqrestore(&ni->size_lock, flags); 1651 BUG_ON(initialized_size != i_size); 1652 if (end > initialized_size) { 1653 write_lock_irqsave(&ni->size_lock, flags); 1654 ni->initialized_size = end; 1655 i_size_write(vi, end); 1656 write_unlock_irqrestore(&ni->size_lock, flags); 1657 } 1658 /* Mark the mft record dirty, so it gets written back. 
*/ 1659 flush_dcache_mft_record_page(ctx->ntfs_ino); 1660 mark_mft_record_dirty(ctx->ntfs_ino); 1661 ntfs_attr_put_search_ctx(ctx); 1662 unmap_mft_record(base_ni); 1663 ntfs_debug("Done."); 1664 return 0; 1665 err_out: 1666 if (err == -ENOMEM) { 1667 ntfs_warning(vi->i_sb, "Error allocating memory required to " 1668 "commit the write."); 1669 if (PageUptodate(page)) { 1670 ntfs_warning(vi->i_sb, "Page is uptodate, setting " 1671 "dirty so the write will be retried " 1672 "later on by the VM."); 1673 /* 1674 * Put the page on mapping->dirty_pages, but leave its 1675 * buffers' dirty state as-is. 1676 */ 1677 __set_page_dirty_nobuffers(page); 1678 err = 0; 1679 } else 1680 ntfs_error(vi->i_sb, "Page is not uptodate. Written " 1681 "data has been lost."); 1682 } else { 1683 ntfs_error(vi->i_sb, "Resident attribute commit write failed " 1684 "with error %i.", err); 1685 NVolSetErrors(ni->vol); 1686 } 1687 if (ctx) 1688 ntfs_attr_put_search_ctx(ctx); 1689 if (m) 1690 unmap_mft_record(base_ni); 1691 return err; 1692 } 1693 1694 /* 1695 * Copy as much as we can into the pages and return the number of bytes which 1696 * were successfully copied. If a fault is encountered then clear the pages 1697 * out to (ofs + bytes) and return the number of bytes which were copied. 
1698 */ 1699 static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, 1700 unsigned ofs, struct iov_iter *i, size_t bytes) 1701 { 1702 struct page **last_page = pages + nr_pages; 1703 size_t total = 0; 1704 struct iov_iter data = *i; 1705 unsigned len, copied; 1706 1707 do { 1708 len = PAGE_CACHE_SIZE - ofs; 1709 if (len > bytes) 1710 len = bytes; 1711 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, 1712 len); 1713 total += copied; 1714 bytes -= copied; 1715 if (!bytes) 1716 break; 1717 iov_iter_advance(&data, copied); 1718 if (copied < len) 1719 goto err; 1720 ofs = 0; 1721 } while (++pages < last_page); 1722 out: 1723 return total; 1724 err: 1725 /* Zero the rest of the target like __copy_from_user(). */ 1726 len = PAGE_CACHE_SIZE - copied; 1727 do { 1728 if (len > bytes) 1729 len = bytes; 1730 zero_user(*pages, copied, len); 1731 bytes -= len; 1732 copied = 0; 1733 len = PAGE_CACHE_SIZE; 1734 } while (++pages < last_page); 1735 goto out; 1736 } 1737 1738 /** 1739 * ntfs_perform_write - perform buffered write to a file 1740 * @file: file to write to 1741 * @i: iov_iter with data to write 1742 * @pos: byte offset in file at which to begin writing to 1743 */ 1744 static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, 1745 loff_t pos) 1746 { 1747 struct address_space *mapping = file->f_mapping; 1748 struct inode *vi = mapping->host; 1749 ntfs_inode *ni = NTFS_I(vi); 1750 ntfs_volume *vol = ni->vol; 1751 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1752 struct page *cached_page = NULL; 1753 VCN last_vcn; 1754 LCN lcn; 1755 size_t bytes; 1756 ssize_t status, written = 0; 1757 unsigned nr_pages; 1758 1759 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " 1760 "0x%llx, count 0x%lx.", vi->i_ino, 1761 (unsigned)le32_to_cpu(ni->type), 1762 (unsigned long long)pos, 1763 (unsigned long)iov_iter_count(i)); 1764 /* 1765 * If a previous ntfs_truncate() failed, repeat it and abort if it 1766 * fails again. 
1767 */ 1768 if (unlikely(NInoTruncateFailed(ni))) { 1769 int err; 1770 1771 inode_dio_wait(vi); 1772 err = ntfs_truncate(vi); 1773 if (err || NInoTruncateFailed(ni)) { 1774 if (!err) 1775 err = -EIO; 1776 ntfs_error(vol->sb, "Cannot perform write to inode " 1777 "0x%lx, attribute type 0x%x, because " 1778 "ntfs_truncate() failed (error code " 1779 "%i).", vi->i_ino, 1780 (unsigned)le32_to_cpu(ni->type), err); 1781 return err; 1782 } 1783 } 1784 /* 1785 * Determine the number of pages per cluster for non-resident 1786 * attributes. 1787 */ 1788 nr_pages = 1; 1789 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) 1790 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; 1791 last_vcn = -1; 1792 do { 1793 VCN vcn; 1794 pgoff_t idx, start_idx; 1795 unsigned ofs, do_pages, u; 1796 size_t copied; 1797 1798 start_idx = idx = pos >> PAGE_CACHE_SHIFT; 1799 ofs = pos & ~PAGE_CACHE_MASK; 1800 bytes = PAGE_CACHE_SIZE - ofs; 1801 do_pages = 1; 1802 if (nr_pages > 1) { 1803 vcn = pos >> vol->cluster_size_bits; 1804 if (vcn != last_vcn) { 1805 last_vcn = vcn; 1806 /* 1807 * Get the lcn of the vcn the write is in. If 1808 * it is a hole, need to lock down all pages in 1809 * the cluster. 
1810 */ 1811 down_read(&ni->runlist.lock); 1812 lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >> 1813 vol->cluster_size_bits, false); 1814 up_read(&ni->runlist.lock); 1815 if (unlikely(lcn < LCN_HOLE)) { 1816 if (lcn == LCN_ENOMEM) 1817 status = -ENOMEM; 1818 else { 1819 status = -EIO; 1820 ntfs_error(vol->sb, "Cannot " 1821 "perform write to " 1822 "inode 0x%lx, " 1823 "attribute type 0x%x, " 1824 "because the attribute " 1825 "is corrupt.", 1826 vi->i_ino, (unsigned) 1827 le32_to_cpu(ni->type)); 1828 } 1829 break; 1830 } 1831 if (lcn == LCN_HOLE) { 1832 start_idx = (pos & ~(s64) 1833 vol->cluster_size_mask) 1834 >> PAGE_CACHE_SHIFT; 1835 bytes = vol->cluster_size - (pos & 1836 vol->cluster_size_mask); 1837 do_pages = nr_pages; 1838 } 1839 } 1840 } 1841 if (bytes > iov_iter_count(i)) 1842 bytes = iov_iter_count(i); 1843 again: 1844 /* 1845 * Bring in the user page(s) that we will copy from _first_. 1846 * Otherwise there is a nasty deadlock on copying from the same 1847 * page(s) as we are writing to, without it/them being marked 1848 * up-to-date. Note, at present there is nothing to stop the 1849 * pages being swapped out between us bringing them into memory 1850 * and doing the actual copying. 1851 */ 1852 if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) { 1853 status = -EFAULT; 1854 break; 1855 } 1856 /* Get and lock @do_pages starting at index @start_idx. */ 1857 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1858 pages, &cached_page); 1859 if (unlikely(status)) 1860 break; 1861 /* 1862 * For non-resident attributes, we need to fill any holes with 1863 * actual clusters and ensure all bufferes are mapped. We also 1864 * need to bring uptodate any buffers that are only partially 1865 * being written to. 
1866 */ 1867 if (NInoNonResident(ni)) { 1868 status = ntfs_prepare_pages_for_non_resident_write( 1869 pages, do_pages, pos, bytes); 1870 if (unlikely(status)) { 1871 do { 1872 unlock_page(pages[--do_pages]); 1873 page_cache_release(pages[do_pages]); 1874 } while (do_pages); 1875 break; 1876 } 1877 } 1878 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; 1879 copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, 1880 i, bytes); 1881 ntfs_flush_dcache_pages(pages + u, do_pages - u); 1882 status = 0; 1883 if (likely(copied == bytes)) { 1884 status = ntfs_commit_pages_after_write(pages, do_pages, 1885 pos, bytes); 1886 if (!status) 1887 status = bytes; 1888 } 1889 do { 1890 unlock_page(pages[--do_pages]); 1891 page_cache_release(pages[do_pages]); 1892 } while (do_pages); 1893 if (unlikely(status < 0)) 1894 break; 1895 copied = status; 1896 cond_resched(); 1897 if (unlikely(!copied)) { 1898 size_t sc; 1899 1900 /* 1901 * We failed to copy anything. Fall back to single 1902 * segment length write. 1903 * 1904 * This is needed to avoid possible livelock in the 1905 * case that all segments in the iov cannot be copied 1906 * at once without a pagefault. 1907 */ 1908 sc = iov_iter_single_seg_count(i); 1909 if (bytes > sc) 1910 bytes = sc; 1911 goto again; 1912 } 1913 iov_iter_advance(i, copied); 1914 pos += copied; 1915 written += copied; 1916 balance_dirty_pages_ratelimited(mapping); 1917 if (fatal_signal_pending(current)) { 1918 status = -EINTR; 1919 break; 1920 } 1921 } while (iov_iter_count(i)); 1922 if (cached_page) 1923 page_cache_release(cached_page); 1924 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1925 written ? "written" : "status", (unsigned long)written, 1926 (long)status); 1927 return written ? 
written : status; 1928 } 1929 1930 /** 1931 * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() 1932 * @iocb: IO state structure 1933 * @from: iov_iter with data to write 1934 * 1935 * Basically the same as generic_file_write_iter() except that it ends up 1936 * up calling ntfs_perform_write() instead of generic_perform_write() and that 1937 * O_DIRECT is not implemented. 1938 */ 1939 static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1940 { 1941 struct file *file = iocb->ki_filp; 1942 struct inode *vi = file_inode(file); 1943 ssize_t written = 0; 1944 ssize_t err; 1945 1946 mutex_lock(&vi->i_mutex); 1947 /* We can write back this queue in page reclaim. */ 1948 current->backing_dev_info = inode_to_bdi(vi); 1949 err = ntfs_prepare_file_for_write(iocb, from); 1950 if (iov_iter_count(from) && !err) 1951 written = ntfs_perform_write(file, from, iocb->ki_pos); 1952 current->backing_dev_info = NULL; 1953 mutex_unlock(&vi->i_mutex); 1954 if (likely(written > 0)) { 1955 err = generic_write_sync(file, iocb->ki_pos, written); 1956 if (err < 0) 1957 written = 0; 1958 } 1959 iocb->ki_pos += written; 1960 return written ? written : err; 1961 } 1962 1963 /** 1964 * ntfs_file_fsync - sync a file to disk 1965 * @filp: file to be synced 1966 * @datasync: if non-zero only flush user data and not metadata 1967 * 1968 * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync 1969 * system calls. This function is inspired by fs/buffer.c::file_fsync(). 1970 * 1971 * If @datasync is false, write the mft record and all associated extent mft 1972 * records as well as the $DATA attribute and then sync the block device. 1973 * 1974 * If @datasync is true and the attribute is non-resident, we skip the writing 1975 * of the mft record and all associated extent mft records (this might still 1976 * happen due to the write_inode_now() call). 
1977 * 1978 * Also, if @datasync is true, we do not wait on the inode to be written out 1979 * but we always wait on the page cache pages to be written out. 1980 * 1981 * Locking: Caller must hold i_mutex on the inode. 1982 * 1983 * TODO: We should probably also write all attribute/index inodes associated 1984 * with this inode but since we have no simple way of getting to them we ignore 1985 * this problem for now. 1986 */ 1987 static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, 1988 int datasync) 1989 { 1990 struct inode *vi = filp->f_mapping->host; 1991 int err, ret = 0; 1992 1993 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 1994 1995 err = filemap_write_and_wait_range(vi->i_mapping, start, end); 1996 if (err) 1997 return err; 1998 mutex_lock(&vi->i_mutex); 1999 2000 BUG_ON(S_ISDIR(vi->i_mode)); 2001 if (!datasync || !NInoNonResident(NTFS_I(vi))) 2002 ret = __ntfs_write_inode(vi, 1); 2003 write_inode_now(vi, !datasync); 2004 /* 2005 * NOTE: If we were to use mapping->private_list (see ext2 and 2006 * fs/buffer.c) for dirty blocks then we could optimize the below to be 2007 * sync_mapping_buffers(vi->i_mapping). 2008 */ 2009 err = sync_blockdev(vi->i_sb->s_bdev); 2010 if (unlikely(err && !ret)) 2011 ret = err; 2012 if (likely(!ret)) 2013 ntfs_debug("Done."); 2014 else 2015 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 2016 "%u.", datasync ? 
"data" : "", vi->i_ino, -ret); 2017 mutex_unlock(&vi->i_mutex); 2018 return ret; 2019 } 2020 2021 #endif /* NTFS_RW */ 2022 2023 const struct file_operations ntfs_file_ops = { 2024 .llseek = generic_file_llseek, 2025 .read_iter = generic_file_read_iter, 2026 #ifdef NTFS_RW 2027 .write_iter = ntfs_file_write_iter, 2028 .fsync = ntfs_file_fsync, 2029 #endif /* NTFS_RW */ 2030 .mmap = generic_file_mmap, 2031 .open = ntfs_file_open, 2032 .splice_read = generic_file_splice_read, 2033 }; 2034 2035 const struct inode_operations ntfs_file_inode_ops = { 2036 #ifdef NTFS_RW 2037 .setattr = ntfs_setattr, 2038 #endif /* NTFS_RW */ 2039 }; 2040 2041 const struct file_operations ntfs_empty_file_ops = {}; 2042 2043 const struct inode_operations ntfs_empty_inode_ops = {}; 2044