1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 4 * 5 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. 6 */ 7 8 #include <linux/backing-dev.h> 9 #include <linux/buffer_head.h> 10 #include <linux/gfp.h> 11 #include <linux/pagemap.h> 12 #include <linux/pagevec.h> 13 #include <linux/sched/signal.h> 14 #include <linux/swap.h> 15 #include <linux/uio.h> 16 #include <linux/writeback.h> 17 18 #include <asm/page.h> 19 #include <linux/uaccess.h> 20 21 #include "attrib.h" 22 #include "bitmap.h" 23 #include "inode.h" 24 #include "debug.h" 25 #include "lcnalloc.h" 26 #include "malloc.h" 27 #include "mft.h" 28 #include "ntfs.h" 29 30 /** 31 * ntfs_file_open - called when an inode is about to be opened 32 * @vi: inode to be opened 33 * @filp: file structure describing the inode 34 * 35 * Limit file size to the page cache limit on architectures where unsigned long 36 * is 32-bits. This is the most we can do for now without overflowing the page 37 * cache page index. Doing it this way means we don't run into problems because 38 * of existing too large files. It would be better to allow the user to read 39 * the beginning of the file but I doubt very much anyone is going to hit this 40 * check on a 32-bit architecture, so there is no point in adding the extra 41 * complexity required to support this. 42 * 43 * On 64-bit architectures, the check is hopefully optimized away by the 44 * compiler. 45 * 46 * After the check passes, just call generic_file_open() to do its work. 47 */ 48 static int ntfs_file_open(struct inode *vi, struct file *filp) 49 { 50 if (sizeof(unsigned long) < 8) { 51 if (i_size_read(vi) > MAX_LFS_FILESIZE) 52 return -EOVERFLOW; 53 } 54 return generic_file_open(vi, filp); 55 } 56 57 #ifdef NTFS_RW 58 59 /** 60 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 61 * @ni: ntfs inode of the attribute to extend 62 * @new_init_size: requested new initialized size in bytes 63 * 64 * Extend the initialized size of an attribute described by the ntfs inode @ni 65 * to @new_init_size bytes. This involves zeroing any non-sparse space between 66 * the old initialized size and @new_init_size both in the page cache and on 67 * disk (if relevant complete pages are already uptodate in the page cache then 68 * these are simply marked dirty). 69 * 70 * As a side-effect, the file size (vfs inode->i_size) may be incremented as, 71 * in the resident attribute case, it is tied to the initialized size and, in 72 * the non-resident attribute case, it may not fall below the initialized size. 73 * 74 * Note that if the attribute is resident, we do not need to touch the page 75 * cache at all. This is because if the page cache page is not uptodate we 76 * bring it uptodate later, when doing the write to the mft record since we 77 * then already have the page mapped. And if the page is uptodate, the 78 * non-initialized region will already have been zeroed when the page was 79 * brought uptodate and the region may in fact already have been overwritten 80 * with new data via mmap() based writes, so we cannot just zero it. And since 81 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped 82 * is unspecified, we choose not to do zeroing and thus we do not need to touch 83 * the page at all. For a more detailed explanation see ntfs_truncate() in 84 * fs/ntfs/inode.c. 85 * 86 * Return 0 on success and -errno on error. 
In the case that an error is 87 * encountered it is possible that the initialized size will already have been 88 * incremented some way towards @new_init_size but it is guaranteed that if 89 * this is the case, the necessary zeroing will also have happened and that all 90 * metadata is self-consistent. 91 * 92 * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be 93 * held by the caller. 94 */ 95 static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) 96 { 97 s64 old_init_size; 98 loff_t old_i_size; 99 pgoff_t index, end_index; 100 unsigned long flags; 101 struct inode *vi = VFS_I(ni); 102 ntfs_inode *base_ni; 103 MFT_RECORD *m = NULL; 104 ATTR_RECORD *a; 105 ntfs_attr_search_ctx *ctx = NULL; 106 struct address_space *mapping; 107 struct page *page = NULL; 108 u8 *kattr; 109 int err; 110 u32 attr_len; 111 112 read_lock_irqsave(&ni->size_lock, flags); 113 old_init_size = ni->initialized_size; 114 old_i_size = i_size_read(vi); 115 BUG_ON(new_init_size > ni->allocated_size); 116 read_unlock_irqrestore(&ni->size_lock, flags); 117 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 118 "old_initialized_size 0x%llx, " 119 "new_initialized_size 0x%llx, i_size 0x%llx.", 120 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 121 (unsigned long long)old_init_size, 122 (unsigned long long)new_init_size, old_i_size); 123 if (!NInoAttr(ni)) 124 base_ni = ni; 125 else 126 base_ni = ni->ext.base_ntfs_ino; 127 /* Use goto to reduce indentation and we need the label below anyway. */ 128 if (NInoNonResident(ni)) 129 goto do_non_resident_extend; 130 BUG_ON(old_init_size != old_i_size); 131 m = map_mft_record(base_ni); 132 if (IS_ERR(m)) { 133 err = PTR_ERR(m); 134 m = NULL; 135 goto err_out; 136 } 137 ctx = ntfs_attr_get_search_ctx(base_ni, m); 138 if (unlikely(!ctx)) { 139 err = -ENOMEM; 140 goto err_out; 141 } 142 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 143 CASE_SENSITIVE, 0, NULL, 0, ctx); 144 if (unlikely(err)) { 145 if (err == -ENOENT) 146 err = -EIO; 147 goto err_out; 148 } 149 m = ctx->mrec; 150 a = ctx->attr; 151 BUG_ON(a->non_resident); 152 /* The total length of the attribute value. */ 153 attr_len = le32_to_cpu(a->data.resident.value_length); 154 BUG_ON(old_i_size != (loff_t)attr_len); 155 /* 156 * Do the zeroing in the mft record and update the attribute size in 157 * the mft record. 158 */ 159 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 160 memset(kattr + attr_len, 0, new_init_size - attr_len); 161 a->data.resident.value_length = cpu_to_le32((u32)new_init_size); 162 /* Finally, update the sizes in the vfs and ntfs inodes. */ 163 write_lock_irqsave(&ni->size_lock, flags); 164 i_size_write(vi, new_init_size); 165 ni->initialized_size = new_init_size; 166 write_unlock_irqrestore(&ni->size_lock, flags); 167 goto done; 168 do_non_resident_extend: 169 /* 170 * If the new initialized size @new_init_size exceeds the current file 171 * size (vfs inode->i_size), we need to extend the file size to the 172 * new initialized size.
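 * (Note: this path updates the attribute's data_size and the vfs i_size right away; the initialized_size itself is only brought up to @new_init_size further below, page by page, as the pages are read in and dirtied.)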
173 */ 174 if (new_init_size > old_i_size) { 175 m = map_mft_record(base_ni); 176 if (IS_ERR(m)) { 177 err = PTR_ERR(m); 178 m = NULL; 179 goto err_out; 180 } 181 ctx = ntfs_attr_get_search_ctx(base_ni, m); 182 if (unlikely(!ctx)) { 183 err = -ENOMEM; 184 goto err_out; 185 } 186 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 187 CASE_SENSITIVE, 0, NULL, 0, ctx); 188 if (unlikely(err)) { 189 if (err == -ENOENT) 190 err = -EIO; 191 goto err_out; 192 } 193 m = ctx->mrec; 194 a = ctx->attr; 195 BUG_ON(!a->non_resident); 196 BUG_ON(old_i_size != (loff_t) 197 sle64_to_cpu(a->data.non_resident.data_size)); 198 a->data.non_resident.data_size = cpu_to_sle64(new_init_size); 199 flush_dcache_mft_record_page(ctx->ntfs_ino); 200 mark_mft_record_dirty(ctx->ntfs_ino); 201 /* Update the file size in the vfs inode. */ 202 i_size_write(vi, new_init_size); 203 ntfs_attr_put_search_ctx(ctx); 204 ctx = NULL; 205 unmap_mft_record(base_ni); 206 m = NULL; 207 } 208 mapping = vi->i_mapping; 209 index = old_init_size >> PAGE_SHIFT; 210 end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 211 do { 212 /* 213 * Read the page. If the page is not present, this will zero 214 * the uninitialized regions for us. 215 */ 216 page = read_mapping_page(mapping, index, NULL); 217 if (IS_ERR(page)) { 218 err = PTR_ERR(page); 219 goto init_err_out; 220 } 221 if (unlikely(PageError(page))) { 222 put_page(page); 223 err = -EIO; 224 goto init_err_out; 225 } 226 /* 227 * Update the initialized size in the ntfs inode. This is 228 * enough to make ntfs_writepage() work. 229 */ 230 write_lock_irqsave(&ni->size_lock, flags); 231 ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT; 232 if (ni->initialized_size > new_init_size) 233 ni->initialized_size = new_init_size; 234 write_unlock_irqrestore(&ni->size_lock, flags); 235 /* Set the page dirty so it gets written out. */ 236 set_page_dirty(page); 237 put_page(page); 238 /* 239 * Play nice with the vm and the rest of the system. This is 240 * very much needed as we can potentially be modifying the 241 * initialised size from a very small value to a really huge 242 * value, e.g. 243 * f = open(somefile, O_TRUNC); 244 * truncate(f, 10GiB); 245 * seek(f, 10GiB); 246 * write(f, 1); 247 * And this would mean we would be marking dirty hundreds of 248 * thousands of pages or as in the above example more than 249 * two and a half million pages! 250 * 251 * TODO: For sparse pages could optimize this workload by using 252 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This 253 * would be set in readpage for sparse pages and here we would 254 * not need to mark dirty any pages which have this bit set. 255 * The only caveat is that we have to clear the bit everywhere 256 * where we allocate any clusters that lie in the page or that 257 * contain the page. 258 * 259 * TODO: An even greater optimization would be for us to only 260 * call readpage() on pages which are not in sparse regions as 261 * determined from the runlist. This would greatly reduce the 262 * number of pages we read and make dirty in the case of sparse 263 * files. 264 */ 265 balance_dirty_pages_ratelimited(mapping); 266 cond_resched(); 267 } while (++index < end_index); 268 read_lock_irqsave(&ni->size_lock, flags); 269 BUG_ON(ni->initialized_size != new_init_size); 270 read_unlock_irqrestore(&ni->size_lock, flags); 271 /* Now bring in sync the initialized_size in the mft record. 
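 * (At this point the in-memory ntfs inode already carries the new initialized_size; what follows merely writes that value into the attribute record so the on-disk metadata matches.)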
*/ 272 m = map_mft_record(base_ni); 273 if (IS_ERR(m)) { 274 err = PTR_ERR(m); 275 m = NULL; 276 goto init_err_out; 277 } 278 ctx = ntfs_attr_get_search_ctx(base_ni, m); 279 if (unlikely(!ctx)) { 280 err = -ENOMEM; 281 goto init_err_out; 282 } 283 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 284 CASE_SENSITIVE, 0, NULL, 0, ctx); 285 if (unlikely(err)) { 286 if (err == -ENOENT) 287 err = -EIO; 288 goto init_err_out; 289 } 290 m = ctx->mrec; 291 a = ctx->attr; 292 BUG_ON(!a->non_resident); 293 a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size); 294 done: 295 flush_dcache_mft_record_page(ctx->ntfs_ino); 296 mark_mft_record_dirty(ctx->ntfs_ino); 297 if (ctx) 298 ntfs_attr_put_search_ctx(ctx); 299 if (m) 300 unmap_mft_record(base_ni); 301 ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.", 302 (unsigned long long)new_init_size, i_size_read(vi)); 303 return 0; 304 init_err_out: 305 write_lock_irqsave(&ni->size_lock, flags); 306 ni->initialized_size = old_init_size; 307 write_unlock_irqrestore(&ni->size_lock, flags); 308 err_out: 309 if (ctx) 310 ntfs_attr_put_search_ctx(ctx); 311 if (m) 312 unmap_mft_record(base_ni); 313 ntfs_debug("Failed. Returning error code %i.", err); 314 return err; 315 } 316 317 static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, 318 struct iov_iter *from) 319 { 320 loff_t pos; 321 s64 end, ll; 322 ssize_t err; 323 unsigned long flags; 324 struct file *file = iocb->ki_filp; 325 struct inode *vi = file_inode(file); 326 ntfs_inode *base_ni, *ni = NTFS_I(vi); 327 ntfs_volume *vol = ni->vol; 328 329 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " 330 "0x%llx, count 0x%zx.", vi->i_ino, 331 (unsigned)le32_to_cpu(ni->type), 332 (unsigned long long)iocb->ki_pos, 333 iov_iter_count(from)); 334 err = generic_write_checks(iocb, from); 335 if (unlikely(err <= 0)) 336 goto out; 337 /* 338 * All checks have passed. Before we start doing any writing we want 339 * to abort any totally illegal writes. 340 */ 341 BUG_ON(NInoMstProtected(ni)); 342 BUG_ON(ni->type != AT_DATA); 343 /* If file is encrypted, deny access, just like NT4. */ 344 if (NInoEncrypted(ni)) { 345 /* Only $DATA attributes can be encrypted. */ 346 /* 347 * Reminder for later: Encrypted files are _always_ 348 * non-resident so that the content can always be encrypted. 349 */ 350 ntfs_debug("Denying write access to encrypted file."); 351 err = -EACCES; 352 goto out; 353 } 354 if (NInoCompressed(ni)) { 355 /* Only unnamed $DATA attribute can be compressed. */ 356 BUG_ON(ni->name_len); 357 /* 358 * Reminder for later: If resident, the data is not actually 359 * compressed. Only on the switch to non-resident does 360 * compression kick in. This is in contrast to encrypted files 361 * (see above). 362 */ 363 ntfs_error(vi->i_sb, "Writing to compressed files is not " 364 "implemented yet. Sorry."); 365 err = -EOPNOTSUPP; 366 goto out; 367 } 368 base_ni = ni; 369 if (NInoAttr(ni)) 370 base_ni = ni->ext.base_ntfs_ino; 371 err = file_remove_privs(file); 372 if (unlikely(err)) 373 goto out; 374 /* 375 * Our ->update_time method always succeeds thus file_update_time() 376 * cannot fail either so there is no need to check the return code. 377 */ 378 file_update_time(file); 379 pos = iocb->ki_pos; 380 /* The first byte after the last cluster being written to. 
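 * (i.e. pos + count rounded up to the next cluster boundary; cluster_size_mask is the cluster size minus one, so adding it and masking with its complement rounds up.)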
*/ 381 end = (pos + iov_iter_count(from) + vol->cluster_size_mask) & 382 ~(u64)vol->cluster_size_mask; 383 /* 384 * If the write goes beyond the allocated size, extend the allocation 385 * to cover the whole of the write, rounded up to the nearest cluster. 386 */ 387 read_lock_irqsave(&ni->size_lock, flags); 388 ll = ni->allocated_size; 389 read_unlock_irqrestore(&ni->size_lock, flags); 390 if (end > ll) { 391 /* 392 * Extend the allocation without changing the data size. 393 * 394 * Note we ensure the allocation is big enough to at least 395 * write some data but we do not require the allocation to be 396 * complete, i.e. it may be partial. 397 */ 398 ll = ntfs_attr_extend_allocation(ni, end, -1, pos); 399 if (likely(ll >= 0)) { 400 BUG_ON(pos >= ll); 401 /* If the extension was partial truncate the write. */ 402 if (end > ll) { 403 ntfs_debug("Truncating write to inode 0x%lx, " 404 "attribute type 0x%x, because " 405 "the allocation was only " 406 "partially extended.", 407 vi->i_ino, (unsigned) 408 le32_to_cpu(ni->type)); 409 iov_iter_truncate(from, ll - pos); 410 } 411 } else { 412 err = ll; 413 read_lock_irqsave(&ni->size_lock, flags); 414 ll = ni->allocated_size; 415 read_unlock_irqrestore(&ni->size_lock, flags); 416 /* Perform a partial write if possible or fail. */ 417 if (pos < ll) { 418 ntfs_debug("Truncating write to inode 0x%lx " 419 "attribute type 0x%x, because " 420 "extending the allocation " 421 "failed (error %d).", 422 vi->i_ino, (unsigned) 423 le32_to_cpu(ni->type), 424 (int)-err); 425 iov_iter_truncate(from, ll - pos); 426 } else { 427 if (err != -ENOSPC) 428 ntfs_error(vi->i_sb, "Cannot perform " 429 "write to inode " 430 "0x%lx, attribute " 431 "type 0x%x, because " 432 "extending the " 433 "allocation failed " 434 "(error %ld).", 435 vi->i_ino, (unsigned) 436 le32_to_cpu(ni->type), 437 (long)-err); 438 else 439 ntfs_debug("Cannot perform write to " 440 "inode 0x%lx, " 441 "attribute type 0x%x, " 442 "because there is no " 443 "space left.", 444 vi->i_ino, (unsigned) 445 le32_to_cpu(ni->type)); 446 goto out; 447 } 448 } 449 } 450 /* 451 * If the write starts beyond the initialized size, extend it up to the 452 * beginning of the write and initialize all non-sparse space between 453 * the old initialized size and the new one. This automatically also 454 * increments the vfs inode->i_size to keep it above or equal to the 455 * initialized_size. 456 */ 457 read_lock_irqsave(&ni->size_lock, flags); 458 ll = ni->initialized_size; 459 read_unlock_irqrestore(&ni->size_lock, flags); 460 if (pos > ll) { 461 /* 462 * Wait for ongoing direct i/o to complete before proceeding. 463 * New direct i/o cannot start as we hold i_mutex.
464 */ 465 inode_dio_wait(vi); 466 err = ntfs_attr_extend_initialized(ni, pos); 467 if (unlikely(err < 0)) 468 ntfs_error(vi->i_sb, "Cannot perform write to inode " 469 "0x%lx, attribute type 0x%x, because " 470 "extending the initialized size " 471 "failed (error %d).", vi->i_ino, 472 (unsigned)le32_to_cpu(ni->type), 473 (int)-err); 474 } 475 out: 476 return err; 477 } 478 479 /** 480 * __ntfs_grab_cache_pages - obtain a number of locked pages 481 * @mapping: address space mapping from which to obtain page cache pages 482 * @index: starting index in @mapping at which to begin obtaining pages 483 * @nr_pages: number of page cache pages to obtain 484 * @pages: array of pages in which to return the obtained page cache pages 485 * @cached_page: allocated but as yet unused page 486 * 487 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 488 * starting at index @index. 489 * 490 * If a page is newly created, add it to lru list 491 * 492 * Note, the page locks are obtained in ascending page index order. 493 */ 494 static inline int __ntfs_grab_cache_pages(struct address_space *mapping, 495 pgoff_t index, const unsigned nr_pages, struct page **pages, 496 struct page **cached_page) 497 { 498 int err, nr; 499 500 BUG_ON(!nr_pages); 501 err = nr = 0; 502 do { 503 pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK | 504 FGP_ACCESSED); 505 if (!pages[nr]) { 506 if (!*cached_page) { 507 *cached_page = page_cache_alloc(mapping); 508 if (unlikely(!*cached_page)) { 509 err = -ENOMEM; 510 goto err_out; 511 } 512 } 513 err = add_to_page_cache_lru(*cached_page, mapping, 514 index, 515 mapping_gfp_constraint(mapping, GFP_KERNEL)); 516 if (unlikely(err)) { 517 if (err == -EEXIST) 518 continue; 519 goto err_out; 520 } 521 pages[nr] = *cached_page; 522 *cached_page = NULL; 523 } 524 index++; 525 nr++; 526 } while (nr < nr_pages); 527 out: 528 return err; 529 err_out: 530 while (nr > 0) { 531 unlock_page(pages[--nr]); 532 put_page(pages[nr]); 533 } 534 goto out; 535 } 536 537 static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) 538 { 539 lock_buffer(bh); 540 get_bh(bh); 541 bh->b_end_io = end_buffer_read_sync; 542 return submit_bh(REQ_OP_READ, 0, bh); 543 } 544 545 /** 546 * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data 547 * @pages: array of destination pages 548 * @nr_pages: number of pages in @pages 549 * @pos: byte position in file at which the write begins 550 * @bytes: number of bytes to be written 551 * 552 * This is called for non-resident attributes from ntfs_file_buffered_write() 553 * with i_mutex held on the inode (@pages[0]->mapping->host). There are 554 * @nr_pages pages in @pages which are locked but not kmap()ped. The source 555 * data has not yet been copied into the @pages. 556 * 557 * Need to fill any holes with actual clusters, allocate buffers if necessary, 558 * ensure all the buffers are mapped, and bring uptodate any buffers that are 559 * only partially being written to. 560 * 561 * If @nr_pages is greater than one, we are guaranteed that the cluster size is 562 * greater than PAGE_SIZE, that all pages in @pages are entirely inside 563 * the same cluster and that they are the entirety of that cluster, and that 564 * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole. 565 * 566 * i_size is not to be modified yet. 567 * 568 * Return 0 on success or -errno on error. 
569 */ 570 static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, 571 unsigned nr_pages, s64 pos, size_t bytes) 572 { 573 VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend; 574 LCN lcn; 575 s64 bh_pos, vcn_len, end, initialized_size; 576 sector_t lcn_block; 577 struct page *page; 578 struct inode *vi; 579 ntfs_inode *ni, *base_ni = NULL; 580 ntfs_volume *vol; 581 runlist_element *rl, *rl2; 582 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; 583 ntfs_attr_search_ctx *ctx = NULL; 584 MFT_RECORD *m = NULL; 585 ATTR_RECORD *a = NULL; 586 unsigned long flags; 587 u32 attr_rec_len = 0; 588 unsigned blocksize, u; 589 int err, mp_size; 590 bool rl_write_locked, was_hole, is_retry; 591 unsigned char blocksize_bits; 592 struct { 593 u8 runlist_merged:1; 594 u8 mft_attr_mapped:1; 595 u8 mp_rebuilt:1; 596 u8 attr_switched:1; 597 } status = { 0, 0, 0, 0 }; 598 599 BUG_ON(!nr_pages); 600 BUG_ON(!pages); 601 BUG_ON(!*pages); 602 vi = pages[0]->mapping->host; 603 ni = NTFS_I(vi); 604 vol = ni->vol; 605 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " 606 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 607 vi->i_ino, ni->type, pages[0]->index, nr_pages, 608 (long long)pos, bytes); 609 blocksize = vol->sb->s_blocksize; 610 blocksize_bits = vol->sb->s_blocksize_bits; 611 u = 0; 612 do { 613 page = pages[u]; 614 BUG_ON(!page); 615 /* 616 * create_empty_buffers() will create uptodate/dirty buffers if 617 * the page is uptodate/dirty. 618 */ 619 if (!page_has_buffers(page)) { 620 create_empty_buffers(page, blocksize, 0); 621 if (unlikely(!page_has_buffers(page))) 622 return -ENOMEM; 623 } 624 } while (++u < nr_pages); 625 rl_write_locked = false; 626 rl = NULL; 627 err = 0; 628 vcn = lcn = -1; 629 vcn_len = 0; 630 lcn_block = -1; 631 was_hole = false; 632 cpos = pos >> vol->cluster_size_bits; 633 end = pos + bytes; 634 cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits; 635 /* 636 * Loop over each page and for each page over each buffer. Use goto to 637 * reduce indentation. 638 */ 639 u = 0; 640 do_next_page: 641 page = pages[u]; 642 bh_pos = (s64)page->index << PAGE_SHIFT; 643 bh = head = page_buffers(page); 644 do { 645 VCN cdelta; 646 s64 bh_end; 647 unsigned bh_cofs; 648 649 /* Clear buffer_new on all buffers to reinitialise state. */ 650 if (buffer_new(bh)) 651 clear_buffer_new(bh); 652 bh_end = bh_pos + blocksize; 653 bh_cpos = bh_pos >> vol->cluster_size_bits; 654 bh_cofs = bh_pos & vol->cluster_size_mask; 655 if (buffer_mapped(bh)) { 656 /* 657 * The buffer is already mapped. If it is uptodate, 658 * ignore it. 659 */ 660 if (buffer_uptodate(bh)) 661 continue; 662 /* 663 * The buffer is not uptodate. If the page is uptodate 664 * set the buffer uptodate and otherwise ignore it. 665 */ 666 if (PageUptodate(page)) { 667 set_buffer_uptodate(bh); 668 continue; 669 } 670 /* 671 * Neither the page nor the buffer are uptodate. If 672 * the buffer is only partially being written to, we 673 * need to read it in before the write, i.e. now. 674 */ 675 if ((bh_pos < pos && bh_end > pos) || 676 (bh_pos < end && bh_end > end)) { 677 /* 678 * If the buffer is fully or partially within 679 * the initialized size, do an actual read. 680 * Otherwise, simply zero the buffer. 
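 * (Data beyond the initialized size reads back as zeroes by definition, so there is nothing useful on disk to read there and zero-filling the buffer is sufficient.)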
681 */ 682 read_lock_irqsave(&ni->size_lock, flags); 683 initialized_size = ni->initialized_size; 684 read_unlock_irqrestore(&ni->size_lock, flags); 685 if (bh_pos < initialized_size) { 686 ntfs_submit_bh_for_read(bh); 687 *wait_bh++ = bh; 688 } else { 689 zero_user(page, bh_offset(bh), 690 blocksize); 691 set_buffer_uptodate(bh); 692 } 693 } 694 continue; 695 } 696 /* Unmapped buffer. Need to map it. */ 697 bh->b_bdev = vol->sb->s_bdev; 698 /* 699 * If the current buffer is in the same clusters as the map 700 * cache, there is no need to check the runlist again. The 701 * map cache is made up of @vcn, which is the first cached file 702 * cluster, @vcn_len which is the number of cached file 703 * clusters, @lcn is the device cluster corresponding to @vcn, 704 * and @lcn_block is the block number corresponding to @lcn. 705 */ 706 cdelta = bh_cpos - vcn; 707 if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) { 708 map_buffer_cached: 709 BUG_ON(lcn < 0); 710 bh->b_blocknr = lcn_block + 711 (cdelta << (vol->cluster_size_bits - 712 blocksize_bits)) + 713 (bh_cofs >> blocksize_bits); 714 set_buffer_mapped(bh); 715 /* 716 * If the page is uptodate so is the buffer. If the 717 * buffer is fully outside the write, we ignore it if 718 * it was already allocated and we mark it dirty so it 719 * gets written out if we allocated it. On the other 720 * hand, if we allocated the buffer but we are not 721 * marking it dirty we set buffer_new so we can do 722 * error recovery. 723 */ 724 if (PageUptodate(page)) { 725 if (!buffer_uptodate(bh)) 726 set_buffer_uptodate(bh); 727 if (unlikely(was_hole)) { 728 /* We allocated the buffer. */ 729 clean_bdev_bh_alias(bh); 730 if (bh_end <= pos || bh_pos >= end) 731 mark_buffer_dirty(bh); 732 else 733 set_buffer_new(bh); 734 } 735 continue; 736 } 737 /* Page is _not_ uptodate. */ 738 if (likely(!was_hole)) { 739 /* 740 * Buffer was already allocated. If it is not 741 * uptodate and is only partially being written 742 * to, we need to read it in before the write, 743 * i.e. now. 744 */ 745 if (!buffer_uptodate(bh) && bh_pos < end && 746 bh_end > pos && 747 (bh_pos < pos || 748 bh_end > end)) { 749 /* 750 * If the buffer is fully or partially 751 * within the initialized size, do an 752 * actual read. Otherwise, simply zero 753 * the buffer. 754 */ 755 read_lock_irqsave(&ni->size_lock, 756 flags); 757 initialized_size = ni->initialized_size; 758 read_unlock_irqrestore(&ni->size_lock, 759 flags); 760 if (bh_pos < initialized_size) { 761 ntfs_submit_bh_for_read(bh); 762 *wait_bh++ = bh; 763 } else { 764 zero_user(page, bh_offset(bh), 765 blocksize); 766 set_buffer_uptodate(bh); 767 } 768 } 769 continue; 770 } 771 /* We allocated the buffer. */ 772 clean_bdev_bh_alias(bh); 773 /* 774 * If the buffer is fully outside the write, zero it, 775 * set it uptodate, and mark it dirty so it gets 776 * written out. If it is partially being written to, 777 * zero region surrounding the write but leave it to 778 * commit write to do anything else. Finally, if the 779 * buffer is fully being overwritten, do nothing. 
780 */ 781 if (bh_end <= pos || bh_pos >= end) { 782 if (!buffer_uptodate(bh)) { 783 zero_user(page, bh_offset(bh), 784 blocksize); 785 set_buffer_uptodate(bh); 786 } 787 mark_buffer_dirty(bh); 788 continue; 789 } 790 set_buffer_new(bh); 791 if (!buffer_uptodate(bh) && 792 (bh_pos < pos || bh_end > end)) { 793 u8 *kaddr; 794 unsigned pofs; 795 796 kaddr = kmap_atomic(page); 797 if (bh_pos < pos) { 798 pofs = bh_pos & ~PAGE_MASK; 799 memset(kaddr + pofs, 0, pos - bh_pos); 800 } 801 if (bh_end > end) { 802 pofs = end & ~PAGE_MASK; 803 memset(kaddr + pofs, 0, bh_end - end); 804 } 805 kunmap_atomic(kaddr); 806 flush_dcache_page(page); 807 } 808 continue; 809 } 810 /* 811 * Slow path: this is the first buffer in the cluster. If it 812 * is outside allocated size and is not uptodate, zero it and 813 * set it uptodate. 814 */ 815 read_lock_irqsave(&ni->size_lock, flags); 816 initialized_size = ni->allocated_size; 817 read_unlock_irqrestore(&ni->size_lock, flags); 818 if (bh_pos > initialized_size) { 819 if (PageUptodate(page)) { 820 if (!buffer_uptodate(bh)) 821 set_buffer_uptodate(bh); 822 } else if (!buffer_uptodate(bh)) { 823 zero_user(page, bh_offset(bh), blocksize); 824 set_buffer_uptodate(bh); 825 } 826 continue; 827 } 828 is_retry = false; 829 if (!rl) { 830 down_read(&ni->runlist.lock); 831 retry_remap: 832 rl = ni->runlist.rl; 833 } 834 if (likely(rl != NULL)) { 835 /* Seek to element containing target cluster. */ 836 while (rl->length && rl[1].vcn <= bh_cpos) 837 rl++; 838 lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos); 839 if (likely(lcn >= 0)) { 840 /* 841 * Successful remap, setup the map cache and 842 * use that to deal with the buffer. 843 */ 844 was_hole = false; 845 vcn = bh_cpos; 846 vcn_len = rl[1].vcn - vcn; 847 lcn_block = lcn << (vol->cluster_size_bits - 848 blocksize_bits); 849 cdelta = 0; 850 /* 851 * If the number of remaining clusters touched 852 * by the write is smaller or equal to the 853 * number of cached clusters, unlock the 854 * runlist as the map cache will be used from 855 * now on. 856 */ 857 if (likely(vcn + vcn_len >= cend)) { 858 if (rl_write_locked) { 859 up_write(&ni->runlist.lock); 860 rl_write_locked = false; 861 } else 862 up_read(&ni->runlist.lock); 863 rl = NULL; 864 } 865 goto map_buffer_cached; 866 } 867 } else 868 lcn = LCN_RL_NOT_MAPPED; 869 /* 870 * If it is not a hole and not out of bounds, the runlist is 871 * probably unmapped so try to map it now. 872 */ 873 if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) { 874 if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) { 875 /* Attempt to map runlist. */ 876 if (!rl_write_locked) { 877 /* 878 * We need the runlist locked for 879 * writing, so if it is locked for 880 * reading relock it now and retry in 881 * case it changed whilst we dropped 882 * the lock. 883 */ 884 up_read(&ni->runlist.lock); 885 down_write(&ni->runlist.lock); 886 rl_write_locked = true; 887 goto retry_remap; 888 } 889 err = ntfs_map_runlist_nolock(ni, bh_cpos, 890 NULL); 891 if (likely(!err)) { 892 is_retry = true; 893 goto retry_remap; 894 } 895 /* 896 * If @vcn is out of bounds, pretend @lcn is 897 * LCN_ENOENT. As long as the buffer is out 898 * of bounds this will work fine. 899 */ 900 if (err == -ENOENT) { 901 lcn = LCN_ENOENT; 902 err = 0; 903 goto rl_not_mapped_enoent; 904 } 905 } else 906 err = -EIO; 907 /* Failed to map the buffer, even after retrying. 
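 * (The block number is invalidated, the failure is logged, and the break drops out of the buffer loop into the common error handling at the end of the function.)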
*/ 908 bh->b_blocknr = -1; 909 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 910 "attribute type 0x%x, vcn 0x%llx, " 911 "vcn offset 0x%x, because its " 912 "location on disk could not be " 913 "determined%s (error code %i).", 914 ni->mft_no, ni->type, 915 (unsigned long long)bh_cpos, 916 (unsigned)bh_pos & 917 vol->cluster_size_mask, 918 is_retry ? " even after retrying" : "", 919 err); 920 break; 921 } 922 rl_not_mapped_enoent: 923 /* 924 * The buffer is in a hole or out of bounds. We need to fill 925 * the hole, unless the buffer is in a cluster which is not 926 * touched by the write, in which case we just leave the buffer 927 * unmapped. This can only happen when the cluster size is 928 * less than the page cache size. 929 */ 930 if (unlikely(vol->cluster_size < PAGE_SIZE)) { 931 bh_cend = (bh_end + vol->cluster_size - 1) >> 932 vol->cluster_size_bits; 933 if ((bh_cend <= cpos || bh_cpos >= cend)) { 934 bh->b_blocknr = -1; 935 /* 936 * If the buffer is uptodate we skip it. If it 937 * is not but the page is uptodate, we can set 938 * the buffer uptodate. If the page is not 939 * uptodate, we can clear the buffer and set it 940 * uptodate. Whether this is worthwhile is 941 * debatable and this could be removed. 942 */ 943 if (PageUptodate(page)) { 944 if (!buffer_uptodate(bh)) 945 set_buffer_uptodate(bh); 946 } else if (!buffer_uptodate(bh)) { 947 zero_user(page, bh_offset(bh), 948 blocksize); 949 set_buffer_uptodate(bh); 950 } 951 continue; 952 } 953 } 954 /* 955 * Out of bounds buffer is invalid if it was not really out of 956 * bounds. 957 */ 958 BUG_ON(lcn != LCN_HOLE); 959 /* 960 * We need the runlist locked for writing, so if it is locked 961 * for reading relock it now and retry in case it changed 962 * whilst we dropped the lock. 963 */ 964 BUG_ON(!rl); 965 if (!rl_write_locked) { 966 up_read(&ni->runlist.lock); 967 down_write(&ni->runlist.lock); 968 rl_write_locked = true; 969 goto retry_remap; 970 } 971 /* Find the previous last allocated cluster. */ 972 BUG_ON(rl->lcn != LCN_HOLE); 973 lcn = -1; 974 rl2 = rl; 975 while (--rl2 >= ni->runlist.rl) { 976 if (rl2->lcn >= 0) { 977 lcn = rl2->lcn + rl2->length; 978 break; 979 } 980 } 981 rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE, 982 false); 983 if (IS_ERR(rl2)) { 984 err = PTR_ERR(rl2); 985 ntfs_debug("Failed to allocate cluster, error code %i.", 986 err); 987 break; 988 } 989 lcn = rl2->lcn; 990 rl = ntfs_runlists_merge(ni->runlist.rl, rl2); 991 if (IS_ERR(rl)) { 992 err = PTR_ERR(rl); 993 if (err != -ENOMEM) 994 err = -EIO; 995 if (ntfs_cluster_free_from_rl(vol, rl2)) { 996 ntfs_error(vol->sb, "Failed to release " 997 "allocated cluster in error " 998 "code path. Run chkdsk to " 999 "recover the lost cluster."); 1000 NVolSetErrors(vol); 1001 } 1002 ntfs_free(rl2); 1003 break; 1004 } 1005 ni->runlist.rl = rl; 1006 status.runlist_merged = 1; 1007 ntfs_debug("Allocated cluster, lcn 0x%llx.", 1008 (unsigned long long)lcn); 1009 /* Map and lock the mft record and get the attribute record. 
*/ 1010 if (!NInoAttr(ni)) 1011 base_ni = ni; 1012 else 1013 base_ni = ni->ext.base_ntfs_ino; 1014 m = map_mft_record(base_ni); 1015 if (IS_ERR(m)) { 1016 err = PTR_ERR(m); 1017 break; 1018 } 1019 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1020 if (unlikely(!ctx)) { 1021 err = -ENOMEM; 1022 unmap_mft_record(base_ni); 1023 break; 1024 } 1025 status.mft_attr_mapped = 1; 1026 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1027 CASE_SENSITIVE, bh_cpos, NULL, 0, ctx); 1028 if (unlikely(err)) { 1029 if (err == -ENOENT) 1030 err = -EIO; 1031 break; 1032 } 1033 m = ctx->mrec; 1034 a = ctx->attr; 1035 /* 1036 * Find the runlist element with which the attribute extent 1037 * starts. Note, we cannot use the _attr_ version because we 1038 * have mapped the mft record. That is ok because we know the 1039 * runlist fragment must be mapped already to have ever gotten 1040 * here, so we can just use the _rl_ version. 1041 */ 1042 vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn); 1043 rl2 = ntfs_rl_find_vcn_nolock(rl, vcn); 1044 BUG_ON(!rl2); 1045 BUG_ON(!rl2->length); 1046 BUG_ON(rl2->lcn < LCN_HOLE); 1047 highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); 1048 /* 1049 * If @highest_vcn is zero, calculate the real highest_vcn 1050 * (which can really be zero). 1051 */ 1052 if (!highest_vcn) 1053 highest_vcn = (sle64_to_cpu( 1054 a->data.non_resident.allocated_size) >> 1055 vol->cluster_size_bits) - 1; 1056 /* 1057 * Determine the size of the mapping pairs array for the new 1058 * extent, i.e. the old extent with the hole filled. 1059 */ 1060 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn, 1061 highest_vcn); 1062 if (unlikely(mp_size <= 0)) { 1063 if (!(err = mp_size)) 1064 err = -EIO; 1065 ntfs_debug("Failed to get size for mapping pairs " 1066 "array, error code %i.", err); 1067 break; 1068 } 1069 /* 1070 * Resize the attribute record to fit the new mapping pairs 1071 * array. 1072 */ 1073 attr_rec_len = le32_to_cpu(a->length); 1074 err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu( 1075 a->data.non_resident.mapping_pairs_offset)); 1076 if (unlikely(err)) { 1077 BUG_ON(err != -ENOSPC); 1078 // TODO: Deal with this by using the current attribute 1079 // and fill it with as much of the mapping pairs 1080 // array as possible. Then loop over each attribute 1081 // extent rewriting the mapping pairs arrays as we go 1082 // along and if when we reach the end we have not 1083 // enough space, try to resize the last attribute 1084 // extent and if even that fails, add a new attribute 1085 // extent. 1086 // We could also try to resize at each step in the hope 1087 // that we will not need to rewrite every single extent. 1088 // Note, we may need to decompress some extents to fill 1089 // the runlist as we are walking the extents... 1090 ntfs_error(vol->sb, "Not enough space in the mft " 1091 "record for the extended attribute " 1092 "record. This case is not " 1093 "implemented yet."); 1094 err = -EOPNOTSUPP; 1095 break ; 1096 } 1097 status.mp_rebuilt = 1; 1098 /* 1099 * Generate the mapping pairs array directly into the attribute 1100 * record. 
1101 */ 1102 err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 1103 a->data.non_resident.mapping_pairs_offset), 1104 mp_size, rl2, vcn, highest_vcn, NULL); 1105 if (unlikely(err)) { 1106 ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, " 1107 "attribute type 0x%x, because building " 1108 "the mapping pairs failed with error " 1109 "code %i.", vi->i_ino, 1110 (unsigned)le32_to_cpu(ni->type), err); 1111 err = -EIO; 1112 break; 1113 } 1114 /* Update the highest_vcn but only if it was not set. */ 1115 if (unlikely(!a->data.non_resident.highest_vcn)) 1116 a->data.non_resident.highest_vcn = 1117 cpu_to_sle64(highest_vcn); 1118 /* 1119 * If the attribute is sparse/compressed, update the compressed 1120 * size in the ntfs_inode structure and the attribute record. 1121 */ 1122 if (likely(NInoSparse(ni) || NInoCompressed(ni))) { 1123 /* 1124 * If we are not in the first attribute extent, switch 1125 * to it, but first ensure the changes will make it to 1126 * disk later. 1127 */ 1128 if (a->data.non_resident.lowest_vcn) { 1129 flush_dcache_mft_record_page(ctx->ntfs_ino); 1130 mark_mft_record_dirty(ctx->ntfs_ino); 1131 ntfs_attr_reinit_search_ctx(ctx); 1132 err = ntfs_attr_lookup(ni->type, ni->name, 1133 ni->name_len, CASE_SENSITIVE, 1134 0, NULL, 0, ctx); 1135 if (unlikely(err)) { 1136 status.attr_switched = 1; 1137 break; 1138 } 1139 /* @m is not used any more so do not set it. */ 1140 a = ctx->attr; 1141 } 1142 write_lock_irqsave(&ni->size_lock, flags); 1143 ni->itype.compressed.size += vol->cluster_size; 1144 a->data.non_resident.compressed_size = 1145 cpu_to_sle64(ni->itype.compressed.size); 1146 write_unlock_irqrestore(&ni->size_lock, flags); 1147 } 1148 /* Ensure the changes make it to disk. */ 1149 flush_dcache_mft_record_page(ctx->ntfs_ino); 1150 mark_mft_record_dirty(ctx->ntfs_ino); 1151 ntfs_attr_put_search_ctx(ctx); 1152 unmap_mft_record(base_ni); 1153 /* Successfully filled the hole. */ 1154 status.runlist_merged = 0; 1155 status.mft_attr_mapped = 0; 1156 status.mp_rebuilt = 0; 1157 /* Setup the map cache and use that to deal with the buffer. */ 1158 was_hole = true; 1159 vcn = bh_cpos; 1160 vcn_len = 1; 1161 lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits); 1162 cdelta = 0; 1163 /* 1164 * If the number of remaining clusters in the @pages is smaller 1165 * or equal to the number of cached clusters, unlock the 1166 * runlist as the map cache will be used from now on. 1167 */ 1168 if (likely(vcn + vcn_len >= cend)) { 1169 up_write(&ni->runlist.lock); 1170 rl_write_locked = false; 1171 rl = NULL; 1172 } 1173 goto map_buffer_cached; 1174 } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1175 /* If there are no errors, do the next page. */ 1176 if (likely(!err && ++u < nr_pages)) 1177 goto do_next_page; 1178 /* If there are no errors, release the runlist lock if we took it. */ 1179 if (likely(!err)) { 1180 if (unlikely(rl_write_locked)) { 1181 up_write(&ni->runlist.lock); 1182 rl_write_locked = false; 1183 } else if (unlikely(rl)) 1184 up_read(&ni->runlist.lock); 1185 rl = NULL; 1186 } 1187 /* If we issued read requests, let them complete. 
*/ 1188 read_lock_irqsave(&ni->size_lock, flags); 1189 initialized_size = ni->initialized_size; 1190 read_unlock_irqrestore(&ni->size_lock, flags); 1191 while (wait_bh > wait) { 1192 bh = *--wait_bh; 1193 wait_on_buffer(bh); 1194 if (likely(buffer_uptodate(bh))) { 1195 page = bh->b_page; 1196 bh_pos = ((s64)page->index << PAGE_SHIFT) + 1197 bh_offset(bh); 1198 /* 1199 * If the buffer overflows the initialized size, need 1200 * to zero the overflowing region. 1201 */ 1202 if (unlikely(bh_pos + blocksize > initialized_size)) { 1203 int ofs = 0; 1204 1205 if (likely(bh_pos < initialized_size)) 1206 ofs = initialized_size - bh_pos; 1207 zero_user_segment(page, bh_offset(bh) + ofs, 1208 blocksize); 1209 } 1210 } else /* if (unlikely(!buffer_uptodate(bh))) */ 1211 err = -EIO; 1212 } 1213 if (likely(!err)) { 1214 /* Clear buffer_new on all buffers. */ 1215 u = 0; 1216 do { 1217 bh = head = page_buffers(pages[u]); 1218 do { 1219 if (buffer_new(bh)) 1220 clear_buffer_new(bh); 1221 } while ((bh = bh->b_this_page) != head); 1222 } while (++u < nr_pages); 1223 ntfs_debug("Done."); 1224 return err; 1225 } 1226 if (status.attr_switched) { 1227 /* Get back to the attribute extent we modified. */ 1228 ntfs_attr_reinit_search_ctx(ctx); 1229 if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1230 CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) { 1231 ntfs_error(vol->sb, "Failed to find required " 1232 "attribute extent of attribute in " 1233 "error code path. Run chkdsk to " 1234 "recover."); 1235 write_lock_irqsave(&ni->size_lock, flags); 1236 ni->itype.compressed.size += vol->cluster_size; 1237 write_unlock_irqrestore(&ni->size_lock, flags); 1238 flush_dcache_mft_record_page(ctx->ntfs_ino); 1239 mark_mft_record_dirty(ctx->ntfs_ino); 1240 /* 1241 * The only thing that is now wrong is the compressed 1242 * size of the base attribute extent which chkdsk 1243 * should be able to fix. 1244 */ 1245 NVolSetErrors(vol); 1246 } else { 1247 m = ctx->mrec; 1248 a = ctx->attr; 1249 status.attr_switched = 0; 1250 } 1251 } 1252 /* 1253 * If the runlist has been modified, need to restore it by punching a 1254 * hole into it and we then need to deallocate the on-disk cluster as 1255 * well. Note, we only modify the runlist if we are able to generate a 1256 * new mapping pairs array, i.e. only when the mapped attribute extent 1257 * is not switched. 1258 */ 1259 if (status.runlist_merged && !status.attr_switched) { 1260 BUG_ON(!rl_write_locked); 1261 /* Make the file cluster we allocated sparse in the runlist. */ 1262 if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) { 1263 ntfs_error(vol->sb, "Failed to punch hole into " 1264 "attribute runlist in error code " 1265 "path. Run chkdsk to recover the " 1266 "lost cluster."); 1267 NVolSetErrors(vol); 1268 } else /* if (success) */ { 1269 status.runlist_merged = 0; 1270 /* 1271 * Deallocate the on-disk cluster we allocated but only 1272 * if we succeeded in punching its vcn out of the 1273 * runlist. 1274 */ 1275 down_write(&vol->lcnbmp_lock); 1276 if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 1277 ntfs_error(vol->sb, "Failed to release " 1278 "allocated cluster in error " 1279 "code path. Run chkdsk to " 1280 "recover the lost cluster."); 1281 NVolSetErrors(vol); 1282 } 1283 up_write(&vol->lcnbmp_lock); 1284 } 1285 } 1286 /* 1287 * Resize the attribute record to its old size and rebuild the mapping 1288 * pairs array. 
Note, we only can do this if the runlist has been 1289 * restored to its old state which also implies that the mapped 1290 * attribute extent is not switched. 1291 */ 1292 if (status.mp_rebuilt && !status.runlist_merged) { 1293 if (ntfs_attr_record_resize(m, a, attr_rec_len)) { 1294 ntfs_error(vol->sb, "Failed to restore attribute " 1295 "record in error code path. Run " 1296 "chkdsk to recover."); 1297 NVolSetErrors(vol); 1298 } else /* if (success) */ { 1299 if (ntfs_mapping_pairs_build(vol, (u8*)a + 1300 le16_to_cpu(a->data.non_resident. 1301 mapping_pairs_offset), attr_rec_len - 1302 le16_to_cpu(a->data.non_resident. 1303 mapping_pairs_offset), ni->runlist.rl, 1304 vcn, highest_vcn, NULL)) { 1305 ntfs_error(vol->sb, "Failed to restore " 1306 "mapping pairs array in error " 1307 "code path. Run chkdsk to " 1308 "recover."); 1309 NVolSetErrors(vol); 1310 } 1311 flush_dcache_mft_record_page(ctx->ntfs_ino); 1312 mark_mft_record_dirty(ctx->ntfs_ino); 1313 } 1314 } 1315 /* Release the mft record and the attribute. */ 1316 if (status.mft_attr_mapped) { 1317 ntfs_attr_put_search_ctx(ctx); 1318 unmap_mft_record(base_ni); 1319 } 1320 /* Release the runlist lock. */ 1321 if (rl_write_locked) 1322 up_write(&ni->runlist.lock); 1323 else if (rl) 1324 up_read(&ni->runlist.lock); 1325 /* 1326 * Zero out any newly allocated blocks to avoid exposing stale data. 1327 * If BH_New is set, we know that the block was newly allocated above 1328 * and that it has not been fully zeroed and marked dirty yet. 1329 */ 1330 nr_pages = u; 1331 u = 0; 1332 end = bh_cpos << vol->cluster_size_bits; 1333 do { 1334 page = pages[u]; 1335 bh = head = page_buffers(page); 1336 do { 1337 if (u == nr_pages && 1338 ((s64)page->index << PAGE_SHIFT) + 1339 bh_offset(bh) >= end) 1340 break; 1341 if (!buffer_new(bh)) 1342 continue; 1343 clear_buffer_new(bh); 1344 if (!buffer_uptodate(bh)) { 1345 if (PageUptodate(page)) 1346 set_buffer_uptodate(bh); 1347 else { 1348 zero_user(page, bh_offset(bh), 1349 blocksize); 1350 set_buffer_uptodate(bh); 1351 } 1352 } 1353 mark_buffer_dirty(bh); 1354 } while ((bh = bh->b_this_page) != head); 1355 } while (++u <= nr_pages); 1356 ntfs_error(vol->sb, "Failed. Returning error code %i.", err); 1357 return err; 1358 } 1359 1360 static inline void ntfs_flush_dcache_pages(struct page **pages, 1361 unsigned nr_pages) 1362 { 1363 BUG_ON(!nr_pages); 1364 /* 1365 * Warning: Do not do the decrement at the same time as the call to 1366 * flush_dcache_page() because it is a NULL macro on i386 and hence the 1367 * decrement never happens so the loop never terminates. 1368 */ 1369 do { 1370 --nr_pages; 1371 flush_dcache_page(pages[nr_pages]); 1372 } while (nr_pages > 0); 1373 } 1374 1375 /** 1376 * ntfs_commit_pages_after_non_resident_write - commit the received data 1377 * @pages: array of destination pages 1378 * @nr_pages: number of pages in @pages 1379 * @pos: byte position in file at which the write begins 1380 * @bytes: number of bytes to be written 1381 * 1382 * See description of ntfs_commit_pages_after_write(), below. 
1383 */ 1384 static inline int ntfs_commit_pages_after_non_resident_write( 1385 struct page **pages, const unsigned nr_pages, 1386 s64 pos, size_t bytes) 1387 { 1388 s64 end, initialized_size; 1389 struct inode *vi; 1390 ntfs_inode *ni, *base_ni; 1391 struct buffer_head *bh, *head; 1392 ntfs_attr_search_ctx *ctx; 1393 MFT_RECORD *m; 1394 ATTR_RECORD *a; 1395 unsigned long flags; 1396 unsigned blocksize, u; 1397 int err; 1398 1399 vi = pages[0]->mapping->host; 1400 ni = NTFS_I(vi); 1401 blocksize = vi->i_sb->s_blocksize; 1402 end = pos + bytes; 1403 u = 0; 1404 do { 1405 s64 bh_pos; 1406 struct page *page; 1407 bool partial; 1408 1409 page = pages[u]; 1410 bh_pos = (s64)page->index << PAGE_SHIFT; 1411 bh = head = page_buffers(page); 1412 partial = false; 1413 do { 1414 s64 bh_end; 1415 1416 bh_end = bh_pos + blocksize; 1417 if (bh_end <= pos || bh_pos >= end) { 1418 if (!buffer_uptodate(bh)) 1419 partial = true; 1420 } else { 1421 set_buffer_uptodate(bh); 1422 mark_buffer_dirty(bh); 1423 } 1424 } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1425 /* 1426 * If all buffers are now uptodate but the page is not, set the 1427 * page uptodate. 1428 */ 1429 if (!partial && !PageUptodate(page)) 1430 SetPageUptodate(page); 1431 } while (++u < nr_pages); 1432 /* 1433 * Finally, if we do not need to update initialized_size or i_size we 1434 * are finished. 1435 */ 1436 read_lock_irqsave(&ni->size_lock, flags); 1437 initialized_size = ni->initialized_size; 1438 read_unlock_irqrestore(&ni->size_lock, flags); 1439 if (end <= initialized_size) { 1440 ntfs_debug("Done."); 1441 return 0; 1442 } 1443 /* 1444 * Update initialized_size/i_size as appropriate, both in the inode and 1445 * the mft record. 1446 */ 1447 if (!NInoAttr(ni)) 1448 base_ni = ni; 1449 else 1450 base_ni = ni->ext.base_ntfs_ino; 1451 /* Map, pin, and lock the mft record. */ 1452 m = map_mft_record(base_ni); 1453 if (IS_ERR(m)) { 1454 err = PTR_ERR(m); 1455 m = NULL; 1456 ctx = NULL; 1457 goto err_out; 1458 } 1459 BUG_ON(!NInoNonResident(ni)); 1460 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1461 if (unlikely(!ctx)) { 1462 err = -ENOMEM; 1463 goto err_out; 1464 } 1465 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1466 CASE_SENSITIVE, 0, NULL, 0, ctx); 1467 if (unlikely(err)) { 1468 if (err == -ENOENT) 1469 err = -EIO; 1470 goto err_out; 1471 } 1472 a = ctx->attr; 1473 BUG_ON(!a->non_resident); 1474 write_lock_irqsave(&ni->size_lock, flags); 1475 BUG_ON(end > ni->allocated_size); 1476 ni->initialized_size = end; 1477 a->data.non_resident.initialized_size = cpu_to_sle64(end); 1478 if (end > i_size_read(vi)) { 1479 i_size_write(vi, end); 1480 a->data.non_resident.data_size = 1481 a->data.non_resident.initialized_size; 1482 } 1483 write_unlock_irqrestore(&ni->size_lock, flags); 1484 /* Mark the mft record dirty, so it gets written back. 
*/ 1485 flush_dcache_mft_record_page(ctx->ntfs_ino); 1486 mark_mft_record_dirty(ctx->ntfs_ino); 1487 ntfs_attr_put_search_ctx(ctx); 1488 unmap_mft_record(base_ni); 1489 ntfs_debug("Done."); 1490 return 0; 1491 err_out: 1492 if (ctx) 1493 ntfs_attr_put_search_ctx(ctx); 1494 if (m) 1495 unmap_mft_record(base_ni); 1496 ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error " 1497 "code %i).", err); 1498 if (err != -ENOMEM) 1499 NVolSetErrors(ni->vol); 1500 return err; 1501 } 1502 1503 /** 1504 * ntfs_commit_pages_after_write - commit the received data 1505 * @pages: array of destination pages 1506 * @nr_pages: number of pages in @pages 1507 * @pos: byte position in file at which the write begins 1508 * @bytes: number of bytes to be written 1509 * 1510 * This is called from ntfs_file_buffered_write() with i_mutex held on the inode 1511 * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are 1512 * locked but not kmap()ped. The source data has already been copied into the 1513 * @page. ntfs_prepare_pages_for_non_resident_write() has been called before 1514 * the data was copied (for non-resident attributes only) and it returned 1515 * success. 1516 * 1517 * Need to set uptodate and mark dirty all buffers within the boundary of the 1518 * write. If all buffers in a page are uptodate we set the page uptodate, too. 1519 * 1520 * Setting the buffers dirty ensures that they get written out later when 1521 * ntfs_writepage() is invoked by the VM. 1522 * 1523 * Finally, we need to update i_size and initialized_size as appropriate both 1524 * in the inode and the mft record. 1525 * 1526 * This is modelled after fs/buffer.c::generic_commit_write(), which marks 1527 * buffers uptodate and dirty, sets the page uptodate if all buffers in the 1528 * page are uptodate, and updates i_size if the end of io is beyond i_size. In 1529 * that case, it also marks the inode dirty. 1530 * 1531 * If things have gone as outlined in 1532 * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page 1533 * content modifications here for non-resident attributes. For resident 1534 * attributes we need to do the uptodate bringing here which we combine with 1535 * the copying into the mft record which means we save one atomic kmap. 1536 * 1537 * Return 0 on success or -errno on error. 1538 */ 1539 static int ntfs_commit_pages_after_write(struct page **pages, 1540 const unsigned nr_pages, s64 pos, size_t bytes) 1541 { 1542 s64 end, initialized_size; 1543 loff_t i_size; 1544 struct inode *vi; 1545 ntfs_inode *ni, *base_ni; 1546 struct page *page; 1547 ntfs_attr_search_ctx *ctx; 1548 MFT_RECORD *m; 1549 ATTR_RECORD *a; 1550 char *kattr, *kaddr; 1551 unsigned long flags; 1552 u32 attr_len; 1553 int err; 1554 1555 BUG_ON(!nr_pages); 1556 BUG_ON(!pages); 1557 page = pages[0]; 1558 BUG_ON(!page); 1559 vi = page->mapping->host; 1560 ni = NTFS_I(vi); 1561 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " 1562 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 1563 vi->i_ino, ni->type, page->index, nr_pages, 1564 (long long)pos, bytes); 1565 if (NInoNonResident(ni)) 1566 return ntfs_commit_pages_after_non_resident_write(pages, 1567 nr_pages, pos, bytes); 1568 BUG_ON(nr_pages > 1); 1569 /* 1570 * Attribute is resident, implying it is not compressed, encrypted, or 1571 * sparse. 1572 */ 1573 if (!NInoAttr(ni)) 1574 base_ni = ni; 1575 else 1576 base_ni = ni->ext.base_ntfs_ino; 1577 BUG_ON(NInoNonResident(ni)); 1578 /* Map, pin, and lock the mft record. 
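 * (map_mft_record() returns the mapped MFT_RECORD on success or an ERR_PTR() encoded error on failure, hence the IS_ERR()/PTR_ERR() handling that follows.)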
*/ 1579 m = map_mft_record(base_ni); 1580 if (IS_ERR(m)) { 1581 err = PTR_ERR(m); 1582 m = NULL; 1583 ctx = NULL; 1584 goto err_out; 1585 } 1586 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1587 if (unlikely(!ctx)) { 1588 err = -ENOMEM; 1589 goto err_out; 1590 } 1591 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1592 CASE_SENSITIVE, 0, NULL, 0, ctx); 1593 if (unlikely(err)) { 1594 if (err == -ENOENT) 1595 err = -EIO; 1596 goto err_out; 1597 } 1598 a = ctx->attr; 1599 BUG_ON(a->non_resident); 1600 /* The total length of the attribute value. */ 1601 attr_len = le32_to_cpu(a->data.resident.value_length); 1602 i_size = i_size_read(vi); 1603 BUG_ON(attr_len != i_size); 1604 BUG_ON(pos > attr_len); 1605 end = pos + bytes; 1606 BUG_ON(end > le32_to_cpu(a->length) - 1607 le16_to_cpu(a->data.resident.value_offset)); 1608 kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1609 kaddr = kmap_atomic(page); 1610 /* Copy the received data from the page to the mft record. */ 1611 memcpy(kattr + pos, kaddr + pos, bytes); 1612 /* Update the attribute length if necessary. */ 1613 if (end > attr_len) { 1614 attr_len = end; 1615 a->data.resident.value_length = cpu_to_le32(attr_len); 1616 } 1617 /* 1618 * If the page is not uptodate, bring the out of bounds area(s) 1619 * uptodate by copying data from the mft record to the page. 1620 */ 1621 if (!PageUptodate(page)) { 1622 if (pos > 0) 1623 memcpy(kaddr, kattr, pos); 1624 if (end < attr_len) 1625 memcpy(kaddr + end, kattr + end, attr_len - end); 1626 /* Zero the region outside the end of the attribute value. */ 1627 memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len); 1628 flush_dcache_page(page); 1629 SetPageUptodate(page); 1630 } 1631 kunmap_atomic(kaddr); 1632 /* Update initialized_size/i_size if necessary. */ 1633 read_lock_irqsave(&ni->size_lock, flags); 1634 initialized_size = ni->initialized_size; 1635 BUG_ON(end > ni->allocated_size); 1636 read_unlock_irqrestore(&ni->size_lock, flags); 1637 BUG_ON(initialized_size != i_size); 1638 if (end > initialized_size) { 1639 write_lock_irqsave(&ni->size_lock, flags); 1640 ni->initialized_size = end; 1641 i_size_write(vi, end); 1642 write_unlock_irqrestore(&ni->size_lock, flags); 1643 } 1644 /* Mark the mft record dirty, so it gets written back. */ 1645 flush_dcache_mft_record_page(ctx->ntfs_ino); 1646 mark_mft_record_dirty(ctx->ntfs_ino); 1647 ntfs_attr_put_search_ctx(ctx); 1648 unmap_mft_record(base_ni); 1649 ntfs_debug("Done."); 1650 return 0; 1651 err_out: 1652 if (err == -ENOMEM) { 1653 ntfs_warning(vi->i_sb, "Error allocating memory required to " 1654 "commit the write."); 1655 if (PageUptodate(page)) { 1656 ntfs_warning(vi->i_sb, "Page is uptodate, setting " 1657 "dirty so the write will be retried " 1658 "later on by the VM."); 1659 /* 1660 * Put the page on mapping->dirty_pages, but leave its 1661 * buffers' dirty state as-is. 1662 */ 1663 __set_page_dirty_nobuffers(page); 1664 err = 0; 1665 } else 1666 ntfs_error(vi->i_sb, "Page is not uptodate. Written " 1667 "data has been lost."); 1668 } else { 1669 ntfs_error(vi->i_sb, "Resident attribute commit write failed " 1670 "with error %i.", err); 1671 NVolSetErrors(ni->vol); 1672 } 1673 if (ctx) 1674 ntfs_attr_put_search_ctx(ctx); 1675 if (m) 1676 unmap_mft_record(base_ni); 1677 return err; 1678 } 1679 1680 /* 1681 * Copy as much as we can into the pages and return the number of bytes which 1682 * were successfully copied. 
If a fault is encountered then clear the pages 1683 * out to (ofs + bytes) and return the number of bytes which were copied. 1684 */ 1685 static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, 1686 unsigned ofs, struct iov_iter *i, size_t bytes) 1687 { 1688 struct page **last_page = pages + nr_pages; 1689 size_t total = 0; 1690 struct iov_iter data = *i; 1691 unsigned len, copied; 1692 1693 do { 1694 len = PAGE_SIZE - ofs; 1695 if (len > bytes) 1696 len = bytes; 1697 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, 1698 len); 1699 total += copied; 1700 bytes -= copied; 1701 if (!bytes) 1702 break; 1703 iov_iter_advance(&data, copied); 1704 if (copied < len) 1705 goto err; 1706 ofs = 0; 1707 } while (++pages < last_page); 1708 out: 1709 return total; 1710 err: 1711 /* Zero the rest of the target like __copy_from_user(). */ 1712 len = PAGE_SIZE - copied; 1713 do { 1714 if (len > bytes) 1715 len = bytes; 1716 zero_user(*pages, copied, len); 1717 bytes -= len; 1718 copied = 0; 1719 len = PAGE_SIZE; 1720 } while (++pages < last_page); 1721 goto out; 1722 } 1723 1724 /** 1725 * ntfs_perform_write - perform buffered write to a file 1726 * @file: file to write to 1727 * @i: iov_iter with data to write 1728 * @pos: byte offset in file at which to begin writing to 1729 */ 1730 static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, 1731 loff_t pos) 1732 { 1733 struct address_space *mapping = file->f_mapping; 1734 struct inode *vi = mapping->host; 1735 ntfs_inode *ni = NTFS_I(vi); 1736 ntfs_volume *vol = ni->vol; 1737 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1738 struct page *cached_page = NULL; 1739 VCN last_vcn; 1740 LCN lcn; 1741 size_t bytes; 1742 ssize_t status, written = 0; 1743 unsigned nr_pages; 1744 1745 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " 1746 "0x%llx, count 0x%lx.", vi->i_ino, 1747 (unsigned)le32_to_cpu(ni->type), 1748 (unsigned long long)pos, 1749 (unsigned long)iov_iter_count(i)); 1750 /* 1751 * If a previous ntfs_truncate() failed, repeat it and abort if it 1752 * fails again. 1753 */ 1754 if (unlikely(NInoTruncateFailed(ni))) { 1755 int err; 1756 1757 inode_dio_wait(vi); 1758 err = ntfs_truncate(vi); 1759 if (err || NInoTruncateFailed(ni)) { 1760 if (!err) 1761 err = -EIO; 1762 ntfs_error(vol->sb, "Cannot perform write to inode " 1763 "0x%lx, attribute type 0x%x, because " 1764 "ntfs_truncate() failed (error code " 1765 "%i).", vi->i_ino, 1766 (unsigned)le32_to_cpu(ni->type), err); 1767 return err; 1768 } 1769 } 1770 /* 1771 * Determine the number of pages per cluster for non-resident 1772 * attributes. 1773 */ 1774 nr_pages = 1; 1775 if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni)) 1776 nr_pages = vol->cluster_size >> PAGE_SHIFT; 1777 last_vcn = -1; 1778 do { 1779 VCN vcn; 1780 pgoff_t idx, start_idx; 1781 unsigned ofs, do_pages, u; 1782 size_t copied; 1783 1784 start_idx = idx = pos >> PAGE_SHIFT; 1785 ofs = pos & ~PAGE_MASK; 1786 bytes = PAGE_SIZE - ofs; 1787 do_pages = 1; 1788 if (nr_pages > 1) { 1789 vcn = pos >> vol->cluster_size_bits; 1790 if (vcn != last_vcn) { 1791 last_vcn = vcn; 1792 /* 1793 * Get the lcn of the vcn the write is in. If 1794 * it is a hole, need to lock down all pages in 1795 * the cluster. 
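 * (A hole means the cluster is not allocated yet; ntfs_prepare_pages_for_non_resident_write() will then allocate it, and that requires all pages of the cluster to be locked, so do_pages is bumped to nr_pages below.)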
				/*
				 * Get the lcn of the vcn the write is in. If
				 * it is a hole, need to lock down all pages in
				 * the cluster.
				 */
				down_read(&ni->runlist.lock);
				lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
						vol->cluster_size_bits, false);
				up_read(&ni->runlist.lock);
				if (unlikely(lcn < LCN_HOLE)) {
					if (lcn == LCN_ENOMEM)
						status = -ENOMEM;
					else {
						status = -EIO;
						ntfs_error(vol->sb, "Cannot "
							"perform write to "
							"inode 0x%lx, "
							"attribute type 0x%x, "
							"because the attribute "
							"is corrupt.",
							vi->i_ino, (unsigned)
							le32_to_cpu(ni->type));
					}
					break;
				}
				if (lcn == LCN_HOLE) {
					start_idx = (pos & ~(s64)
							vol->cluster_size_mask)
							>> PAGE_SHIFT;
					bytes = vol->cluster_size - (pos &
							vol->cluster_size_mask);
					do_pages = nr_pages;
				}
			}
		}
		if (bytes > iov_iter_count(i))
			bytes = iov_iter_count(i);
again:
		/*
		 * Bring in the user page(s) that we will copy from _first_.
		 * Otherwise there is a nasty deadlock on copying from the same
		 * page(s) as we are writing to, without it/them being marked
		 * up-to-date. Note, at present there is nothing to stop the
		 * pages being swapped out between us bringing them into memory
		 * and doing the actual copying.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}
		/* Get and lock @do_pages starting at index @start_idx. */
		status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
				pages, &cached_page);
		if (unlikely(status))
			break;
		/*
		 * For non-resident attributes, we need to fill any holes with
		 * actual clusters and ensure all buffers are mapped. We also
		 * need to bring uptodate any buffers that are only partially
		 * being written to.
		 */
		if (NInoNonResident(ni)) {
			status = ntfs_prepare_pages_for_non_resident_write(
					pages, do_pages, pos, bytes);
			if (unlikely(status)) {
				do {
					unlock_page(pages[--do_pages]);
					put_page(pages[do_pages]);
				} while (do_pages);
				break;
			}
		}
		u = (pos >> PAGE_SHIFT) - pages[0]->index;
		copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
				i, bytes);
		ntfs_flush_dcache_pages(pages + u, do_pages - u);
		status = 0;
		if (likely(copied == bytes)) {
			status = ntfs_commit_pages_after_write(pages, do_pages,
					pos, bytes);
			if (!status)
				status = bytes;
		}
		do {
			unlock_page(pages[--do_pages]);
			put_page(pages[do_pages]);
		} while (do_pages);
		if (unlikely(status < 0))
			break;
		copied = status;
		cond_resched();
		if (unlikely(!copied)) {
			size_t sc;

			/*
			 * We failed to copy anything. Fall back to single
			 * segment length write.
			 *
			 * This is needed to avoid possible livelock in the
			 * case that all segments in the iov cannot be copied
			 * at once without a pagefault.
			 */
			sc = iov_iter_single_seg_count(i);
			if (bytes > sc)
				bytes = sc;
			goto again;
		}
		iov_iter_advance(i, copied);
		pos += copied;
		written += copied;
		balance_dirty_pages_ratelimited(mapping);
		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}
	} while (iov_iter_count(i));
	if (cached_page)
		put_page(cached_page);
	ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
			written ? "written" : "status", (unsigned long)written,
			(long)status);
	return written ? written : status;
}
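
/*
 * Minimal illustration (an assumption for exposition, not part of this
 * driver): ntfs_perform_write() expects an iov_iter describing the source
 * buffer, so an in-kernel caller would set one up roughly like
 *
 *	struct kvec vec = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter from;
 *
 *	iov_iter_kvec(&from, WRITE, &vec, 1, len);
 *	ret = ntfs_perform_write(file, &from, pos);
 *
 * (the exact iov_iter_kvec() signature depends on the kernel version).  A
 * short return (0 < ret < len) means some bytes were committed before an
 * error or fatal signal; the error code itself is only returned when nothing
 * at all was written, matching the "written ? written : status" logic above.
 */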

/**
 * ntfs_file_write_iter - simple wrapper for ntfs_perform_write()
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * Basically the same as generic_file_write_iter() except that it ends up
 * calling ntfs_perform_write() instead of generic_perform_write() and that
 * O_DIRECT is not implemented.
 */
static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *vi = file_inode(file);
	ssize_t written = 0;
	ssize_t err;

	inode_lock(vi);
	/* We can write back this queue in page reclaim. */
	current->backing_dev_info = inode_to_bdi(vi);
	err = ntfs_prepare_file_for_write(iocb, from);
	if (iov_iter_count(from) && !err)
		written = ntfs_perform_write(file, from, iocb->ki_pos);
	current->backing_dev_info = NULL;
	inode_unlock(vi);
	iocb->ki_pos += written;
	if (likely(written > 0))
		written = generic_write_sync(iocb, written);
	return written ? written : err;
}
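
/*
 * A note on the call above (based on the generic VFS helpers rather than
 * anything NTFS-specific): generic_write_sync() is a no-op for a plain
 * buffered write; only when the write is marked synchronous (O_SYNC, O_DSYNC,
 * or an inode flagged for sync I/O) does it flush the just-written range,
 * roughly the equivalent of
 *
 *	vfs_fsync_range(file, iocb->ki_pos - written, iocb->ki_pos - 1,
 *			datasync);
 *
 * so ordinary writes return once the data sits in the page cache and reach
 * disk later via writeback or an explicit fsync() (see ntfs_file_fsync()
 * below).
 */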
"data" : "", vi->i_ino, -ret); 2000 inode_unlock(vi); 2001 return ret; 2002 } 2003 2004 #endif /* NTFS_RW */ 2005 2006 const struct file_operations ntfs_file_ops = { 2007 .llseek = generic_file_llseek, 2008 .read_iter = generic_file_read_iter, 2009 #ifdef NTFS_RW 2010 .write_iter = ntfs_file_write_iter, 2011 .fsync = ntfs_file_fsync, 2012 #endif /* NTFS_RW */ 2013 .mmap = generic_file_mmap, 2014 .open = ntfs_file_open, 2015 .splice_read = generic_file_splice_read, 2016 }; 2017 2018 const struct inode_operations ntfs_file_inode_ops = { 2019 #ifdef NTFS_RW 2020 .setattr = ntfs_setattr, 2021 #endif /* NTFS_RW */ 2022 }; 2023 2024 const struct file_operations ntfs_empty_file_ops = {}; 2025 2026 const struct inode_operations ntfs_empty_inode_ops = {}; 2027