/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * file.c
 *
 * File open, close, extend, truncate
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/sched.h>
#include <linux/splice.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>

#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "aops.h"
#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "sysfile.h"
#include "inode.h"
#include "ioctl.h"
#include "journal.h"
#include "mmap.h"
#include "suballoc.h"
#include "super.h"

#include "buffer_head_io.h"

static int ocfs2_sync_inode(struct inode *inode)
{
	filemap_fdatawrite(inode->i_mapping);
	return sync_mapping_buffers(inode->i_mapping);
}

static int ocfs2_file_open(struct inode *inode, struct file *file)
{
	int status;
	int mode = file->f_flags;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);

	spin_lock(&oi->ip_lock);

	/* Check that the inode hasn't been wiped from disk by another
	 * node. If it hasn't then we're safe as long as we hold the
	 * spin lock until our increment of open count.
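	 *
	 * The open count we take here is what tells the delete/wipe
	 * path (ocfs2_query_inode_wipe()) that a local holder still
	 * has this file open, so the on-disk inode can't be wiped
	 * out from under us.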
	 */
	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
		spin_unlock(&oi->ip_lock);

		status = -ENOENT;
		goto leave;
	}

	if (mode & O_DIRECT)
		oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;

	oi->ip_open_count++;
	spin_unlock(&oi->ip_lock);
	status = 0;
leave:
	mlog_exit(status);
	return status;
}

static int ocfs2_file_release(struct inode *inode, struct file *file)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
		   file->f_path.dentry->d_name.len,
		   file->f_path.dentry->d_name.name);

	spin_lock(&oi->ip_lock);
	if (!--oi->ip_open_count)
		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
	spin_unlock(&oi->ip_lock);

	mlog_exit(0);

	return 0;
}

static int ocfs2_sync_file(struct file *file,
			   struct dentry *dentry,
			   int datasync)
{
	int err = 0;
	journal_t *journal;
	struct inode *inode = dentry->d_inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
		   dentry->d_name.len, dentry->d_name.name);

	err = ocfs2_sync_inode(dentry->d_inode);
	if (err)
		goto bail;

	journal = osb->journal->j_journal;
	err = journal_force_commit(journal);

bail:
	mlog_exit(err);

	return (err < 0) ? -EIO : 0;
}

int ocfs2_should_update_atime(struct inode *inode,
			      struct vfsmount *vfsmnt)
{
	struct timespec now;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return 0;

	if ((inode->i_flags & S_NOATIME) ||
	    ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
		return 0;

	/*
	 * We can be called with no vfsmnt structure - NFSD will
	 * sometimes do this.
	 *
	 * Note that our action here is different than touch_atime() -
	 * if we can't tell whether this is a noatime mount, then we
	 * don't know whether to trust the value of s_atime_quantum.
	 */
	if (vfsmnt == NULL)
		return 0;

	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
		return 0;

	if (vfsmnt->mnt_flags & MNT_RELATIME) {
		if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
		    (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
			return 1;

		return 0;
	}

	now = CURRENT_TIME;
	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
		return 0;
	else
		return 1;
}

int ocfs2_update_inode_atime(struct inode *inode,
			     struct buffer_head *bh)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;

	mlog_entry_void();

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
	 * have i_mutex to guard against concurrent changes to other
	 * inode fields.
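	 *
	 * Only the atime fields are touched below; they are journaled
	 * directly through the dinode buffer we were handed.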
	 */
	inode->i_atime = CURRENT_TIME;
	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);

	ret = ocfs2_journal_dirty(handle, bh);
	if (ret < 0)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	mlog_exit(ret);
	return ret;
}

static int ocfs2_set_inode_size(handle_t *handle,
				struct inode *inode,
				struct buffer_head *fe_bh,
				u64 new_i_size)
{
	int status;

	mlog_entry_void();
	i_size_write(inode, new_i_size);
	inode->i_blocks = ocfs2_inode_sector_count(inode);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	mlog_exit(status);
	return status;
}

static int ocfs2_simple_size_update(struct inode *inode,
				    struct buffer_head *di_bh,
				    u64 new_i_size)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle = NULL;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_set_inode_size(handle, inode, di_bh,
				   new_i_size);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
				     struct inode *inode,
				     struct buffer_head *fe_bh,
				     u64 new_i_size)
{
	int status;
	handle_t *handle;
	struct ocfs2_dinode *di;
	u64 cluster_bytes;

	mlog_entry_void();

	/* TODO: This needs to actually orphan the inode in this
	 * transaction. */

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto out_commit;
	}

	/*
	 * Do this before setting i_size.
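	 *
	 * cluster_bytes below is new_i_size rounded up to a cluster
	 * boundary; the span between the two is the partial-cluster
	 * tail which has to be zeroed on disk.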
	 */
	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
					       cluster_bytes);
	if (status) {
		mlog_errno(status);
		goto out_commit;
	}

	i_size_write(inode, new_i_size);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	di = (struct ocfs2_dinode *) fe_bh->b_data;
	di->i_size = cpu_to_le64(new_i_size);
	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0)
		mlog_errno(status);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:

	mlog_exit(status);
	return status;
}

static int ocfs2_truncate_file(struct inode *inode,
			       struct buffer_head *di_bh,
			       u64 new_i_size)
{
	int status = 0;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_truncate_context *tc = NULL;

	mlog_entry("(inode = %llu, new_i_size = %llu)\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
		   (unsigned long long)new_i_size);

	fe = (struct ocfs2_dinode *) di_bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
			"Inode %llu, inode i_size = %lld != di "
			"i_size = %llu, i_flags = 0x%x\n",
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			i_size_read(inode),
			(unsigned long long)le64_to_cpu(fe->i_size),
			le32_to_cpu(fe->i_flags));

	if (new_i_size > le64_to_cpu(fe->i_size)) {
		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
		     (unsigned long long)le64_to_cpu(fe->i_size),
		     (unsigned long long)new_i_size);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
	     (unsigned long long)le64_to_cpu(fe->i_blkno),
	     (unsigned long long)le64_to_cpu(fe->i_size),
	     (unsigned long long)new_i_size);

	/* let's handle the simple truncate cases before doing any more
	 * cluster locking. */
	if (new_i_size == le64_to_cpu(fe->i_size))
		goto bail;

	down_write(&OCFS2_I(inode)->ip_alloc_sem);

	/* This forces other nodes to sync and drop their pages. Do
	 * this even if we have a truncate without allocation change -
	 * ocfs2 cluster sizes can be much greater than page size, so
	 * we have to truncate them anyway. */
	status = ocfs2_data_lock(inode, 1);
	if (status < 0) {
		up_write(&OCFS2_I(inode)->ip_alloc_sem);

		mlog_errno(status);
		goto bail;
	}

	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(inode->i_mapping, new_i_size);

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
					       i_size_read(inode), 0);
		if (status)
			mlog_errno(status);

		goto bail_unlock_data;
	}

	/* alright, we're going to need to do a full blown alloc size
	 * change. Orphan the inode so that recovery can complete the
	 * truncate if necessary. This does the task of marking
	 * i_size.
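	 *
	 * (See the TODO in ocfs2_orphan_for_truncate() - the orphan
	 * dir insertion isn't implemented yet, so today that call
	 * only journals the new size and zeroes the partial tail.)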
	 */
	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_data;
	}

	/* TODO: orphan dir cleanup here. */
bail_unlock_data:
	ocfs2_data_unlock(inode, 1);

	up_write(&OCFS2_I(inode)->ip_alloc_sem);

bail:

	mlog_exit(status);
	return status;
}

/*
 * extend allocation only here.
 * we'll update all the disk stuff, and oip->alloc_size
 *
 * expect stuff to be locked, a transaction started and enough data /
 * metadata reservations in the contexts.
 *
 * Will return -EAGAIN, and a reason if a restart is needed.
 * If passed in, *reason_ret will always be set, even on error.
 */
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
			       struct inode *inode,
			       u32 *logical_offset,
			       u32 clusters_to_add,
			       int mark_unwritten,
			       struct buffer_head *fe_bh,
			       handle_t *handle,
			       struct ocfs2_alloc_context *data_ac,
			       struct ocfs2_alloc_context *meta_ac,
			       enum ocfs2_alloc_restarted *reason_ret)
{
	int status = 0;
	int free_extents;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
	enum ocfs2_alloc_restarted reason = RESTART_NONE;
	u32 bit_off, num_bits;
	u64 block;
	u8 flags = 0;

	BUG_ON(!clusters_to_add);

	if (mark_unwritten)
		flags = OCFS2_EXT_UNWRITTEN;

	free_extents = ocfs2_num_free_extents(osb, inode, fe);
	if (free_extents < 0) {
		status = free_extents;
		mlog_errno(status);
		goto leave;
	}

	/* there are two cases which could cause us to return -EAGAIN
	 * in the we-need-more-metadata case:
	 * 1) we haven't reserved *any*
	 * 2) we are so fragmented, we've needed to add metadata too
	 * many times.
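	 *
	 * In either case the caller is expected to restart with a
	 * (larger) metadata reservation - hence RESTART_META below.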
	 */
	if (!free_extents && !meta_ac) {
		mlog(0, "we haven't reserved any metadata!\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	} else if ((!free_extents)
		   && (ocfs2_alloc_context_bits_left(meta_ac)
		       < ocfs2_extend_meta_needed(fe))) {
		mlog(0, "filesystem is really fragmented...\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	}

	status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
					clusters_to_add, &bit_off, &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	/* reserve our write early -- insert_extent may update the inode */
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
	status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
				     *logical_offset, block, num_bits,
				     flags, meta_ac);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= num_bits;
	*logical_offset += num_bits;

	if (clusters_to_add) {
		mlog(0, "need to alloc once more, clusters = %u, wanted = "
		     "%u\n", fe->i_clusters, clusters_to_add);
		status = -EAGAIN;
		reason = RESTART_TRANS;
	}

leave:
	mlog_exit(status);
	if (reason_ret)
		*reason_ret = reason;
	return status;
}

/*
 * For a given allocation, determine which allocators will need to be
 * accessed, and lock them, reserving the appropriate number of bits.
 *
 * Sparse file systems call this from ocfs2_write_begin_nolock()
 * and ocfs2_allocate_unwritten_extents().
 *
 * File systems which don't support holes call this from
 * ocfs2_extend_allocation().
 */
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
			  u32 clusters_to_add, u32 extents_to_split,
			  struct ocfs2_alloc_context **data_ac,
			  struct ocfs2_alloc_context **meta_ac)
{
	int ret = 0, num_free_extents;
	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	*meta_ac = NULL;
	if (data_ac)
		*data_ac = NULL;

	BUG_ON(clusters_to_add != 0 && data_ac == NULL);

	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
	     "clusters_to_add = %u, extents_to_split = %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
	     le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);

	num_free_extents = ocfs2_num_free_extents(osb, inode, di);
	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Sparse allocation file systems need to be more conservative
	 * with reserving room for expansion - the actual allocation
	 * happens while we've got a journal handle open so re-taking
	 * a cluster lock (because we ran out of room for another
	 * extent) will violate ordering rules.
	 *
	 * Most of the time we'll only be seeing this 1 cluster at a time
	 * anyway.
	 *
	 * Always lock for any unwritten extents - we might want to
	 * add blocks during a split.
	 */
	if (!num_free_extents ||
	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
		ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
		if (ret < 0) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_to_add == 0)
		goto out;

	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

out:
	if (ret) {
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null *data_ac.
		 */
	}

	return ret;
}

static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				     u32 clusters_to_add, int mark_unwritten)
{
	int status = 0;
	int restart_func = 0;
	int credits;
	u32 prev_clusters;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe = NULL;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

	/*
	 * This function only exists for file systems which don't
	 * support holes.
	 */
	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto leave;
	}

restart_all:
	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
				       &meta_ac);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	/* reserve a write to the file entry early on - that way if we
	 * run out of credits in the allocation path, we can still
	 * update i_size.
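	 *
	 * ocfs2_journal_access() charges the buffer to the handle up
	 * front, so the dinode update below can't fail for lack of
	 * journal credits after clusters have already been claimed.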
	 */
	status = ocfs2_journal_access(handle, inode, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = OCFS2_I(inode)->ip_clusters;

	status = ocfs2_do_extend_allocation(osb,
					    inode,
					    &logical_start,
					    clusters_to_add,
					    mark_unwritten,
					    bh,
					    handle,
					    data_ac,
					    meta_ac,
					    &why);
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	spin_lock(&OCFS2_I(inode)->ip_lock);
	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    fe,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
	     le32_to_cpu(fe->i_clusters),
	     (unsigned long long)le64_to_cpu(fe->i_size));
	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));

leave:
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}
	if (bh) {
		brelse(bh);
		bh = NULL;
	}

	mlog_exit(status);
	return status;
}

/* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->prepare_write() and
 * ->commit_write(). */
static int ocfs2_write_zero_page(struct inode *inode,
				 u64 size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	unsigned long index;
	unsigned int offset;
	handle_t *handle = NULL;
	int ret;

	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
	/* ugh.  in prepare/commit_write, if from==to==start of block, we
	** skip the prepare.  make sure we never send an offset for the start
	** of a block
	*/
	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
		offset++;
	}
	index = size >> PAGE_CACHE_SHIFT;

	page = grab_cache_page(mapping, index);
	if (!page) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	if (ocfs2_should_order_data(inode)) {
		handle = ocfs2_start_walk_page_trans(inode, page, offset,
						     offset);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			handle = NULL;
			goto out_unlock;
		}
	}

	/* must not update i_size!
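	 * block_commit_write(), unlike generic_commit_write(), leaves
	 * i_size alone; the caller grows i_size only once the whole
	 * extend has succeeded.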
	 */
	ret = block_commit_write(page, offset, offset);
	if (ret < 0)
		mlog_errno(ret);
	else
		ret = 0;

	if (handle)
		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}

static int ocfs2_zero_extend(struct inode *inode,
			     u64 zero_to_size)
{
	int ret = 0;
	u64 start_off;
	struct super_block *sb = inode->i_sb;

	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
	while (start_off < zero_to_size) {
		ret = ocfs2_write_zero_page(inode, start_off);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		start_off += sb->s_blocksize;

		/*
		 * Very large extends have the potential to lock up
		 * the cpu for extended periods of time.
		 */
		cond_resched();
	}

out:
	return ret;
}

int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
{
	int ret;
	u32 clusters_to_add;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
	if (clusters_to_add < oi->ip_clusters)
		clusters_to_add = 0;
	else
		clusters_to_add -= oi->ip_clusters;

	if (clusters_to_add) {
		ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
						clusters_to_add, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * Call this even if we don't add any clusters to the tree. We
	 * still need to zero the area between the old i_size and the
	 * new i_size.
	 */
	ret = ocfs2_zero_extend(inode, zero_to);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}

static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size)
{
	int ret = 0, data_locked = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	BUG_ON(!di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	if (i_size_read(inode) == new_i_size)
		goto out;
	BUG_ON(new_i_size < i_size_read(inode));

	/*
	 * Fall through for converting inline data, even if the fs
	 * supports sparse files.
	 *
	 * The check for inline data here is legal - nobody can add
	 * the feature since we have i_mutex. We must check it again
	 * after acquiring ip_alloc_sem though, as paths like mmap
	 * might have raced us to converting the inode to extents.
	 */
	if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
	    && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		goto out_update_size;

	/*
	 * protect the pages that ocfs2_zero_extend is going to be
	 * pulling into the page cache. We do this before the
	 * metadata extend so that we don't get into the situation
	 * where we've extended the metadata but can't get the data
	 * lock to zero.
	 */
	ret = ocfs2_data_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}
	data_locked = 1;

	/*
	 * The alloc sem blocks people in read/write from reading our
	 * allocation until we're done changing it. We depend on
	 * i_mutex to block other extend/truncate calls while we're
	 * here.
	 */
	down_write(&oi->ip_alloc_sem);

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/*
		 * We can optimize small extends by keeping the inode's
		 * inline data.
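		 *
		 * If the new size no longer fits, fall through and
		 * convert the inode to extents before the regular
		 * extend path runs.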
		 */
		if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
			up_write(&oi->ip_alloc_sem);
			goto out_update_size;
		}

		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
		if (ret) {
			up_write(&oi->ip_alloc_sem);

			mlog_errno(ret);
			goto out_unlock;
		}
	}

	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);

	up_write(&oi->ip_alloc_sem);

	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

out_update_size:
	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
	if (ret < 0)
		mlog_errno(ret);

out_unlock:
	if (data_locked)
		ocfs2_data_unlock(inode, 1);

out:
	return ret;
}

int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
	int status = 0, size_change;
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct buffer_head *bh = NULL;
	handle_t *handle = NULL;

	mlog_entry("(0x%p, '%.*s')\n", dentry,
		   dentry->d_name.len, dentry->d_name.name);

	if (attr->ia_valid & ATTR_MODE)
		mlog(0, "mode change: %d\n", attr->ia_mode);
	if (attr->ia_valid & ATTR_UID)
		mlog(0, "uid change: %d\n", attr->ia_uid);
	if (attr->ia_valid & ATTR_GID)
		mlog(0, "gid change: %d\n", attr->ia_gid);
	if (attr->ia_valid & ATTR_SIZE)
		mlog(0, "size change...\n");
	if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
		mlog(0, "time change...\n");

#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
			   | ATTR_GID | ATTR_UID | ATTR_MODE)
	if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
		mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
		return 0;
	}

	status = inode_change_ok(inode, attr);
	if (status)
		return status;

	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
	if (size_change) {
		status = ocfs2_rw_lock(inode, 1);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = ocfs2_meta_lock(inode, &bh, 1);
	if (status < 0) {
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail_unlock_rw;
	}

	if (size_change && attr->ia_size != i_size_read(inode)) {
		if (attr->ia_size > sb->s_maxbytes) {
			status = -EFBIG;
			goto bail_unlock;
		}

		if (i_size_read(inode) > attr->ia_size)
			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
		else
			status = ocfs2_extend_file(inode, bh, attr->ia_size);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			status = -ENOSPC;
			goto bail_unlock;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		mlog_errno(status);
		goto bail_unlock;
	}

	/*
	 * This will intentionally not wind up calling vmtruncate(),
	 * since all the work for a size change has been done above.
	 * Otherwise, we could get into problems with truncate as
	 * ip_alloc_sem is used there to protect against i_size
	 * changes.
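	 *
	 * (inode_setattr() only calls vmtruncate() when the new size
	 * differs from i_size, and the extend/truncate above has
	 * already brought the two into line.)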
	 */
	status = inode_setattr(inode, attr);
	if (status < 0) {
		mlog_errno(status);
		goto bail_commit;
	}

	status = ocfs2_mark_inode_dirty(handle, inode, bh);
	if (status < 0)
		mlog_errno(status);

bail_commit:
	ocfs2_commit_trans(osb, handle);
bail_unlock:
	ocfs2_meta_unlock(inode, 1);
bail_unlock_rw:
	if (size_change)
		ocfs2_rw_unlock(inode, 1);
bail:
	if (bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}

int ocfs2_getattr(struct vfsmount *mnt,
		  struct dentry *dentry,
		  struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = dentry->d_inode->i_sb;
	struct ocfs2_super *osb = sb->s_fs_info;
	int err;

	mlog_entry_void();

	err = ocfs2_inode_revalidate(dentry);
	if (err) {
		if (err != -ENOENT)
			mlog_errno(err);
		goto bail;
	}

	generic_fillattr(inode, stat);

	/* We set the blksize from the cluster size for performance */
	stat->blksize = osb->s_clustersize;

bail:
	mlog_exit(err);

	return err;
}

int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
{
	int ret;

	mlog_entry_void();

	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret) {
		if (ret != -ENOENT)
			mlog_errno(ret);
		goto out;
	}

	ret = generic_permission(inode, mask, NULL);

	ocfs2_meta_unlock(inode, 0);
out:
	mlog_exit(ret);
	return ret;
}

static int __ocfs2_write_remove_suid(struct inode *inode,
				     struct buffer_head *bh)
{
	int ret;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di;

	mlog_entry("(Inode %llu, mode 0%o)\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_trans;
	}

	inode->i_mode &= ~S_ISUID;
	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
		inode->i_mode &= ~S_ISGID;

	di = (struct ocfs2_dinode *) bh->b_data;
	di->i_mode = cpu_to_le16(inode->i_mode);

	ret = ocfs2_journal_dirty(handle, bh);
	if (ret < 0)
		mlog_errno(ret);

out_trans:
	ocfs2_commit_trans(osb, handle);
out:
	mlog_exit(ret);
	return ret;
}

/*
 * Will look for holes and unwritten extents in the range starting at
 * pos for count bytes (inclusive).
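 *
 * Returns 1 if any were found, 0 if the range is fully allocated and
 * written, and a negative error code on failure.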
 */
static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
				       size_t count)
{
	int ret = 0;
	unsigned int extent_flags;
	u32 cpos, clusters, extent_len, phys_cpos;
	struct super_block *sb = inode->i_sb;

	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;

	while (clusters) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
					 &extent_flags);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
			ret = 1;
			break;
		}

		if (extent_len > clusters)
			extent_len = clusters;

		clusters -= extent_len;
		cpos += extent_len;
	}
out:
	return ret;
}

static int ocfs2_write_remove_suid(struct inode *inode)
{
	int ret;
	struct buffer_head *bh = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
			       oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = __ocfs2_write_remove_suid(inode, bh);
out:
	brelse(bh);
	return ret;
}

/*
 * Allocate enough extents to cover the region starting at byte offset
 * start for len bytes. Existing extents are skipped, any extents
 * added are marked as "unwritten".
 */
static int ocfs2_allocate_unwritten_extents(struct inode *inode,
					    u64 start, u64 len)
{
	int ret;
	u32 cpos, phys_cpos, clusters, alloc_size;
	u64 end = start + len;
	struct buffer_head *di_bh = NULL;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       OCFS2_I(inode)->ip_blkno, &di_bh,
				       OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Nothing to do if the requested reservation range
		 * fits within the inode.
		 */
		if (ocfs2_size_fits_inline_data(di_bh, end))
			goto out;

		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * We consider both start and len to be inclusive.
	 */
	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
	clusters -= cpos;

	while (clusters) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					 &alloc_size, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Hole or existing extent len can be arbitrary, so
		 * cap it to our own allocation request.
		 */
		if (alloc_size > clusters)
			alloc_size = clusters;

		if (phys_cpos) {
			/*
			 * We already have an allocation at this
			 * region so we can safely skip it.
1329 */ 1330 goto next; 1331 } 1332 1333 ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); 1334 if (ret) { 1335 if (ret != -ENOSPC) 1336 mlog_errno(ret); 1337 goto out; 1338 } 1339 1340 next: 1341 cpos += alloc_size; 1342 clusters -= alloc_size; 1343 } 1344 1345 ret = 0; 1346 out: 1347 1348 brelse(di_bh); 1349 return ret; 1350 } 1351 1352 static int __ocfs2_remove_inode_range(struct inode *inode, 1353 struct buffer_head *di_bh, 1354 u32 cpos, u32 phys_cpos, u32 len, 1355 struct ocfs2_cached_dealloc_ctxt *dealloc) 1356 { 1357 int ret; 1358 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 1359 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1360 struct inode *tl_inode = osb->osb_tl_inode; 1361 handle_t *handle; 1362 struct ocfs2_alloc_context *meta_ac = NULL; 1363 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1364 1365 ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); 1366 if (ret) { 1367 mlog_errno(ret); 1368 return ret; 1369 } 1370 1371 mutex_lock(&tl_inode->i_mutex); 1372 1373 if (ocfs2_truncate_log_needs_flush(osb)) { 1374 ret = __ocfs2_flush_truncate_log(osb); 1375 if (ret < 0) { 1376 mlog_errno(ret); 1377 goto out; 1378 } 1379 } 1380 1381 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 1382 if (handle == NULL) { 1383 ret = -ENOMEM; 1384 mlog_errno(ret); 1385 goto out; 1386 } 1387 1388 ret = ocfs2_journal_access(handle, inode, di_bh, 1389 OCFS2_JOURNAL_ACCESS_WRITE); 1390 if (ret) { 1391 mlog_errno(ret); 1392 goto out; 1393 } 1394 1395 ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, 1396 dealloc); 1397 if (ret) { 1398 mlog_errno(ret); 1399 goto out_commit; 1400 } 1401 1402 OCFS2_I(inode)->ip_clusters -= len; 1403 di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1404 1405 ret = ocfs2_journal_dirty(handle, di_bh); 1406 if (ret) { 1407 mlog_errno(ret); 1408 goto out_commit; 1409 } 1410 1411 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 1412 if (ret) 1413 mlog_errno(ret); 1414 1415 out_commit: 1416 ocfs2_commit_trans(osb, handle); 1417 out: 1418 mutex_unlock(&tl_inode->i_mutex); 1419 1420 if (meta_ac) 1421 ocfs2_free_alloc_context(meta_ac); 1422 1423 return ret; 1424 } 1425 1426 /* 1427 * Truncate a byte range, avoiding pages within partial clusters. This 1428 * preserves those pages for the zeroing code to write to. 1429 */ 1430 static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, 1431 u64 byte_len) 1432 { 1433 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1434 loff_t start, end; 1435 struct address_space *mapping = inode->i_mapping; 1436 1437 start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); 1438 end = byte_start + byte_len; 1439 end = end & ~(osb->s_clustersize - 1); 1440 1441 if (start < end) { 1442 unmap_mapping_range(mapping, start, end - start, 0); 1443 truncate_inode_pages_range(mapping, start, end - 1); 1444 } 1445 } 1446 1447 static int ocfs2_zero_partial_clusters(struct inode *inode, 1448 u64 start, u64 len) 1449 { 1450 int ret = 0; 1451 u64 tmpend, end = start + len; 1452 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1453 unsigned int csize = osb->s_clustersize; 1454 handle_t *handle; 1455 1456 /* 1457 * The "start" and "end" values are NOT necessarily part of 1458 * the range whose allocation is being deleted. Rather, this 1459 * is what the user passed in with the request. We must zero 1460 * partial clusters here. 
	mlog(0, "byte start: %llu, end: %llu\n",
	     (unsigned long long)start, (unsigned long long)end);

	/*
	 * If both edges are on a cluster boundary then there's no
	 * zeroing required as the region is part of the allocation to
	 * be truncated.
	 */
	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
		goto out;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We want to get the byte offset of the end of the 1st cluster.
	 */
	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
	if (tmpend > end)
		tmpend = end;

	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
	     (unsigned long long)start, (unsigned long long)tmpend);

	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
	if (ret)
		mlog_errno(ret);

	if (tmpend < end) {
		/*
		 * This may make start and end equal, but the zeroing
		 * code will skip any work in that case so there's no
		 * need to catch it up here.
		 */
		start = end & ~(osb->s_clustersize - 1);

		mlog(0, "2nd range: start: %llu, end: %llu\n",
		     (unsigned long long)start, (unsigned long long)end);

		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
		if (ret)
			mlog_errno(ret);
	}

	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}

static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	int ret = 0;
	u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (byte_len == 0)
		return 0;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
					    byte_start + byte_len, 1);
		if (ret)
			mlog_errno(ret);
		return ret;
	}

	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
	if (trunc_len >= trunc_start)
		trunc_len -= trunc_start;
	else
		trunc_len = 0;

	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     (unsigned long long)byte_start,
	     (unsigned long long)byte_len, trunc_start, trunc_len);

	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	cpos = trunc_start;
	while (trunc_len) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					 &alloc_size, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		/* Only do work for non-holes */
		if (phys_cpos != 0) {
			ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
							 phys_cpos, alloc_size,
							 &dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
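	/* Release the metadata blocks that ocfs2_remove_extent()
	 * collected in the dealloc context. */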
	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}

/*
 * Parts of this function taken from xfs_change_file_space()
 */
static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
				     loff_t f_pos, unsigned int cmd,
				     struct ocfs2_space_resv *sr,
				     int change_size)
{
	int ret;
	s64 llen;
	loff_t size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *di_bh = NULL;
	handle_t *handle;
	unsigned long long max_off = inode->i_sb->s_maxbytes;

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return -EROFS;

	mutex_lock(&inode->i_mutex);

	/*
	 * This prevents concurrent writes on other nodes
	 */
	ret = ocfs2_rw_lock(inode, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_meta_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_rw_unlock;
	}

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
		ret = -EPERM;
		goto out_meta_unlock;
	}

	switch (sr->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		sr->l_start += f_pos;
		break;
	case 2: /*SEEK_END*/
		sr->l_start += i_size_read(inode);
		break;
	default:
		ret = -EINVAL;
		goto out_meta_unlock;
	}
	sr->l_whence = 0;

	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;

	if (sr->l_start < 0
	    || sr->l_start > max_off
	    || (sr->l_start + llen) < 0
	    || (sr->l_start + llen) > max_off) {
		ret = -EINVAL;
		goto out_meta_unlock;
	}
	size = sr->l_start + sr->l_len;

	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
		if (sr->l_len <= 0) {
			ret = -EINVAL;
			goto out_meta_unlock;
		}
	}

	if (file && should_remove_suid(file->f_path.dentry)) {
		ret = __ocfs2_write_remove_suid(inode, di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_meta_unlock;
		}
	}

	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	switch (cmd) {
	case OCFS2_IOC_RESVSP:
	case OCFS2_IOC_RESVSP64:
		/*
		 * This takes unsigned offsets, but the signed ones we
		 * pass have been checked against overflow above.
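		 *
		 * (sr->l_start and sr->l_len are s64; the range checks
		 * against max_off above guarantee both are non-negative
		 * by the time we get here.)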
		 */
		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
						       sr->l_len);
		break;
	case OCFS2_IOC_UNRESVSP:
	case OCFS2_IOC_UNRESVSP64:
		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
					       sr->l_len);
		break;
	default:
		ret = -EINVAL;
	}
	up_write(&OCFS2_I(inode)->ip_alloc_sem);
	if (ret) {
		mlog_errno(ret);
		goto out_meta_unlock;
	}

	/*
	 * We update c/mtime for these changes
	 */
	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_meta_unlock;
	}

	if (change_size && i_size_read(inode) < size)
		i_size_write(inode, size);

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);

out_meta_unlock:
	brelse(di_bh);
	ocfs2_meta_unlock(inode, 1);
out_rw_unlock:
	ocfs2_rw_unlock(inode, 1);

	mutex_unlock(&inode->i_mutex);
out:
	return ret;
}

int ocfs2_change_file_space(struct file *file, unsigned int cmd,
			    struct ocfs2_space_resv *sr)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
	    !ocfs2_writes_unwritten_extents(osb))
		return -ENOTTY;
	else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
		 !ocfs2_sparse_alloc(osb))
		return -ENOTTY;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
}

static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
			    loff_t len)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_space_resv sr;
	int change_size = 1;

	if (!ocfs2_writes_unwritten_extents(osb))
		return -EOPNOTSUPP;

	if (S_ISDIR(inode->i_mode))
		return -ENODEV;

	if (mode & FALLOC_FL_KEEP_SIZE)
		change_size = 0;

	sr.l_whence = 0;
	sr.l_start = (s64)offset;
	sr.l_len = (s64)len;

	return __ocfs2_change_file_space(NULL, inode, offset,
					 OCFS2_IOC_RESVSP64, &sr, change_size);
}

static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
					 loff_t *ppos,
					 size_t count,
					 int appending,
					 int *direct_io)
{
	int ret = 0, meta_level = 0;
	struct inode *inode = dentry->d_inode;
	loff_t saved_pos, end;

	/*
	 * We start with a read level meta lock and only jump to an ex
	 * if we need to make modifications here.
	 */
	for(;;) {
		ret = ocfs2_meta_lock(inode, NULL, meta_level);
		if (ret < 0) {
			meta_level = -1;
			mlog_errno(ret);
			goto out;
		}

		/* Clear suid / sgid if necessary. We do this here
		 * instead of later in the write path because
		 * remove_suid() calls ->setattr without any hint that
		 * we may have already done our cluster locking. Since
		 * ocfs2_setattr() *must* take cluster locks to
		 * proceed, this will lead us to recursively lock the
		 * inode. There's also the dinode i_size state which
		 * can be lost via setattr during extending writes (we
		 * set inode->i_size at the end of a write).
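		 *
		 * Doing it ourselves here, under cluster locks we
		 * already hold, avoids both problems.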
		 */
		if (should_remove_suid(dentry)) {
			if (meta_level == 0) {
				ocfs2_meta_unlock(inode, meta_level);
				meta_level = 1;
				continue;
			}

			ret = ocfs2_write_remove_suid(inode);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_unlock;
			}
		}

		/* work on a copy of ppos until we're sure that we won't have
		 * to recalculate it due to relocking. */
		if (appending) {
			saved_pos = i_size_read(inode);
			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
		} else {
			saved_pos = *ppos;
		}

		end = saved_pos + count;

		/*
		 * Skip the O_DIRECT checks if we don't need
		 * them.
		 */
		if (!direct_io || !(*direct_io))
			break;

		/*
		 * There's no sane way to do direct writes to an inode
		 * with inline data.
		 */
		if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
			*direct_io = 0;
			break;
		}

		/*
		 * Allowing concurrent direct writes means
		 * i_size changes wouldn't be synchronized, so
		 * one node could wind up truncating another
		 * node's writes.
		 */
		if (end > i_size_read(inode)) {
			*direct_io = 0;
			break;
		}

		/*
		 * We don't fill holes during direct io, so
		 * check for them here. If any are found, the
		 * caller will have to retake some cluster
		 * locks and initiate the io as buffered.
		 */
		ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
		if (ret == 1) {
			*direct_io = 0;
			ret = 0;
		} else if (ret < 0)
			mlog_errno(ret);
		break;
	}

	if (appending)
		*ppos = saved_pos;

out_unlock:
	ocfs2_meta_unlock(inode, meta_level);

out:
	return ret;
}

static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
				    const struct iovec *iov,
				    unsigned long nr_segs,
				    loff_t pos)
{
	int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
	int can_do_direct;
	ssize_t written = 0;
	size_t ocount;		/* original count */
	size_t count;		/* after file limit checks */
	loff_t *ppos = &iocb->ki_pos;
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_path.dentry->d_inode;

	mlog_entry("(0x%p, %u, '%.*s')\n", file,
		   (unsigned int)nr_segs,
		   file->f_path.dentry->d_name.len,
		   file->f_path.dentry->d_name.name);

	if (iocb->ki_left == 0)
		return 0;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	appending = file->f_flags & O_APPEND ? 1 : 0;
	direct_io = file->f_flags & O_DIRECT ? 1 : 0;

	mutex_lock(&inode->i_mutex);

relock:
	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
	if (direct_io) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;
	}

	/* concurrent O_DIRECT writes are allowed */
	rw_level = !direct_io;
	ret = ocfs2_rw_lock(inode, rw_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_sems;
	}

	can_do_direct = direct_io;
	ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
					    iocb->ki_left, appending,
					    &can_do_direct);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We can't complete the direct I/O as requested, fall back to
	 * buffered I/O.
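	 *
	 * Drop everything we took for the direct path and retake it
	 * in buffered mode - the two modes differ in rw lock level
	 * and in whether i_alloc_sem is held.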
	 */
	if (direct_io && !can_do_direct) {
		ocfs2_rw_unlock(inode, rw_level);
		up_read(&inode->i_alloc_sem);

		have_alloc_sem = 0;
		rw_level = -1;

		direct_io = 0;
		goto relock;
	}

	/* communicate with ocfs2_dio_end_io */
	ocfs2_iocb_set_rw_locked(iocb, rw_level);

	if (direct_io) {
		ret = generic_segment_checks(iov, &nr_segs, &ocount,
					     VERIFY_READ);
		if (ret)
			goto out_dio;

		ret = generic_write_checks(file, ppos, &count,
					   S_ISBLK(inode->i_mode));
		if (ret)
			goto out_dio;

		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
						    ppos, count, ocount);
		if (written < 0) {
			ret = written;
			goto out_dio;
		}
	} else {
		written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
							*ppos);
	}

out_dio:
	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));

	/*
	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in an
	 * ocfs2_dio_end_io function pointer which is called when
	 * o_direct io completes so that it can unlock our rw lock.
	 * (it's the clustered equivalent of i_alloc_sem; protects
	 * truncate from racing with pending ios).
	 * Unfortunately there are error cases which call end_io and
	 * others that don't. So we don't have to unlock the rw_lock
	 * if either an async dio is going to do it in the future or
	 * an end_io after an error has already done it.
	 */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

out:
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);

out_sems:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);

	mutex_unlock(&inode->i_mutex);

	mlog_exit(ret);
	return written ? written : ret;
}

static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				       struct file *out,
				       loff_t *ppos,
				       size_t len,
				       unsigned int flags)
{
	int ret;
	struct inode *inode = out->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
		   (unsigned int)len,
		   out->f_path.dentry->d_name.len,
		   out->f_path.dentry->d_name.name);

	inode_double_lock(inode, pipe->inode);

	ret = ocfs2_rw_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
					    NULL);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);

out_unlock:
	ocfs2_rw_unlock(inode, 1);
out:
	inode_double_unlock(inode, pipe->inode);

	mlog_exit(ret);
	return ret;
}

static ssize_t ocfs2_file_splice_read(struct file *in,
				      loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len,
				      unsigned int flags)
{
	int ret = 0;
	struct inode *inode = in->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
		   (unsigned int)len,
		   in->f_path.dentry->d_name.len,
		   in->f_path.dentry->d_name.name);

	/*
	 * See the comment in ocfs2_file_aio_read()
	 */
	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_meta_unlock(inode, 0);

	ret = generic_file_splice_read(in, ppos, pipe, len, flags);

bail:
	mlog_exit(ret);
	return ret;
}

static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
				   const struct iovec *iov,
				   unsigned long nr_segs,
				   loff_t pos)
{
	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;

	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
		   (unsigned int)nr_segs,
		   filp->f_path.dentry->d_name.len,
		   filp->f_path.dentry->d_name.name);

	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto bail;
	}

	/*
	 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
	 * need locks to protect pending reads from racing with truncate.
	 */
	if (filp->f_flags & O_DIRECT) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;

		ret = ocfs2_rw_lock(inode, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
		rw_level = 0;
		/* communicate with ocfs2_dio_end_io */
		ocfs2_iocb_set_rw_locked(iocb, rw_level);
	}

	/*
	 * We're fine letting folks race truncates and extending
	 * writes with read across the cluster, just like they can
	 * locally. Hence no rw_lock during read.
	 *
	 * Take and drop the meta data lock to update inode fields
	 * like i_size. This gives the checks down in
	 * generic_file_aio_read() a chance of actually working.
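	 *
	 * The lock is dropped again right away - we only need the
	 * inode refresh it performs, not continued coverage.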
	 */
	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_meta_unlock(inode, lock_level);

	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
	if (ret == -EINVAL)
		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));

	/* see ocfs2_file_aio_write */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

bail:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);
	mlog_exit(ret);

	return ret;
}

const struct inode_operations ocfs2_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
	.fallocate	= ocfs2_fallocate,
};

const struct inode_operations ocfs2_special_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
};

const struct file_operations ocfs2_fops = {
	.read		= do_sync_read,
	.write		= do_sync_write,
	.mmap		= ocfs2_mmap,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_file_release,
	.open		= ocfs2_file_open,
	.aio_read	= ocfs2_file_aio_read,
	.aio_write	= ocfs2_file_aio_write,
	.ioctl		= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
	.splice_read	= ocfs2_file_splice_read,
	.splice_write	= ocfs2_file_splice_write,
};

const struct file_operations ocfs2_dops = {
	.read		= generic_read_dir,
	.readdir	= ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.ioctl		= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ocfs2_compat_ioctl,
#endif
};