1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_types.h" 21 #include "xfs_bit.h" 22 #include "xfs_log.h" 23 #include "xfs_inum.h" 24 #include "xfs_trans.h" 25 #include "xfs_buf_item.h" 26 #include "xfs_sb.h" 27 #include "xfs_ag.h" 28 #include "xfs_dir2.h" 29 #include "xfs_dmapi.h" 30 #include "xfs_mount.h" 31 #include "xfs_trans_priv.h" 32 #include "xfs_bmap_btree.h" 33 #include "xfs_alloc_btree.h" 34 #include "xfs_ialloc_btree.h" 35 #include "xfs_dir2_sf.h" 36 #include "xfs_attr_sf.h" 37 #include "xfs_dinode.h" 38 #include "xfs_inode.h" 39 #include "xfs_inode_item.h" 40 #include "xfs_btree.h" 41 #include "xfs_ialloc.h" 42 #include "xfs_rw.h" 43 #include "xfs_error.h" 44 45 46 kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 47 48 /* 49 * This returns the number of iovecs needed to log the given inode item. 50 * 51 * We need one iovec for the inode log format structure, one for the 52 * inode core, and possibly one for the inode data/extents/b-tree root 53 * and one for the inode attribute data/extents/b-tree root. 54 */ 55 STATIC uint 56 xfs_inode_item_size( 57 xfs_inode_log_item_t *iip) 58 { 59 uint nvecs; 60 xfs_inode_t *ip; 61 62 ip = iip->ili_inode; 63 nvecs = 2; 64 65 /* 66 * Only log the data/extents/b-tree root if there is something 67 * left to log. 68 */ 69 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 70 71 switch (ip->i_d.di_format) { 72 case XFS_DINODE_FMT_EXTENTS: 73 iip->ili_format.ilf_fields &= 74 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 75 XFS_ILOG_DEV | XFS_ILOG_UUID); 76 if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && 77 (ip->i_d.di_nextents > 0) && 78 (ip->i_df.if_bytes > 0)) { 79 ASSERT(ip->i_df.if_u1.if_extents != NULL); 80 nvecs++; 81 } else { 82 iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; 83 } 84 break; 85 86 case XFS_DINODE_FMT_BTREE: 87 ASSERT(ip->i_df.if_ext_max == 88 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); 89 iip->ili_format.ilf_fields &= 90 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 91 XFS_ILOG_DEV | XFS_ILOG_UUID); 92 if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && 93 (ip->i_df.if_broot_bytes > 0)) { 94 ASSERT(ip->i_df.if_broot != NULL); 95 nvecs++; 96 } else { 97 ASSERT(!(iip->ili_format.ilf_fields & 98 XFS_ILOG_DBROOT)); 99 #ifdef XFS_TRANS_DEBUG 100 if (iip->ili_root_size > 0) { 101 ASSERT(iip->ili_root_size == 102 ip->i_df.if_broot_bytes); 103 ASSERT(memcmp(iip->ili_orig_root, 104 ip->i_df.if_broot, 105 iip->ili_root_size) == 0); 106 } else { 107 ASSERT(ip->i_df.if_broot_bytes == 0); 108 } 109 #endif 110 iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; 111 } 112 break; 113 114 case XFS_DINODE_FMT_LOCAL: 115 iip->ili_format.ilf_fields &= 116 ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | 117 XFS_ILOG_DEV | XFS_ILOG_UUID); 118 if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && 119 (ip->i_df.if_bytes > 0)) { 120 ASSERT(ip->i_df.if_u1.if_data != NULL); 121 ASSERT(ip->i_d.di_size > 0); 122 nvecs++; 123 } else { 124 iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; 125 } 126 break; 127 128 case XFS_DINODE_FMT_DEV: 129 iip->ili_format.ilf_fields &= 130 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 131 XFS_ILOG_DEXT | XFS_ILOG_UUID); 132 break; 133 134 case XFS_DINODE_FMT_UUID: 135 iip->ili_format.ilf_fields &= 136 ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 137 XFS_ILOG_DEXT | XFS_ILOG_DEV); 138 break; 139 140 default: 141 ASSERT(0); 142 break; 143 } 144 145 /* 146 * If there are no attributes associated with this file, 147 * then there cannot be anything more to log. 148 * Clear all attribute-related log flags. 149 */ 150 if (!XFS_IFORK_Q(ip)) { 151 iip->ili_format.ilf_fields &= 152 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); 153 return nvecs; 154 } 155 156 /* 157 * Log any necessary attribute data. 158 */ 159 switch (ip->i_d.di_aformat) { 160 case XFS_DINODE_FMT_EXTENTS: 161 iip->ili_format.ilf_fields &= 162 ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); 163 if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && 164 (ip->i_d.di_anextents > 0) && 165 (ip->i_afp->if_bytes > 0)) { 166 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 167 nvecs++; 168 } else { 169 iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; 170 } 171 break; 172 173 case XFS_DINODE_FMT_BTREE: 174 iip->ili_format.ilf_fields &= 175 ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); 176 if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && 177 (ip->i_afp->if_broot_bytes > 0)) { 178 ASSERT(ip->i_afp->if_broot != NULL); 179 nvecs++; 180 } else { 181 iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; 182 } 183 break; 184 185 case XFS_DINODE_FMT_LOCAL: 186 iip->ili_format.ilf_fields &= 187 ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); 188 if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && 189 (ip->i_afp->if_bytes > 0)) { 190 ASSERT(ip->i_afp->if_u1.if_data != NULL); 191 nvecs++; 192 } else { 193 iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; 194 } 195 break; 196 197 default: 198 ASSERT(0); 199 break; 200 } 201 202 return nvecs; 203 } 204 205 /* 206 * This is called to fill in the vector of log iovecs for the 207 * given inode log item. It fills the first item with an inode 208 * log format structure, the second with the on-disk inode structure, 209 * and a possible third and/or fourth with the inode data/extents/b-tree 210 * root and inode attributes data/extents/b-tree root. 211 */ 212 STATIC void 213 xfs_inode_item_format( 214 xfs_inode_log_item_t *iip, 215 xfs_log_iovec_t *log_vector) 216 { 217 uint nvecs; 218 xfs_log_iovec_t *vecp; 219 xfs_inode_t *ip; 220 size_t data_bytes; 221 xfs_bmbt_rec_t *ext_buffer; 222 int nrecs; 223 xfs_mount_t *mp; 224 225 ip = iip->ili_inode; 226 vecp = log_vector; 227 228 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 229 vecp->i_len = sizeof(xfs_inode_log_format_t); 230 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); 231 vecp++; 232 nvecs = 1; 233 234 /* 235 * Clear i_update_core if the timestamps (or any other 236 * non-transactional modification) need flushing/logging 237 * and we're about to log them with the rest of the core. 238 * 239 * This is the same logic as xfs_iflush() but this code can't 240 * run at the same time as xfs_iflush because we're in commit 241 * processing here and so we have the inode lock held in 242 * exclusive mode. Although it doesn't really matter 243 * for the timestamps if both routines were to grab the 244 * timestamps or not. That would be ok. 245 * 246 * We clear i_update_core before copying out the data. 247 * This is for coordination with our timestamp updates 248 * that don't hold the inode lock. They will always 249 * update the timestamps BEFORE setting i_update_core, 250 * so if we clear i_update_core after they set it we 251 * are guaranteed to see their updates to the timestamps 252 * either here. Likewise, if they set it after we clear it 253 * here, we'll see it either on the next commit of this 254 * inode or the next time the inode gets flushed via 255 * xfs_iflush(). This depends on strongly ordered memory 256 * semantics, but we have that. We use the SYNCHRONIZE 257 * macro to make sure that the compiler does not reorder 258 * the i_update_core access below the data copy below. 259 */ 260 if (ip->i_update_core) { 261 ip->i_update_core = 0; 262 SYNCHRONIZE(); 263 } 264 265 /* 266 * We don't have to worry about re-ordering here because 267 * the update_size field is protected by the inode lock 268 * and we have that held in exclusive mode. 269 */ 270 if (ip->i_update_size) 271 ip->i_update_size = 0; 272 273 /* 274 * Make sure to get the latest atime from the Linux inode. 275 */ 276 xfs_synchronize_atime(ip); 277 278 /* 279 * make sure the linux inode is dirty 280 */ 281 xfs_mark_inode_dirty_sync(ip); 282 283 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 284 vecp->i_len = sizeof(xfs_dinode_core_t); 285 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 286 vecp++; 287 nvecs++; 288 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 289 290 /* 291 * If this is really an old format inode, then we need to 292 * log it as such. This means that we have to copy the link 293 * count from the new field to the old. We don't have to worry 294 * about the new fields, because nothing trusts them as long as 295 * the old inode version number is there. If the superblock already 296 * has a new version number, then we don't bother converting back. 297 */ 298 mp = ip->i_mount; 299 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 300 xfs_sb_version_hasnlink(&mp->m_sb)); 301 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 302 if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 303 /* 304 * Convert it back. 305 */ 306 ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 307 ip->i_d.di_onlink = ip->i_d.di_nlink; 308 } else { 309 /* 310 * The superblock version has already been bumped, 311 * so just make the conversion to the new inode 312 * format permanent. 313 */ 314 ip->i_d.di_version = XFS_DINODE_VERSION_2; 315 ip->i_d.di_onlink = 0; 316 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 317 } 318 } 319 320 switch (ip->i_d.di_format) { 321 case XFS_DINODE_FMT_EXTENTS: 322 ASSERT(!(iip->ili_format.ilf_fields & 323 (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | 324 XFS_ILOG_DEV | XFS_ILOG_UUID))); 325 if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { 326 ASSERT(ip->i_df.if_bytes > 0); 327 ASSERT(ip->i_df.if_u1.if_extents != NULL); 328 ASSERT(ip->i_d.di_nextents > 0); 329 ASSERT(iip->ili_extents_buf == NULL); 330 nrecs = ip->i_df.if_bytes / 331 (uint)sizeof(xfs_bmbt_rec_t); 332 ASSERT(nrecs > 0); 333 #ifdef XFS_NATIVE_HOST 334 if (nrecs == ip->i_d.di_nextents) { 335 /* 336 * There are no delayed allocation 337 * extents, so just point to the 338 * real extents array. 339 */ 340 vecp->i_addr = 341 (char *)(ip->i_df.if_u1.if_extents); 342 vecp->i_len = ip->i_df.if_bytes; 343 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 344 } else 345 #endif 346 { 347 /* 348 * There are delayed allocation extents 349 * in the inode, or we need to convert 350 * the extents to on disk format. 351 * Use xfs_iextents_copy() 352 * to copy only the real extents into 353 * a separate buffer. We'll free the 354 * buffer in the unlock routine. 355 */ 356 ext_buffer = kmem_alloc(ip->i_df.if_bytes, 357 KM_SLEEP); 358 iip->ili_extents_buf = ext_buffer; 359 vecp->i_addr = (xfs_caddr_t)ext_buffer; 360 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 361 XFS_DATA_FORK); 362 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 363 } 364 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 365 iip->ili_format.ilf_dsize = vecp->i_len; 366 vecp++; 367 nvecs++; 368 } 369 break; 370 371 case XFS_DINODE_FMT_BTREE: 372 ASSERT(!(iip->ili_format.ilf_fields & 373 (XFS_ILOG_DDATA | XFS_ILOG_DEXT | 374 XFS_ILOG_DEV | XFS_ILOG_UUID))); 375 if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { 376 ASSERT(ip->i_df.if_broot_bytes > 0); 377 ASSERT(ip->i_df.if_broot != NULL); 378 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 379 vecp->i_len = ip->i_df.if_broot_bytes; 380 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); 381 vecp++; 382 nvecs++; 383 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 384 } 385 break; 386 387 case XFS_DINODE_FMT_LOCAL: 388 ASSERT(!(iip->ili_format.ilf_fields & 389 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 390 XFS_ILOG_DEV | XFS_ILOG_UUID))); 391 if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { 392 ASSERT(ip->i_df.if_bytes > 0); 393 ASSERT(ip->i_df.if_u1.if_data != NULL); 394 ASSERT(ip->i_d.di_size > 0); 395 396 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; 397 /* 398 * Round i_bytes up to a word boundary. 399 * The underlying memory is guaranteed to 400 * to be there by xfs_idata_realloc(). 401 */ 402 data_bytes = roundup(ip->i_df.if_bytes, 4); 403 ASSERT((ip->i_df.if_real_bytes == 0) || 404 (ip->i_df.if_real_bytes == data_bytes)); 405 vecp->i_len = (int)data_bytes; 406 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); 407 vecp++; 408 nvecs++; 409 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 410 } 411 break; 412 413 case XFS_DINODE_FMT_DEV: 414 ASSERT(!(iip->ili_format.ilf_fields & 415 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 416 XFS_ILOG_DDATA | XFS_ILOG_UUID))); 417 if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 418 iip->ili_format.ilf_u.ilfu_rdev = 419 ip->i_df.if_u2.if_rdev; 420 } 421 break; 422 423 case XFS_DINODE_FMT_UUID: 424 ASSERT(!(iip->ili_format.ilf_fields & 425 (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | 426 XFS_ILOG_DDATA | XFS_ILOG_DEV))); 427 if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 428 iip->ili_format.ilf_u.ilfu_uuid = 429 ip->i_df.if_u2.if_uuid; 430 } 431 break; 432 433 default: 434 ASSERT(0); 435 break; 436 } 437 438 /* 439 * If there are no attributes associated with the file, 440 * then we're done. 441 * Assert that no attribute-related log flags are set. 442 */ 443 if (!XFS_IFORK_Q(ip)) { 444 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 445 iip->ili_format.ilf_size = nvecs; 446 ASSERT(!(iip->ili_format.ilf_fields & 447 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 448 return; 449 } 450 451 switch (ip->i_d.di_aformat) { 452 case XFS_DINODE_FMT_EXTENTS: 453 ASSERT(!(iip->ili_format.ilf_fields & 454 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); 455 if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { 456 ASSERT(ip->i_afp->if_bytes > 0); 457 ASSERT(ip->i_afp->if_u1.if_extents != NULL); 458 ASSERT(ip->i_d.di_anextents > 0); 459 #ifdef DEBUG 460 nrecs = ip->i_afp->if_bytes / 461 (uint)sizeof(xfs_bmbt_rec_t); 462 #endif 463 ASSERT(nrecs > 0); 464 ASSERT(nrecs == ip->i_d.di_anextents); 465 #ifdef XFS_NATIVE_HOST 466 /* 467 * There are not delayed allocation extents 468 * for attributes, so just point at the array. 469 */ 470 vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); 471 vecp->i_len = ip->i_afp->if_bytes; 472 #else 473 ASSERT(iip->ili_aextents_buf == NULL); 474 /* 475 * Need to endian flip before logging 476 */ 477 ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 478 KM_SLEEP); 479 iip->ili_aextents_buf = ext_buffer; 480 vecp->i_addr = (xfs_caddr_t)ext_buffer; 481 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 482 XFS_ATTR_FORK); 483 #endif 484 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); 485 iip->ili_format.ilf_asize = vecp->i_len; 486 vecp++; 487 nvecs++; 488 } 489 break; 490 491 case XFS_DINODE_FMT_BTREE: 492 ASSERT(!(iip->ili_format.ilf_fields & 493 (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); 494 if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { 495 ASSERT(ip->i_afp->if_broot_bytes > 0); 496 ASSERT(ip->i_afp->if_broot != NULL); 497 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 498 vecp->i_len = ip->i_afp->if_broot_bytes; 499 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); 500 vecp++; 501 nvecs++; 502 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 503 } 504 break; 505 506 case XFS_DINODE_FMT_LOCAL: 507 ASSERT(!(iip->ili_format.ilf_fields & 508 (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 509 if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { 510 ASSERT(ip->i_afp->if_bytes > 0); 511 ASSERT(ip->i_afp->if_u1.if_data != NULL); 512 513 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; 514 /* 515 * Round i_bytes up to a word boundary. 516 * The underlying memory is guaranteed to 517 * to be there by xfs_idata_realloc(). 518 */ 519 data_bytes = roundup(ip->i_afp->if_bytes, 4); 520 ASSERT((ip->i_afp->if_real_bytes == 0) || 521 (ip->i_afp->if_real_bytes == data_bytes)); 522 vecp->i_len = (int)data_bytes; 523 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); 524 vecp++; 525 nvecs++; 526 iip->ili_format.ilf_asize = (unsigned)data_bytes; 527 } 528 break; 529 530 default: 531 ASSERT(0); 532 break; 533 } 534 535 ASSERT(nvecs == iip->ili_item.li_desc->lid_size); 536 iip->ili_format.ilf_size = nvecs; 537 } 538 539 540 /* 541 * This is called to pin the inode associated with the inode log 542 * item in memory so it cannot be written out. Do this by calling 543 * xfs_ipin() to bump the pin count in the inode while holding the 544 * inode pin lock. 545 */ 546 STATIC void 547 xfs_inode_item_pin( 548 xfs_inode_log_item_t *iip) 549 { 550 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 551 xfs_ipin(iip->ili_inode); 552 } 553 554 555 /* 556 * This is called to unpin the inode associated with the inode log 557 * item which was previously pinned with a call to xfs_inode_item_pin(). 558 * Just call xfs_iunpin() on the inode to do this. 559 */ 560 /* ARGSUSED */ 561 STATIC void 562 xfs_inode_item_unpin( 563 xfs_inode_log_item_t *iip, 564 int stale) 565 { 566 xfs_iunpin(iip->ili_inode); 567 } 568 569 /* ARGSUSED */ 570 STATIC void 571 xfs_inode_item_unpin_remove( 572 xfs_inode_log_item_t *iip, 573 xfs_trans_t *tp) 574 { 575 xfs_iunpin(iip->ili_inode); 576 } 577 578 /* 579 * This is called to attempt to lock the inode associated with this 580 * inode log item, in preparation for the push routine which does the actual 581 * iflush. Don't sleep on the inode lock or the flush lock. 582 * 583 * If the flush lock is already held, indicating that the inode has 584 * been or is in the process of being flushed, then (ideally) we'd like to 585 * see if the inode's buffer is still incore, and if so give it a nudge. 586 * We delay doing so until the pushbuf routine, though, to avoid holding 587 * the AIL lock across a call to the blackhole which is the buffer cache. 588 * Also we don't want to sleep in any device strategy routines, which can happen 589 * if we do the subsequent bawrite in here. 590 */ 591 STATIC uint 592 xfs_inode_item_trylock( 593 xfs_inode_log_item_t *iip) 594 { 595 register xfs_inode_t *ip; 596 597 ip = iip->ili_inode; 598 599 if (xfs_ipincount(ip) > 0) { 600 return XFS_ITEM_PINNED; 601 } 602 603 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 604 return XFS_ITEM_LOCKED; 605 } 606 607 if (!xfs_iflock_nowait(ip)) { 608 /* 609 * If someone else isn't already trying to push the inode 610 * buffer, we get to do it. 611 */ 612 if (iip->ili_pushbuf_flag == 0) { 613 iip->ili_pushbuf_flag = 1; 614 #ifdef DEBUG 615 iip->ili_push_owner = current_pid(); 616 #endif 617 /* 618 * Inode is left locked in shared mode. 619 * Pushbuf routine gets to unlock it. 620 */ 621 return XFS_ITEM_PUSHBUF; 622 } else { 623 /* 624 * We hold the AIL lock, so we must specify the 625 * NONOTIFY flag so that we won't double trip. 626 */ 627 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 628 return XFS_ITEM_FLUSHING; 629 } 630 /* NOTREACHED */ 631 } 632 633 /* Stale items should force out the iclog */ 634 if (ip->i_flags & XFS_ISTALE) { 635 xfs_ifunlock(ip); 636 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 637 return XFS_ITEM_PINNED; 638 } 639 640 #ifdef DEBUG 641 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 642 ASSERT(iip->ili_format.ilf_fields != 0); 643 ASSERT(iip->ili_logged == 0); 644 ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL); 645 } 646 #endif 647 return XFS_ITEM_SUCCESS; 648 } 649 650 /* 651 * Unlock the inode associated with the inode log item. 652 * Clear the fields of the inode and inode log item that 653 * are specific to the current transaction. If the 654 * hold flags is set, do not unlock the inode. 655 */ 656 STATIC void 657 xfs_inode_item_unlock( 658 xfs_inode_log_item_t *iip) 659 { 660 uint hold; 661 uint iolocked; 662 uint lock_flags; 663 xfs_inode_t *ip; 664 665 ASSERT(iip != NULL); 666 ASSERT(iip->ili_inode->i_itemp != NULL); 667 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 669 XFS_ILI_IOLOCKED_EXCL)) || 670 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL)); 671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 672 XFS_ILI_IOLOCKED_SHARED)) || 673 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED)); 674 /* 675 * Clear the transaction pointer in the inode. 676 */ 677 ip = iip->ili_inode; 678 ip->i_transp = NULL; 679 680 /* 681 * If the inode needed a separate buffer with which to log 682 * its extents, then free it now. 683 */ 684 if (iip->ili_extents_buf != NULL) { 685 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); 686 ASSERT(ip->i_d.di_nextents > 0); 687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 688 ASSERT(ip->i_df.if_bytes > 0); 689 kmem_free(iip->ili_extents_buf); 690 iip->ili_extents_buf = NULL; 691 } 692 if (iip->ili_aextents_buf != NULL) { 693 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); 694 ASSERT(ip->i_d.di_anextents > 0); 695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 696 ASSERT(ip->i_afp->if_bytes > 0); 697 kmem_free(iip->ili_aextents_buf); 698 iip->ili_aextents_buf = NULL; 699 } 700 701 /* 702 * Figure out if we should unlock the inode or not. 703 */ 704 hold = iip->ili_flags & XFS_ILI_HOLD; 705 706 /* 707 * Before clearing out the flags, remember whether we 708 * are holding the inode's IO lock. 709 */ 710 iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY; 711 712 /* 713 * Clear out the fields of the inode log item particular 714 * to the current transaction. 715 */ 716 iip->ili_ilock_recur = 0; 717 iip->ili_iolock_recur = 0; 718 iip->ili_flags = 0; 719 720 /* 721 * Unlock the inode if XFS_ILI_HOLD was not set. 722 */ 723 if (!hold) { 724 lock_flags = XFS_ILOCK_EXCL; 725 if (iolocked & XFS_ILI_IOLOCKED_EXCL) { 726 lock_flags |= XFS_IOLOCK_EXCL; 727 } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) { 728 lock_flags |= XFS_IOLOCK_SHARED; 729 } 730 xfs_iput(iip->ili_inode, lock_flags); 731 } 732 } 733 734 /* 735 * This is called to find out where the oldest active copy of the 736 * inode log item in the on disk log resides now that the last log 737 * write of it completed at the given lsn. Since we always re-log 738 * all dirty data in an inode, the latest copy in the on disk log 739 * is the only one that matters. Therefore, simply return the 740 * given lsn. 741 */ 742 /*ARGSUSED*/ 743 STATIC xfs_lsn_t 744 xfs_inode_item_committed( 745 xfs_inode_log_item_t *iip, 746 xfs_lsn_t lsn) 747 { 748 return (lsn); 749 } 750 751 /* 752 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 753 * failed to get the inode flush lock but did get the inode locked SHARED. 754 * Here we're trying to see if the inode buffer is incore, and if so whether it's 755 * marked delayed write. If that's the case, we'll initiate a bawrite on that 756 * buffer to expedite the process. 757 * 758 * We aren't holding the AIL lock (or the flush lock) when this gets called, 759 * so it is inherently race-y. 760 */ 761 STATIC void 762 xfs_inode_item_pushbuf( 763 xfs_inode_log_item_t *iip) 764 { 765 xfs_inode_t *ip; 766 xfs_mount_t *mp; 767 xfs_buf_t *bp; 768 uint dopush; 769 770 ip = iip->ili_inode; 771 772 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 773 774 /* 775 * The ili_pushbuf_flag keeps others from 776 * trying to duplicate our effort. 777 */ 778 ASSERT(iip->ili_pushbuf_flag != 0); 779 ASSERT(iip->ili_push_owner == current_pid()); 780 781 /* 782 * If a flush is not in progress anymore, chances are that the 783 * inode was taken off the AIL. So, just get out. 784 */ 785 if (completion_done(&ip->i_flush) || 786 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 787 iip->ili_pushbuf_flag = 0; 788 xfs_iunlock(ip, XFS_ILOCK_SHARED); 789 return; 790 } 791 792 mp = ip->i_mount; 793 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 794 iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); 795 796 if (bp != NULL) { 797 if (XFS_BUF_ISDELAYWRITE(bp)) { 798 /* 799 * We were racing with iflush because we don't hold 800 * the AIL lock or the flush lock. However, at this point, 801 * we have the buffer, and we know that it's dirty. 802 * So, it's possible that iflush raced with us, and 803 * this item is already taken off the AIL. 804 * If not, we can flush it async. 805 */ 806 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && 807 !completion_done(&ip->i_flush)); 808 iip->ili_pushbuf_flag = 0; 809 xfs_iunlock(ip, XFS_ILOCK_SHARED); 810 xfs_buftrace("INODE ITEM PUSH", bp); 811 if (XFS_BUF_ISPINNED(bp)) { 812 xfs_log_force(mp, (xfs_lsn_t)0, 813 XFS_LOG_FORCE); 814 } 815 if (dopush) { 816 int error; 817 error = xfs_bawrite(mp, bp); 818 if (error) 819 xfs_fs_cmn_err(CE_WARN, mp, 820 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", 821 error, iip, bp); 822 } else { 823 xfs_buf_relse(bp); 824 } 825 } else { 826 iip->ili_pushbuf_flag = 0; 827 xfs_iunlock(ip, XFS_ILOCK_SHARED); 828 xfs_buf_relse(bp); 829 } 830 return; 831 } 832 /* 833 * We have to be careful about resetting pushbuf flag too early (above). 834 * Even though in theory we can do it as soon as we have the buflock, 835 * we don't want others to be doing work needlessly. They'll come to 836 * this function thinking that pushing the buffer is their 837 * responsibility only to find that the buffer is still locked by 838 * another doing the same thing 839 */ 840 iip->ili_pushbuf_flag = 0; 841 xfs_iunlock(ip, XFS_ILOCK_SHARED); 842 return; 843 } 844 845 846 /* 847 * This is called to asynchronously write the inode associated with this 848 * inode log item out to disk. The inode will already have been locked by 849 * a successful call to xfs_inode_item_trylock(). 850 */ 851 STATIC void 852 xfs_inode_item_push( 853 xfs_inode_log_item_t *iip) 854 { 855 xfs_inode_t *ip; 856 857 ip = iip->ili_inode; 858 859 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 860 ASSERT(!completion_done(&ip->i_flush)); 861 /* 862 * Since we were able to lock the inode's flush lock and 863 * we found it on the AIL, the inode must be dirty. This 864 * is because the inode is removed from the AIL while still 865 * holding the flush lock in xfs_iflush_done(). Thus, if 866 * we found it in the AIL and were able to obtain the flush 867 * lock without sleeping, then there must not have been 868 * anyone in the process of flushing the inode. 869 */ 870 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || 871 iip->ili_format.ilf_fields != 0); 872 873 /* 874 * Write out the inode. The completion routine ('iflush_done') will 875 * pull it from the AIL, mark it clean, unlock the flush lock. 876 */ 877 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); 878 xfs_iunlock(ip, XFS_ILOCK_SHARED); 879 880 return; 881 } 882 883 /* 884 * XXX rcc - this one really has to do something. Probably needs 885 * to stamp in a new field in the incore inode. 886 */ 887 /* ARGSUSED */ 888 STATIC void 889 xfs_inode_item_committing( 890 xfs_inode_log_item_t *iip, 891 xfs_lsn_t lsn) 892 { 893 iip->ili_last_lsn = lsn; 894 return; 895 } 896 897 /* 898 * This is the ops vector shared by all buf log items. 899 */ 900 static struct xfs_item_ops xfs_inode_item_ops = { 901 .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, 902 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 903 xfs_inode_item_format, 904 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 905 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin, 906 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 907 xfs_inode_item_unpin_remove, 908 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 909 .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock, 910 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 911 xfs_inode_item_committed, 912 .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push, 913 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf, 914 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 915 xfs_inode_item_committing 916 }; 917 918 919 /* 920 * Initialize the inode log item for a newly allocated (in-core) inode. 921 */ 922 void 923 xfs_inode_item_init( 924 xfs_inode_t *ip, 925 xfs_mount_t *mp) 926 { 927 xfs_inode_log_item_t *iip; 928 929 ASSERT(ip->i_itemp == NULL); 930 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 931 932 iip->ili_item.li_type = XFS_LI_INODE; 933 iip->ili_item.li_ops = &xfs_inode_item_ops; 934 iip->ili_item.li_mountp = mp; 935 iip->ili_inode = ip; 936 937 /* 938 We have zeroed memory. No need ... 939 iip->ili_extents_buf = NULL; 940 iip->ili_pushbuf_flag = 0; 941 */ 942 943 iip->ili_format.ilf_type = XFS_LI_INODE; 944 iip->ili_format.ilf_ino = ip->i_ino; 945 iip->ili_format.ilf_blkno = ip->i_blkno; 946 iip->ili_format.ilf_len = ip->i_len; 947 iip->ili_format.ilf_boffset = ip->i_boffset; 948 } 949 950 /* 951 * Free the inode log item and any memory hanging off of it. 952 */ 953 void 954 xfs_inode_item_destroy( 955 xfs_inode_t *ip) 956 { 957 #ifdef XFS_TRANS_DEBUG 958 if (ip->i_itemp->ili_root_size != 0) { 959 kmem_free(ip->i_itemp->ili_orig_root); 960 } 961 #endif 962 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 963 } 964 965 966 /* 967 * This is the inode flushing I/O completion routine. It is called 968 * from interrupt level when the buffer containing the inode is 969 * flushed to disk. It is responsible for removing the inode item 970 * from the AIL if it has not been re-logged, and unlocking the inode's 971 * flush lock. 972 */ 973 /*ARGSUSED*/ 974 void 975 xfs_iflush_done( 976 xfs_buf_t *bp, 977 xfs_inode_log_item_t *iip) 978 { 979 xfs_inode_t *ip; 980 981 ip = iip->ili_inode; 982 983 /* 984 * We only want to pull the item from the AIL if it is 985 * actually there and its location in the log has not 986 * changed since we started the flush. Thus, we only bother 987 * if the ili_logged flag is set and the inode's lsn has not 988 * changed. First we check the lsn outside 989 * the lock since it's cheaper, and then we recheck while 990 * holding the lock before removing the inode from the AIL. 991 */ 992 if (iip->ili_logged && 993 (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { 994 spin_lock(&ip->i_mount->m_ail_lock); 995 if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { 996 /* 997 * xfs_trans_delete_ail() drops the AIL lock. 998 */ 999 xfs_trans_delete_ail(ip->i_mount, 1000 (xfs_log_item_t*)iip); 1001 } else { 1002 spin_unlock(&ip->i_mount->m_ail_lock); 1003 } 1004 } 1005 1006 iip->ili_logged = 0; 1007 1008 /* 1009 * Clear the ili_last_fields bits now that we know that the 1010 * data corresponding to them is safely on disk. 1011 */ 1012 iip->ili_last_fields = 0; 1013 1014 /* 1015 * Release the inode's flush lock since we're done with it. 1016 */ 1017 xfs_ifunlock(ip); 1018 1019 return; 1020 } 1021 1022 /* 1023 * This is the inode flushing abort routine. It is called 1024 * from xfs_iflush when the filesystem is shutting down to clean 1025 * up the inode state. 1026 * It is responsible for removing the inode item 1027 * from the AIL if it has not been re-logged, and unlocking the inode's 1028 * flush lock. 1029 */ 1030 void 1031 xfs_iflush_abort( 1032 xfs_inode_t *ip) 1033 { 1034 xfs_inode_log_item_t *iip; 1035 xfs_mount_t *mp; 1036 1037 iip = ip->i_itemp; 1038 mp = ip->i_mount; 1039 if (iip) { 1040 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1041 spin_lock(&mp->m_ail_lock); 1042 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 1043 /* 1044 * xfs_trans_delete_ail() drops the AIL lock. 1045 */ 1046 xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); 1047 } else 1048 spin_unlock(&mp->m_ail_lock); 1049 } 1050 iip->ili_logged = 0; 1051 /* 1052 * Clear the ili_last_fields bits now that we know that the 1053 * data corresponding to them is safely on disk. 1054 */ 1055 iip->ili_last_fields = 0; 1056 /* 1057 * Clear the inode logging fields so no more flushes are 1058 * attempted. 1059 */ 1060 iip->ili_format.ilf_fields = 0; 1061 } 1062 /* 1063 * Release the inode's flush lock since we're done with it. 1064 */ 1065 xfs_ifunlock(ip); 1066 } 1067 1068 void 1069 xfs_istale_done( 1070 xfs_buf_t *bp, 1071 xfs_inode_log_item_t *iip) 1072 { 1073 xfs_iflush_abort(iip->ili_inode); 1074 } 1075 1076 /* 1077 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions 1078 * (which can have different field alignments) to the native version 1079 */ 1080 int 1081 xfs_inode_item_format_convert( 1082 xfs_log_iovec_t *buf, 1083 xfs_inode_log_format_t *in_f) 1084 { 1085 if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { 1086 xfs_inode_log_format_32_t *in_f32; 1087 1088 in_f32 = (xfs_inode_log_format_32_t *)buf->i_addr; 1089 in_f->ilf_type = in_f32->ilf_type; 1090 in_f->ilf_size = in_f32->ilf_size; 1091 in_f->ilf_fields = in_f32->ilf_fields; 1092 in_f->ilf_asize = in_f32->ilf_asize; 1093 in_f->ilf_dsize = in_f32->ilf_dsize; 1094 in_f->ilf_ino = in_f32->ilf_ino; 1095 /* copy biggest field of ilf_u */ 1096 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, 1097 in_f32->ilf_u.ilfu_uuid.__u_bits, 1098 sizeof(uuid_t)); 1099 in_f->ilf_blkno = in_f32->ilf_blkno; 1100 in_f->ilf_len = in_f32->ilf_len; 1101 in_f->ilf_boffset = in_f32->ilf_boffset; 1102 return 0; 1103 } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ 1104 xfs_inode_log_format_64_t *in_f64; 1105 1106 in_f64 = (xfs_inode_log_format_64_t *)buf->i_addr; 1107 in_f->ilf_type = in_f64->ilf_type; 1108 in_f->ilf_size = in_f64->ilf_size; 1109 in_f->ilf_fields = in_f64->ilf_fields; 1110 in_f->ilf_asize = in_f64->ilf_asize; 1111 in_f->ilf_dsize = in_f64->ilf_dsize; 1112 in_f->ilf_ino = in_f64->ilf_ino; 1113 /* copy biggest field of ilf_u */ 1114 memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, 1115 in_f64->ilf_u.ilfu_uuid.__u_bits, 1116 sizeof(uuid_t)); 1117 in_f->ilf_blkno = in_f64->ilf_blkno; 1118 in_f->ilf_len = in_f64->ilf_len; 1119 in_f->ilf_boffset = in_f64->ilf_boffset; 1120 return 0; 1121 } 1122 return EFSCORRUPTED; 1123 } 1124