1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_format.h" 23 #include "xfs_log_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_bit.h" 26 #include "xfs_shared.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_trans.h" 30 #include "xfs_trans_priv.h" 31 #include "xfs_buf_item.h" 32 #include "xfs_refcount_item.h" 33 #include "xfs_log.h" 34 #include "xfs_refcount.h" 35 36 37 kmem_zone_t *xfs_cui_zone; 38 kmem_zone_t *xfs_cud_zone; 39 40 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 41 { 42 return container_of(lip, struct xfs_cui_log_item, cui_item); 43 } 44 45 void 46 xfs_cui_item_free( 47 struct xfs_cui_log_item *cuip) 48 { 49 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 50 kmem_free(cuip); 51 else 52 kmem_zone_free(xfs_cui_zone, cuip); 53 } 54 55 STATIC void 56 xfs_cui_item_size( 57 struct xfs_log_item *lip, 58 int *nvecs, 59 int *nbytes) 60 { 61 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 62 63 *nvecs += 1; 64 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 65 } 66 67 /* 68 * This is called to fill in the vector of log iovecs for the 69 * given cui log item. We use only 1 iovec, and we point that 70 * at the cui_log_format structure embedded in the cui item. 71 * It is at this point that we assert that all of the extent 72 * slots in the cui item have been filled. 73 */ 74 STATIC void 75 xfs_cui_item_format( 76 struct xfs_log_item *lip, 77 struct xfs_log_vec *lv) 78 { 79 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 80 struct xfs_log_iovec *vecp = NULL; 81 82 ASSERT(atomic_read(&cuip->cui_next_extent) == 83 cuip->cui_format.cui_nextents); 84 85 cuip->cui_format.cui_type = XFS_LI_CUI; 86 cuip->cui_format.cui_size = 1; 87 88 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 89 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 90 } 91 92 /* 93 * Pinning has no meaning for an cui item, so just return. 94 */ 95 STATIC void 96 xfs_cui_item_pin( 97 struct xfs_log_item *lip) 98 { 99 } 100 101 /* 102 * The unpin operation is the last place an CUI is manipulated in the log. It is 103 * either inserted in the AIL or aborted in the event of a log I/O error. In 104 * either case, the CUI transaction has been successfully committed to make it 105 * this far. Therefore, we expect whoever committed the CUI to either construct 106 * and commit the CUD or drop the CUD's reference in the event of error. Simply 107 * drop the log's CUI reference now that the log is done with it. 108 */ 109 STATIC void 110 xfs_cui_item_unpin( 111 struct xfs_log_item *lip, 112 int remove) 113 { 114 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 115 116 xfs_cui_release(cuip); 117 } 118 119 /* 120 * CUI items have no locking or pushing. However, since CUIs are pulled from 121 * the AIL when their corresponding CUDs are committed to disk, their situation 122 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 123 * will eventually flush the log. This should help in getting the CUI out of 124 * the AIL. 125 */ 126 STATIC uint 127 xfs_cui_item_push( 128 struct xfs_log_item *lip, 129 struct list_head *buffer_list) 130 { 131 return XFS_ITEM_PINNED; 132 } 133 134 /* 135 * The CUI has been either committed or aborted if the transaction has been 136 * cancelled. If the transaction was cancelled, an CUD isn't going to be 137 * constructed and thus we free the CUI here directly. 138 */ 139 STATIC void 140 xfs_cui_item_unlock( 141 struct xfs_log_item *lip) 142 { 143 if (lip->li_flags & XFS_LI_ABORTED) 144 xfs_cui_item_free(CUI_ITEM(lip)); 145 } 146 147 /* 148 * The CUI is logged only once and cannot be moved in the log, so simply return 149 * the lsn at which it's been logged. 150 */ 151 STATIC xfs_lsn_t 152 xfs_cui_item_committed( 153 struct xfs_log_item *lip, 154 xfs_lsn_t lsn) 155 { 156 return lsn; 157 } 158 159 /* 160 * The CUI dependency tracking op doesn't do squat. It can't because 161 * it doesn't know where the free extent is coming from. The dependency 162 * tracking has to be handled by the "enclosing" metadata object. For 163 * example, for inodes, the inode is locked throughout the extent freeing 164 * so the dependency should be recorded there. 165 */ 166 STATIC void 167 xfs_cui_item_committing( 168 struct xfs_log_item *lip, 169 xfs_lsn_t lsn) 170 { 171 } 172 173 /* 174 * This is the ops vector shared by all cui log items. 175 */ 176 static const struct xfs_item_ops xfs_cui_item_ops = { 177 .iop_size = xfs_cui_item_size, 178 .iop_format = xfs_cui_item_format, 179 .iop_pin = xfs_cui_item_pin, 180 .iop_unpin = xfs_cui_item_unpin, 181 .iop_unlock = xfs_cui_item_unlock, 182 .iop_committed = xfs_cui_item_committed, 183 .iop_push = xfs_cui_item_push, 184 .iop_committing = xfs_cui_item_committing, 185 }; 186 187 /* 188 * Allocate and initialize an cui item with the given number of extents. 189 */ 190 struct xfs_cui_log_item * 191 xfs_cui_init( 192 struct xfs_mount *mp, 193 uint nextents) 194 195 { 196 struct xfs_cui_log_item *cuip; 197 198 ASSERT(nextents > 0); 199 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 200 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 201 KM_SLEEP); 202 else 203 cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP); 204 205 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 206 cuip->cui_format.cui_nextents = nextents; 207 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 208 atomic_set(&cuip->cui_next_extent, 0); 209 atomic_set(&cuip->cui_refcount, 2); 210 211 return cuip; 212 } 213 214 /* 215 * Freeing the CUI requires that we remove it from the AIL if it has already 216 * been placed there. However, the CUI may not yet have been placed in the AIL 217 * when called by xfs_cui_release() from CUD processing due to the ordering of 218 * committed vs unpin operations in bulk insert operations. Hence the reference 219 * count to ensure only the last caller frees the CUI. 220 */ 221 void 222 xfs_cui_release( 223 struct xfs_cui_log_item *cuip) 224 { 225 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 226 if (atomic_dec_and_test(&cuip->cui_refcount)) { 227 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); 228 xfs_cui_item_free(cuip); 229 } 230 } 231 232 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 233 { 234 return container_of(lip, struct xfs_cud_log_item, cud_item); 235 } 236 237 STATIC void 238 xfs_cud_item_size( 239 struct xfs_log_item *lip, 240 int *nvecs, 241 int *nbytes) 242 { 243 *nvecs += 1; 244 *nbytes += sizeof(struct xfs_cud_log_format); 245 } 246 247 /* 248 * This is called to fill in the vector of log iovecs for the 249 * given cud log item. We use only 1 iovec, and we point that 250 * at the cud_log_format structure embedded in the cud item. 251 * It is at this point that we assert that all of the extent 252 * slots in the cud item have been filled. 253 */ 254 STATIC void 255 xfs_cud_item_format( 256 struct xfs_log_item *lip, 257 struct xfs_log_vec *lv) 258 { 259 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 260 struct xfs_log_iovec *vecp = NULL; 261 262 cudp->cud_format.cud_type = XFS_LI_CUD; 263 cudp->cud_format.cud_size = 1; 264 265 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 266 sizeof(struct xfs_cud_log_format)); 267 } 268 269 /* 270 * Pinning has no meaning for an cud item, so just return. 271 */ 272 STATIC void 273 xfs_cud_item_pin( 274 struct xfs_log_item *lip) 275 { 276 } 277 278 /* 279 * Since pinning has no meaning for an cud item, unpinning does 280 * not either. 281 */ 282 STATIC void 283 xfs_cud_item_unpin( 284 struct xfs_log_item *lip, 285 int remove) 286 { 287 } 288 289 /* 290 * There isn't much you can do to push on an cud item. It is simply stuck 291 * waiting for the log to be flushed to disk. 292 */ 293 STATIC uint 294 xfs_cud_item_push( 295 struct xfs_log_item *lip, 296 struct list_head *buffer_list) 297 { 298 return XFS_ITEM_PINNED; 299 } 300 301 /* 302 * The CUD is either committed or aborted if the transaction is cancelled. If 303 * the transaction is cancelled, drop our reference to the CUI and free the 304 * CUD. 305 */ 306 STATIC void 307 xfs_cud_item_unlock( 308 struct xfs_log_item *lip) 309 { 310 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 311 312 if (lip->li_flags & XFS_LI_ABORTED) { 313 xfs_cui_release(cudp->cud_cuip); 314 kmem_zone_free(xfs_cud_zone, cudp); 315 } 316 } 317 318 /* 319 * When the cud item is committed to disk, all we need to do is delete our 320 * reference to our partner cui item and then free ourselves. Since we're 321 * freeing ourselves we must return -1 to keep the transaction code from 322 * further referencing this item. 323 */ 324 STATIC xfs_lsn_t 325 xfs_cud_item_committed( 326 struct xfs_log_item *lip, 327 xfs_lsn_t lsn) 328 { 329 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 330 331 /* 332 * Drop the CUI reference regardless of whether the CUD has been 333 * aborted. Once the CUD transaction is constructed, it is the sole 334 * responsibility of the CUD to release the CUI (even if the CUI is 335 * aborted due to log I/O error). 336 */ 337 xfs_cui_release(cudp->cud_cuip); 338 kmem_zone_free(xfs_cud_zone, cudp); 339 340 return (xfs_lsn_t)-1; 341 } 342 343 /* 344 * The CUD dependency tracking op doesn't do squat. It can't because 345 * it doesn't know where the free extent is coming from. The dependency 346 * tracking has to be handled by the "enclosing" metadata object. For 347 * example, for inodes, the inode is locked throughout the extent freeing 348 * so the dependency should be recorded there. 349 */ 350 STATIC void 351 xfs_cud_item_committing( 352 struct xfs_log_item *lip, 353 xfs_lsn_t lsn) 354 { 355 } 356 357 /* 358 * This is the ops vector shared by all cud log items. 359 */ 360 static const struct xfs_item_ops xfs_cud_item_ops = { 361 .iop_size = xfs_cud_item_size, 362 .iop_format = xfs_cud_item_format, 363 .iop_pin = xfs_cud_item_pin, 364 .iop_unpin = xfs_cud_item_unpin, 365 .iop_unlock = xfs_cud_item_unlock, 366 .iop_committed = xfs_cud_item_committed, 367 .iop_push = xfs_cud_item_push, 368 .iop_committing = xfs_cud_item_committing, 369 }; 370 371 /* 372 * Allocate and initialize an cud item with the given number of extents. 373 */ 374 struct xfs_cud_log_item * 375 xfs_cud_init( 376 struct xfs_mount *mp, 377 struct xfs_cui_log_item *cuip) 378 379 { 380 struct xfs_cud_log_item *cudp; 381 382 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); 383 xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); 384 cudp->cud_cuip = cuip; 385 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 386 387 return cudp; 388 } 389 390 /* 391 * Process a refcount update intent item that was recovered from the log. 392 * We need to update the refcountbt. 393 */ 394 int 395 xfs_cui_recover( 396 struct xfs_mount *mp, 397 struct xfs_cui_log_item *cuip, 398 struct xfs_defer_ops *dfops) 399 { 400 int i; 401 int error = 0; 402 unsigned int refc_type; 403 struct xfs_phys_extent *refc; 404 xfs_fsblock_t startblock_fsb; 405 bool op_ok; 406 struct xfs_cud_log_item *cudp; 407 struct xfs_trans *tp; 408 struct xfs_btree_cur *rcur = NULL; 409 enum xfs_refcount_intent_type type; 410 xfs_fsblock_t new_fsb; 411 xfs_extlen_t new_len; 412 struct xfs_bmbt_irec irec; 413 bool requeue_only = false; 414 415 ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); 416 417 /* 418 * First check the validity of the extents described by the 419 * CUI. If any are bad, then assume that all are bad and 420 * just toss the CUI. 421 */ 422 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 423 refc = &cuip->cui_format.cui_extents[i]; 424 startblock_fsb = XFS_BB_TO_FSB(mp, 425 XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); 426 switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 427 case XFS_REFCOUNT_INCREASE: 428 case XFS_REFCOUNT_DECREASE: 429 case XFS_REFCOUNT_ALLOC_COW: 430 case XFS_REFCOUNT_FREE_COW: 431 op_ok = true; 432 break; 433 default: 434 op_ok = false; 435 break; 436 } 437 if (!op_ok || startblock_fsb == 0 || 438 refc->pe_len == 0 || 439 startblock_fsb >= mp->m_sb.sb_dblocks || 440 refc->pe_len >= mp->m_sb.sb_agblocks || 441 (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) { 442 /* 443 * This will pull the CUI from the AIL and 444 * free the memory associated with it. 445 */ 446 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); 447 xfs_cui_release(cuip); 448 return -EIO; 449 } 450 } 451 452 /* 453 * Under normal operation, refcount updates are deferred, so we 454 * wouldn't be adding them directly to a transaction. All 455 * refcount updates manage reservation usage internally and 456 * dynamically by deferring work that won't fit in the 457 * transaction. Normally, any work that needs to be deferred 458 * gets attached to the same defer_ops that scheduled the 459 * refcount update. However, we're in log recovery here, so we 460 * we use the passed in defer_ops and to finish up any work that 461 * doesn't fit. We need to reserve enough blocks to handle a 462 * full btree split on either end of the refcount range. 463 */ 464 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 465 mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); 466 if (error) 467 return error; 468 cudp = xfs_trans_get_cud(tp, cuip); 469 470 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 471 refc = &cuip->cui_format.cui_extents[i]; 472 refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 473 switch (refc_type) { 474 case XFS_REFCOUNT_INCREASE: 475 case XFS_REFCOUNT_DECREASE: 476 case XFS_REFCOUNT_ALLOC_COW: 477 case XFS_REFCOUNT_FREE_COW: 478 type = refc_type; 479 break; 480 default: 481 error = -EFSCORRUPTED; 482 goto abort_error; 483 } 484 if (requeue_only) { 485 new_fsb = refc->pe_startblock; 486 new_len = refc->pe_len; 487 } else 488 error = xfs_trans_log_finish_refcount_update(tp, cudp, 489 dfops, type, refc->pe_startblock, refc->pe_len, 490 &new_fsb, &new_len, &rcur); 491 if (error) 492 goto abort_error; 493 494 /* Requeue what we didn't finish. */ 495 if (new_len > 0) { 496 irec.br_startblock = new_fsb; 497 irec.br_blockcount = new_len; 498 switch (type) { 499 case XFS_REFCOUNT_INCREASE: 500 error = xfs_refcount_increase_extent( 501 tp->t_mountp, dfops, &irec); 502 break; 503 case XFS_REFCOUNT_DECREASE: 504 error = xfs_refcount_decrease_extent( 505 tp->t_mountp, dfops, &irec); 506 break; 507 case XFS_REFCOUNT_ALLOC_COW: 508 error = xfs_refcount_alloc_cow_extent( 509 tp->t_mountp, dfops, 510 irec.br_startblock, 511 irec.br_blockcount); 512 break; 513 case XFS_REFCOUNT_FREE_COW: 514 error = xfs_refcount_free_cow_extent( 515 tp->t_mountp, dfops, 516 irec.br_startblock, 517 irec.br_blockcount); 518 break; 519 default: 520 ASSERT(0); 521 } 522 if (error) 523 goto abort_error; 524 requeue_only = true; 525 } 526 } 527 528 xfs_refcount_finish_one_cleanup(tp, rcur, error); 529 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); 530 error = xfs_trans_commit(tp); 531 return error; 532 533 abort_error: 534 xfs_refcount_finish_one_cleanup(tp, rcur, error); 535 xfs_trans_cancel(tp); 536 return error; 537 } 538