1 #include <stdio.h> 2 #include <stdlib.h> 3 #include "kerncompat.h" 4 #include "radix-tree.h" 5 #include "ctree.h" 6 #include "disk-io.h" 7 #include "print-tree.h" 8 #include "transaction.h" 9 10 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 11 *orig_root, u64 num_blocks, u64 search_start, u64 12 search_end, struct btrfs_key *ins); 13 static int finish_current_insert(struct btrfs_trans_handle *trans, struct 14 btrfs_root *extent_root); 15 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root 16 *extent_root); 17 18 /* 19 * pending extents are blocks that we're trying to allocate in the extent 20 * map while trying to grow the map because of other allocations. To avoid 21 * recursing, they are tagged in the radix tree and cleaned up after 22 * other allocations are done. The pending tag is also used in the same 23 * manner for deletes. 24 */ 25 #define CTREE_EXTENT_PENDING_DEL 0 26 27 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root 28 *root, u64 blocknr) 29 { 30 struct btrfs_path path; 31 int ret; 32 struct btrfs_key key; 33 struct btrfs_leaf *l; 34 struct btrfs_extent_item *item; 35 struct btrfs_key ins; 36 u32 refs; 37 38 find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins); 39 btrfs_init_path(&path); 40 key.objectid = blocknr; 41 key.flags = 0; 42 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 43 key.offset = 1; 44 ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1); 45 if (ret != 0) 46 BUG(); 47 BUG_ON(ret != 0); 48 l = &path.nodes[0]->leaf; 49 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); 50 refs = btrfs_extent_refs(item); 51 btrfs_set_extent_refs(item, refs + 1); 52 53 BUG_ON(list_empty(&path.nodes[0]->dirty)); 54 btrfs_release_path(root->extent_root, &path); 55 finish_current_insert(trans, root->extent_root); 56 run_pending(trans, root->extent_root); 57 return 0; 58 } 59 60 static int lookup_block_ref(struct 
btrfs_trans_handle *trans, struct btrfs_root 61 *root, u64 blocknr, u32 *refs) 62 { 63 struct btrfs_path path; 64 int ret; 65 struct btrfs_key key; 66 struct btrfs_leaf *l; 67 struct btrfs_extent_item *item; 68 btrfs_init_path(&path); 69 key.objectid = blocknr; 70 key.offset = 1; 71 key.flags = 0; 72 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 73 ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0); 74 if (ret != 0) 75 BUG(); 76 l = &path.nodes[0]->leaf; 77 item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); 78 *refs = btrfs_extent_refs(item); 79 btrfs_release_path(root->extent_root, &path); 80 return 0; 81 } 82 83 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 84 struct btrfs_buffer *buf) 85 { 86 u64 blocknr; 87 int i; 88 89 if (!root->ref_cows) 90 return 0; 91 if (btrfs_is_leaf(&buf->node)) 92 return 0; 93 94 for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { 95 blocknr = btrfs_node_blockptr(&buf->node, i); 96 inc_block_ref(trans, root, blocknr); 97 } 98 return 0; 99 } 100 101 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct 102 btrfs_root *root) 103 { 104 unsigned long gang[8]; 105 u64 first = 0; 106 int ret; 107 int i; 108 109 while(1) { 110 ret = radix_tree_gang_lookup(&root->pinned_radix, 111 (void **)gang, 0, 112 ARRAY_SIZE(gang)); 113 if (!ret) 114 break; 115 if (!first) 116 first = gang[0]; 117 for (i = 0; i < ret; i++) { 118 radix_tree_delete(&root->pinned_radix, gang[i]); 119 } 120 } 121 root->last_insert.objectid = first; 122 root->last_insert.offset = 0; 123 return 0; 124 } 125 126 static int finish_current_insert(struct btrfs_trans_handle *trans, struct 127 btrfs_root *extent_root) 128 { 129 struct btrfs_key ins; 130 struct btrfs_extent_item extent_item; 131 int i; 132 int ret; 133 134 btrfs_set_extent_refs(&extent_item, 1); 135 btrfs_set_extent_owner(&extent_item, 136 btrfs_header_parentid(&extent_root->node->node.header)); 137 ins.offset = 1; 138 
ins.flags = 0; 139 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); 140 141 for (i = 0; i < extent_root->current_insert.flags; i++) { 142 ins.objectid = extent_root->current_insert.objectid + i; 143 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, 144 sizeof(extent_item)); 145 BUG_ON(ret); 146 } 147 extent_root->current_insert.offset = 0; 148 return 0; 149 } 150 151 /* 152 * remove an extent from the root, returns 0 on success 153 */ 154 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 155 *root, u64 blocknr, u64 num_blocks, int pin) 156 { 157 struct btrfs_path path; 158 struct btrfs_key key; 159 struct btrfs_root *extent_root = root->extent_root; 160 int ret; 161 struct btrfs_extent_item *ei; 162 struct btrfs_key ins; 163 u32 refs; 164 165 BUG_ON(pin && num_blocks != 1); 166 key.objectid = blocknr; 167 key.flags = 0; 168 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 169 key.offset = num_blocks; 170 171 find_free_extent(trans, root, 0, 0, (u64)-1, &ins); 172 btrfs_init_path(&path); 173 ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); 174 if (ret) { 175 printf("failed to find %Lu\n", key.objectid); 176 btrfs_print_tree(extent_root, extent_root->node); 177 printf("failed to find %Lu\n", key.objectid); 178 BUG(); 179 } 180 ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], 181 struct btrfs_extent_item); 182 BUG_ON(ei->refs == 0); 183 refs = btrfs_extent_refs(ei) - 1; 184 btrfs_set_extent_refs(ei, refs); 185 if (refs == 0) { 186 if (pin) { 187 int err; 188 radix_tree_preload(GFP_KERNEL); 189 err = radix_tree_insert(&extent_root->pinned_radix, 190 blocknr, (void *)blocknr); 191 BUG_ON(err); 192 radix_tree_preload_end(); 193 } 194 ret = btrfs_del_item(trans, extent_root, &path); 195 if (!pin && extent_root->last_insert.objectid > blocknr) 196 extent_root->last_insert.objectid = blocknr; 197 if (ret) 198 BUG(); 199 } 200 btrfs_release_path(extent_root, &path); 201 finish_current_insert(trans, 
extent_root); 202 return ret; 203 } 204 205 /* 206 * find all the blocks marked as pending in the radix tree and remove 207 * them from the extent map 208 */ 209 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 210 btrfs_root *extent_root) 211 { 212 int ret; 213 struct btrfs_buffer *gang[4]; 214 int i; 215 216 while(1) { 217 ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, 218 (void **)gang, 0, 219 ARRAY_SIZE(gang), 220 CTREE_EXTENT_PENDING_DEL); 221 if (!ret) 222 break; 223 for (i = 0; i < ret; i++) { 224 ret = __free_extent(trans, extent_root, 225 gang[i]->blocknr, 1, 1); 226 radix_tree_tag_clear(&extent_root->cache_radix, 227 gang[i]->blocknr, 228 CTREE_EXTENT_PENDING_DEL); 229 btrfs_block_release(extent_root, gang[i]); 230 } 231 } 232 return 0; 233 } 234 235 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root 236 *extent_root) 237 { 238 while(radix_tree_tagged(&extent_root->cache_radix, 239 CTREE_EXTENT_PENDING_DEL)) 240 del_pending_extents(trans, extent_root); 241 return 0; 242 } 243 244 245 /* 246 * remove an extent from the root, returns 0 on success 247 */ 248 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 249 *root, u64 blocknr, u64 num_blocks, int pin) 250 { 251 struct btrfs_root *extent_root = root->extent_root; 252 struct btrfs_buffer *t; 253 int pending_ret; 254 int ret; 255 256 if (root == extent_root) { 257 t = find_tree_block(root, blocknr); 258 radix_tree_tag_set(&root->cache_radix, blocknr, 259 CTREE_EXTENT_PENDING_DEL); 260 return 0; 261 } 262 ret = __free_extent(trans, root, blocknr, num_blocks, pin); 263 pending_ret = run_pending(trans, root->extent_root); 264 return ret ? ret : pending_ret; 265 } 266 267 /* 268 * walks the btree of allocated extents and find a hole of a given size. 
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
			    search_end, struct btrfs_key *ins)
{
	struct btrfs_path path;
	struct btrfs_key key;
	int ret;
	u64 hole_size = 0;
	int slot = 0;
	u64 last_block;
	u64 test_block;
	int start_found;
	struct btrfs_leaf *l;
	struct btrfs_root * root = orig_root->extent_root;
	int total_needed = num_blocks;

	/*
	 * over-reserve: modifying the extent tree to record this
	 * allocation may itself cow up to 3 blocks per level
	 */
	total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
	/* start from the last successful allocation, if any */
	if (root->last_insert.objectid > search_start)
		search_start = root->last_insert.objectid;

	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

check_failed:
	btrfs_init_path(&path);
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
	ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
	if (ret < 0)
		goto error;

	/* step back one slot so the scan sees the extent before the hit */
	if (path.slots[0] > 0)
		path.slots[0]--;

	while (1) {
		l = &path.nodes[0]->leaf;
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(&l->header)) {
			ret = btrfs_next_leaf(root, &path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			/*
			 * past the last allocated extent: everything from
			 * here (or from last_block) onward is free
			 */
			if (!start_found) {
				ins->objectid = search_start;
				ins->offset = (u64)-1;
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
			ins->offset = (u64)-1;
			goto check_pending;
		}
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
		if (key.objectid >= search_start) {
			if (start_found) {
				if (last_block < search_start)
					last_block = search_start;
				/* gap between previous extent and this one */
				hole_size = key.objectid - last_block;
				if (hole_size > total_needed) {
					ins->objectid = last_block;
					ins->offset = hole_size;
					goto check_pending;
				}
			}
		}
		/* remember where this extent ends; the hole starts there */
		start_found = 1;
		last_block = key.objectid + key.offset;
		path.slots[0]++;
	}
	// FIXME -ENOSPC
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, &path);
	BUG_ON(ins->objectid < search_start);
	for (test_block = ins->objectid;
	     test_block < ins->objectid + total_needed; test_block++) {
		if (radix_tree_lookup(&root->pinned_radix, test_block)) {
			/* pinned block inside the window: retry past it */
			search_start = test_block + 1;
			goto check_failed;
		}
	}
	/*
	 * publish the reserved window: blocks past num_blocks are held
	 * for extent tree cow and handed out by alloc_extent()
	 */
	BUG_ON(root->current_insert.offset);
	root->current_insert.offset = total_needed - num_blocks;
	root->current_insert.objectid = ins->objectid + num_blocks;
	root->current_insert.flags = 0;
	root->last_insert.objectid = ins->objectid;
	ins->offset = num_blocks;
	return 0;
error:
	btrfs_release_path(root, &path);
	return ret;
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
379 */ 380 static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root 381 *root, u64 num_blocks, u64 search_start, u64 382 search_end, u64 owner, struct btrfs_key *ins) 383 { 384 int ret; 385 int pending_ret; 386 struct btrfs_root *extent_root = root->extent_root; 387 struct btrfs_extent_item extent_item; 388 389 btrfs_set_extent_refs(&extent_item, 1); 390 btrfs_set_extent_owner(&extent_item, owner); 391 392 if (root == extent_root) { 393 BUG_ON(extent_root->current_insert.offset == 0); 394 BUG_ON(num_blocks != 1); 395 BUG_ON(extent_root->current_insert.flags == 396 extent_root->current_insert.offset); 397 ins->offset = 1; 398 ins->objectid = extent_root->current_insert.objectid + 399 extent_root->current_insert.flags++; 400 return 0; 401 } 402 ret = find_free_extent(trans, root, num_blocks, search_start, 403 search_end, ins); 404 if (ret) 405 return ret; 406 407 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, 408 sizeof(extent_item)); 409 410 finish_current_insert(trans, extent_root); 411 pending_ret = run_pending(trans, extent_root); 412 if (ret) 413 return ret; 414 if (pending_ret) 415 return pending_ret; 416 return 0; 417 } 418 419 /* 420 * helper function to allocate a block for a given tree 421 * returns the tree buffer or NULL. 422 */ 423 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 424 struct btrfs_root *root) 425 { 426 struct btrfs_key ins; 427 int ret; 428 struct btrfs_buffer *buf; 429 430 ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, 431 btrfs_header_parentid(&root->node->node.header), 432 &ins); 433 if (ret) { 434 BUG(); 435 return NULL; 436 } 437 buf = find_tree_block(root, ins.objectid); 438 dirty_tree_block(trans, root, buf); 439 return buf; 440 } 441 442 /* 443 * helper function for drop_snapshot, this walks down the tree dropping ref 444 * counts as it goes. 
445 */ 446 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root 447 *root, struct btrfs_path *path, int *level) 448 { 449 struct btrfs_buffer *next; 450 struct btrfs_buffer *cur; 451 u64 blocknr; 452 int ret; 453 u32 refs; 454 455 ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, 456 &refs); 457 BUG_ON(ret); 458 if (refs > 1) 459 goto out; 460 /* 461 * walk down to the last node level and free all the leaves 462 */ 463 while(*level > 0) { 464 cur = path->nodes[*level]; 465 if (path->slots[*level] >= 466 btrfs_header_nritems(&cur->node.header)) 467 break; 468 blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); 469 ret = lookup_block_ref(trans, root, blocknr, &refs); 470 if (refs != 1 || *level == 1) { 471 path->slots[*level]++; 472 ret = btrfs_free_extent(trans, root, blocknr, 1, 1); 473 BUG_ON(ret); 474 continue; 475 } 476 BUG_ON(ret); 477 next = read_tree_block(root, blocknr); 478 if (path->nodes[*level-1]) 479 btrfs_block_release(root, path->nodes[*level-1]); 480 path->nodes[*level-1] = next; 481 *level = btrfs_header_level(&next->node.header); 482 path->slots[*level] = 0; 483 } 484 out: 485 ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, 486 1); 487 btrfs_block_release(root, path->nodes[*level]); 488 path->nodes[*level] = NULL; 489 *level += 1; 490 BUG_ON(ret); 491 return 0; 492 } 493 494 /* 495 * helper for dropping snapshots. 
This walks back up the tree in the path 496 * to find the first node higher up where we haven't yet gone through 497 * all the slots 498 */ 499 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root 500 *root, struct btrfs_path *path, int *level) 501 { 502 int i; 503 int slot; 504 int ret; 505 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 506 slot = path->slots[i]; 507 if (slot < 508 btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { 509 path->slots[i]++; 510 *level = i; 511 return 0; 512 } else { 513 ret = btrfs_free_extent(trans, root, 514 path->nodes[*level]->blocknr, 515 1, 1); 516 btrfs_block_release(root, path->nodes[*level]); 517 path->nodes[*level] = NULL; 518 *level = i + 1; 519 BUG_ON(ret); 520 } 521 } 522 return 1; 523 } 524 525 /* 526 * drop the reference count on the tree rooted at 'snap'. This traverses 527 * the tree freeing any blocks that have a ref count of zero after being 528 * decremented. 529 */ 530 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 531 *root, struct btrfs_buffer *snap) 532 { 533 int ret = 0; 534 int wret; 535 int level; 536 struct btrfs_path path; 537 int i; 538 int orig_level; 539 540 btrfs_init_path(&path); 541 542 level = btrfs_header_level(&snap->node.header); 543 orig_level = level; 544 path.nodes[level] = snap; 545 path.slots[level] = 0; 546 while(1) { 547 wret = walk_down_tree(trans, root, &path, &level); 548 if (wret > 0) 549 break; 550 if (wret < 0) 551 ret = wret; 552 553 wret = walk_up_tree(trans, root, &path, &level); 554 if (wret > 0) 555 break; 556 if (wret < 0) 557 ret = wret; 558 } 559 for (i = 0; i <= orig_level; i++) { 560 if (path.nodes[i]) { 561 btrfs_block_release(root, path.nodes[i]); 562 } 563 } 564 return ret; 565 } 566